One Million Pounds has been Awarded to in you in our BT PROMO.Send your

2010-05-19 Thread British Telecom

Names...
Country...
Occupation...
Tel...


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH/RFC] opensm: toggle sweeping V2

2010-05-19 Thread Arthur Kepner

One of our customers recently merged some new systems into a 
large, existing cluster. They requested a mechanism to prevent 
opensm from sweeping while the new equipment was being added to 
the IB fabric, and then resume sweeping once they felt confident 
that the newly added (sub)fabric was correctly cabled, and fully 
functional. They used something similar to the following patch. 

Comments?

Signed-off-by: Arthur Kepner 

--- 

 include/opensm/osm_subnet.h |6 ++
 opensm/osm_console.c|   32 
 opensm/osm_state_mgr.c  |8 +++-
 opensm/osm_subnet.c |1 +
 opensm/osm_trap_rcv.c   |   35 +--
 5 files changed, 67 insertions(+), 15 deletions(-)

diff --git a/opensm/include/opensm/osm_subnet.h 
b/opensm/include/opensm/osm_subnet.h
index d79ed8f..2a1db99 100644
--- a/opensm/include/opensm/osm_subnet.h
+++ b/opensm/include/opensm/osm_subnet.h
@@ -532,6 +532,7 @@ typedef struct osm_subn {
boolean_t in_sweep_hop_0;
boolean_t first_time_master_sweep;
boolean_t coming_out_of_standby;
+   boolean_t sweeping_enabled;
unsigned need_update;
cl_fmap_t mgrp_mgid_tbl;
void *mboxes[IB_LID_MCAST_END_HO - IB_LID_MCAST_START_HO + 1];
@@ -651,6 +652,11 @@ typedef struct osm_subn {
 *  The flag is set true if the SM state was standby and now
 *  changed to MASTER it is reset at the end of the sweep.
 *
+*  sweeping_enabled
+*  FALSE - sweeping is administratively disabled, all
+*  sweeping is inhibited, TRUE - sweeping is done
+*  normally
+*
 *  need_update
 *  This flag should be on during first non-master heavy
 *  (including pre-master discovery stage)
diff --git a/opensm/opensm/osm_console.c b/opensm/opensm/osm_console.c
index 968486e..bc7bea3 100644
--- a/opensm/opensm/osm_console.c
+++ b/opensm/opensm/osm_console.c
@@ -150,6 +150,16 @@ static void help_reroute(FILE * out, int detail)
}
 }
 
+static void help_sweep(FILE * out, int detail)
+{
+   fprintf(out, "sweep [on|off]\n");
+   if (detail) {
+   fprintf(out, "enable or disable sweeping\n");
+   fprintf(out, "   [on] sweep normally\n");
+   fprintf(out, "   [off] inhibit all sweeping\n");
+   }
+}
+
 static void help_status(FILE * out, int detail)
 {
fprintf(out, "status [loop]\n");
@@ -427,11 +437,15 @@ static void print_status(osm_opensm_t * p_osm, FILE * out)
p_osm->stats.sa_mads_ignored);
fprintf(out, "\n   Subnet flags\n"
"   \n"
+   "   Sweeping enabled   : %d\n"
+   "   Sweep interval (seconds)   : %d\n"
"   Ignore existing lfts   : %d\n"
"   Subnet Init errors : %d\n"
"   In sweep hop 0 : %d\n"
"   First time master sweep: %d\n"
"   Coming out of standby  : %d\n",
+   p_osm->subn.sweeping_enabled,
+   p_osm->subn.opt.sweep_interval,
p_osm->subn.ignore_existing_lfts,
p_osm->subn.subnet_initialization_error,
p_osm->subn.in_sweep_hop_0,
@@ -495,6 +509,23 @@ static void reroute_parse(char **p_last, osm_opensm_t * 
p_osm, FILE * out)
osm_opensm_sweep(p_osm);
 }
 
+static void sweep_parse(char **p_last, osm_opensm_t * p_osm, FILE * out)
+{
+   char *p_cmd;
+
+   p_cmd = next_token(p_last);
+   if (!p_cmd ||
+   (strcmp(p_cmd, "on") != 0 && strcmp(p_cmd, "off") != 0)) {
+   fprintf(out, "Invalid sweep command\n");
+   help_sweep(out, 1);
+   } else {
+   if (strcmp(p_cmd, "on") == 0)
+   p_osm->subn.sweeping_enabled = TRUE;
+   else
+   p_osm->subn.sweeping_enabled = FALSE;
+   }
+}
+
 static void logflush_parse(char **p_last, osm_opensm_t * p_osm, FILE * out)
 {
fflush(p_osm->log.out_port);
@@ -1332,6 +1363,7 @@ static const struct command console_cmds[] = {
{"priority", &help_priority, &priority_parse},
{"resweep", &help_resweep, &resweep_parse},
{"reroute", &help_reroute, &reroute_parse},
+   {"sweep", &help_sweep, &sweep_parse},
{"status", &help_status, &status_parse},
{"logflush", &help_logflush, &logflush_parse},
{"querylid", &help_querylid, &querylid_parse},
diff --git a/opensm/opensm/osm_state_mgr.c b/opensm/opensm/osm_state_mgr.c
index e43463f..81c8f54 100644
--- a/opensm/opensm/osm_state_mgr.c
+++ b/opensm/opensm/osm_state_mgr.c
@@ -1415,7 +1415,13 @@ void osm_state_mgr_process(IN osm_sm_t * sm, IN 
osm_signal_t signal)
 
switch (signal) {
case 

[PATCH] dapl-2.0: dapltest: server info devicename is not large enough for dapl_name storage

2010-05-19 Thread Davis, Arlin R

Server info device name is an 80-char array, but the dapl device name
that is copied into it is 256 bytes. Increase the started_server.devicename
definition. Chalk one up for Windows SDK OACR (auto code review).

Signed-off-by: Sean Hefty 
---
 test/dapltest/include/dapl_server_info.h |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/test/dapltest/include/dapl_server_info.h 
b/test/dapltest/include/dapl_server_info.h
index de038c5..898f9cc 100644
--- a/test/dapltest/include/dapl_server_info.h
+++ b/test/dapltest/include/dapl_server_info.h
@@ -37,7 +37,7 @@
 
 struct started_server
 {
-chardevicename[80];
+chardevicename[256];
 struct started_server *next;
 };
 
-- 
1.5.2.5

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] dapl-2.0: cma, scm: add new provider entries for Mellanox RDMA over Ethernet device

2010-05-19 Thread Davis, Arlin R

Add options for netdev eth2/eth3 for cma and for device mlx4_0 port 1/2 for scm.

ofa-v2-cma-roe-eth2 u2.0 nonthreadsafe default libdaplofa.so.2 dapl.2.0 "eth2 
0" ""
ofa-v2-cma-roe-eth3 u2.0 nonthreadsafe default libdaplofa.so.2 dapl.2.0 "eth3 
0" ""
ofa-v2-scm-roe-mlx4_0-1 u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 
"mlx4_0 1" ""
ofa-v2-scm-roe-mlx4_0-2 u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 
"mlx4_0 2" ""

Signed-off-by: Arlin Davis 
---
 
diff --git a/Makefile.am b/Makefile.am
index bf82853..bef95ec 100755
--- a/Makefile.am
+++ b/Makefile.am
@@ -562,7 +562,11 @@ install-exec-hook:
echo ofa-v2-mlx4_0-1u u2.0 nonthreadsafe default libdaploucm.so.2 
dapl.2.0 '"mlx4_0 1" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \
echo ofa-v2-mlx4_0-2u u2.0 nonthreadsafe default libdaploucm.so.2 
dapl.2.0 '"mlx4_0 2" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \
echo ofa-v2-mthca0-1u u2.0 nonthreadsafe default libdaploucm.so.2 
dapl.2.0 '"mthca0 1" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \
-   echo ofa-v2-mthca0-2u u2.0 nonthreadsafe default libdaploucm.so.2 
dapl.2.0 '"mthca0 2" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf;
+   echo ofa-v2-mthca0-2u u2.0 nonthreadsafe default libdaploucm.so.2 
dapl.2.0 '"mthca0 2" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \
+   echo ofa-v2-cma-roe-eth2 u2.0 nonthreadsafe default libdaplofa.so.2 
dapl.2.0 '"eth2 0" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \
+   echo ofa-v2-cma-roe-eth3 u2.0 nonthreadsafe default libdaplofa.so.2 
dapl.2.0 '"eth3 0" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \
+   echo ofa-v2-scm-roe-mlx4_0-1 u2.0 nonthreadsafe default 
libdaploscm.so.2 dapl.2.0 '"mlx4_0 1" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \
+   echo ofa-v2-scm-roe-mlx4_0-2 u2.0 nonthreadsafe default 
libdaploscm.so.2 dapl.2.0 '"mlx4_0 2" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; 
 
 uninstall-hook:
if test -e $(DESTDIR)$(sysconfdir)/dat.conf; then \
 
diff --git a/dapl.spec.in b/dapl.spec.in
index 0f2c380..45187db 100644
--- a/dapl.spec.in
+++ b/dapl.spec.in
@@ -110,6 +110,10 @@ echo ofa-v2-mlx4_0-1u u2.0 nonthreadsafe default 
libdaploucm.so.2 dapl.2.0 '"mlx
 echo ofa-v2-mlx4_0-2u u2.0 nonthreadsafe default libdaploucm.so.2 dapl.2.0 
'"mlx4_0 2" ""' >> %{_sysconfdir}/dat.conf
 echo ofa-v2-mthca0-1u u2.0 nonthreadsafe default libdaploucm.so.2 dapl.2.0 
'"mthca0 1" ""' >> %{_sysconfdir}/dat.conf
 echo ofa-v2-mthca0-2u u2.0 nonthreadsafe default libdaploucm.so.2 dapl.2.0 
'"mthca0 2" ""' >> %{_sysconfdir}/dat.conf
+echo ofa-v2-cma-roe-eth2 u2.0 nonthreadsafe default libdaplofa.so.2 dapl.2.0 
'"eth2 0" ""' >> %{_sysconfdir}/dat.conf
+echo ofa-v2-cma-roe-eth3 u2.0 nonthreadsafe default libdaplofa.so.2 dapl.2.0 
'"eth3 0" ""' >> %{_sysconfdir}/dat.conf
+echo ofa-v2-scm-roe-mlx4_0-1 u2.0 nonthreadsafe default libdaploscm.so.2 
dapl.2.0 '"mlx4_0 1" ""' >> %{_sysconfdir}/dat.conf
+echo ofa-v2-scm-roe-mlx4_0-2 u2.0 nonthreadsafe default libdaploscm.so.2 
dapl.2.0 '"mlx4_0 2" ""' >> %{_sysconfdir}/dat.conf
 
 %postun 
 /sbin/ldconfig
-- 
1.5.2.5

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] dapl-2.0: windows: need to include linux directory to pick up _errno.h

2010-05-19 Thread Davis, Arlin R


Signed-off-by: Sean Hefty 
---
 dapl/openib_cma/SOURCES |2 +-
 dapl/openib_scm/SOURCES |2 +-
 dapl/openib_ucm/SOURCES |3 ++-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/dapl/openib_cma/SOURCES b/dapl/openib_cma/SOURCES
index d6b97a2..0c3764b 100644
--- a/dapl/openib_cma/SOURCES
+++ b/dapl/openib_cma/SOURCES
@@ -25,7 +25,7 @@ SOURCES = \
cm.c
 
 INCLUDES = ..\include;..\openib_common;..\common;windows;..\..\dat\include;\
-  ..\..\dat\udat\windows;..\udapl\windows;\
+  
..\..\dat\udat\windows;..\udapl\windows;..\..\..\..\inc\user\linux;\
   
..\..\..\..\inc;..\..\..\..\inc\user;..\..\..\libibverbs\include;\
   ..\..\..\librdmacm\include
 
diff --git a/dapl/openib_scm/SOURCES b/dapl/openib_scm/SOURCES
index 6e4ad30..2129e27 100644
--- a/dapl/openib_scm/SOURCES
+++ b/dapl/openib_scm/SOURCES
@@ -25,7 +25,7 @@ SOURCES = \
 cm.c
 
 INCLUDES = ..\include;..\openib_common\;..\common;windows;..\..\dat\include;\
-  ..\..\dat\udat\windows;..\udapl\windows;\
+  
..\..\dat\udat\windows;..\udapl\windows;..\..\..\..\inc\user\linux;\
   
..\..\..\..\inc;..\..\..\..\inc\user;..\..\..\libibverbs\include
 
 DAPL_OPTS = -DEXPORT_DAPL_SYMBOLS -DDAT_EXTENSIONS -DSOCK_CM -DOPENIB 
-DCQ_WAIT_OBJECT
diff --git a/dapl/openib_ucm/SOURCES b/dapl/openib_ucm/SOURCES
index 7eecf48..a2b5dce 100644
--- a/dapl/openib_ucm/SOURCES
+++ b/dapl/openib_ucm/SOURCES
@@ -21,7 +21,8 @@ SOURCES = udapl.rc ..\dapl_common_src.c ..\dapl_udapl_src.c 
..\openib_common.c \
 
 INCLUDES = ..\include;..\openib_common\;..\common;windows;..\..\dat\include;\
   ..\..\dat\udat\windows;..\udapl\windows;\
-  
..\..\..\..\inc;..\..\..\..\inc\user;..\..\..\libibverbs\include
+  
..\..\..\..\inc;..\..\..\..\inc\user;..\..\..\libibverbs\include;\
+  ..\..\..\..\inc\user\linux;
 
 DAPL_OPTS = -DEXPORT_DAPL_SYMBOLS -DDAT_EXTENSIONS -DOPENIB -DCQ_WAIT_OBJECT
 
-- 
1.5.2.5

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] dapl-2.0: windows: comp_channel.cpp is included by util.c in the openib_common.

2010-05-19 Thread Davis, Arlin R

Remove it from device.c in individual providers to avoid
duplicate definitions.

Line endings were corrected to linux format from windows as part of
the change.

Signed-off-by: Sean Hefty 
---
 dapl/openib_cma/device.c |   75 ++---
 dapl/openib_scm/device.c |1 -
 dapl/openib_ucm/device.c |5 +--
 3 files changed, 39 insertions(+), 42 deletions(-)

diff --git a/dapl/openib_cma/device.c b/dapl/openib_cma/device.c
index 99b8c55..e4ff22e 100644
--- a/dapl/openib_cma/device.c
+++ b/dapl/openib_cma/device.c
@@ -54,7 +54,6 @@ DAPL_OS_LOCK g_hca_lock;
 struct dapl_llist_entry *g_hca_list;
 
 #if defined(_WIN64) || defined(_WIN32)
-#include "..\..\..\..\..\etc\user\comp_channel.cpp"
 #include 
 
 static COMP_SET ufds;
@@ -144,43 +143,43 @@ static int dapls_thread_signal(void)
 }
 #endif
 
-/* Get IP address using network name, address, or device name */
-static int getipaddr(char *name, char *addr, int len)
-{
-struct addrinfo *res;
-
-/* assume netdev for first attempt, then network and address type */
-if (getipaddr_netdev(name, addr, len)) {
-if (getaddrinfo(name, NULL, NULL, &res)) {
-dapl_log(DAPL_DBG_TYPE_ERR,
- " open_hca: getaddr_netdev ERROR:"
- " %s. Is %s configured?\n",
- strerror(errno), name);
-return 1;
-} else {
-if (len >= res->ai_addrlen)
-memcpy(addr, res->ai_addr, res->ai_addrlen);
-else {
-freeaddrinfo(res);
-return 1;
-}
-freeaddrinfo(res);
-}
-}
-
-dapl_dbg_log(
-DAPL_DBG_TYPE_UTIL,
-" getipaddr: family %d port %d addr %d.%d.%d.%d\n",
-((struct sockaddr_in *)addr)->sin_family,
-((struct sockaddr_in *)addr)->sin_port,
-((struct sockaddr_in *)addr)->sin_addr.s_addr >> 0 & 0xff,
-((struct sockaddr_in *)addr)->sin_addr.s_addr >> 8 & 0xff,
-((struct sockaddr_in *)addr)->sin_addr.s_addr >> 16 & 0xff,
-((struct sockaddr_in *)addr)->sin_addr.
- s_addr >> 24 & 0xff);
-
-return 0;
-}
+/* Get IP address using network name, address, or device name */
+static int getipaddr(char *name, char *addr, int len)
+{
+struct addrinfo *res;
+
+/* assume netdev for first attempt, then network and address type */
+if (getipaddr_netdev(name, addr, len)) {
+if (getaddrinfo(name, NULL, NULL, &res)) {
+dapl_log(DAPL_DBG_TYPE_ERR,
+ " open_hca: getaddr_netdev ERROR:"
+ " %s. Is %s configured?\n",
+ strerror(errno), name);
+return 1;
+} else {
+if (len >= res->ai_addrlen)
+memcpy(addr, res->ai_addr, res->ai_addrlen);
+else {
+freeaddrinfo(res);
+return 1;
+}
+freeaddrinfo(res);
+}
+}
+
+dapl_dbg_log(
+DAPL_DBG_TYPE_UTIL,
+" getipaddr: family %d port %d addr %d.%d.%d.%d\n",
+((struct sockaddr_in *)addr)->sin_family,
+((struct sockaddr_in *)addr)->sin_port,
+((struct sockaddr_in *)addr)->sin_addr.s_addr >> 0 & 0xff,
+((struct sockaddr_in *)addr)->sin_addr.s_addr >> 8 & 0xff,
+((struct sockaddr_in *)addr)->sin_addr.s_addr >> 16 & 0xff,
+((struct sockaddr_in *)addr)->sin_addr.
+ s_addr >> 24 & 0xff);
+
+return 0;
+}
 
 /*
  * dapls_ib_init, dapls_ib_release
diff --git a/dapl/openib_scm/device.c b/dapl/openib_scm/device.c
index a5b0742..4c50f03 100644
--- a/dapl/openib_scm/device.c
+++ b/dapl/openib_scm/device.c
@@ -67,7 +67,6 @@ DAT_RETURN  dapli_ib_thread_init(void);
 void dapli_ib_thread_destroy(void);
 
 #if defined(_WIN64) || defined(_WIN32)
-#include "..\..\..\..\..\etc\user\comp_channel.cpp"
 #include 
 
 static COMP_SET ufds;
diff --git a/dapl/openib_ucm/device.c b/dapl/openib_ucm/device.c
index 1f324b3..1959c76 100644
--- a/dapl/openib_ucm/device.c
+++ b/dapl/openib_ucm/device.c
@@ -37,17 +37,16 @@ static void ucm_service_destroy(IN DAPL_HCA *hca);
 static int  ucm_service_create(IN DAPL_HCA *hca);
 
 #if defined (_WIN32)
-#include "..\..\..\..\..\etc\user\comp_channel.cpp"
 #include 
 
 static int32_t create_os_signal(IN DAPL_HCA * hca_ptr)
 {
-   return CompSetInit(&hca_ptr->ib_trans.signal.set);
+   return CompSetInit(&hca_ptr->ib_

Re: [PATCH] RDMA/ucma: Copy iWARP route information.

2010-05-19 Thread Steve Wise

Roland Dreier wrote:

 > Roland/Sean, is this ok for 2.6.35?

I guess it's fine.  What does it give us by itself though?

  


Peace of mind. :)


IMO it is a bug fix.


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] RDMA/ucma: Copy iWARP route information.

2010-05-19 Thread Roland Dreier
 > Roland/Sean, is this ok for 2.6.35?

I guess it's fine.  What does it give us by itself though?

 - R.
-- 
Roland Dreier  || For corporate legal information go to:
http://www.cisco.com/web/about/doing_business/legal/cri/index.html
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] RDMA/ucma: Copy iWARP route information.

2010-05-19 Thread Steve Wise

Roland/Sean, is this ok for 2.6.35?


Steve Wise wrote:

For iWARP rdma_cm ids, the "route" information is the L2 src and
next hop addresses.

Signed-off-by: Steve Wise 
---

 drivers/infiniband/core/ucma.c |   13 +
 1 files changed, 13 insertions(+), 0 deletions(-)

diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index ac7edc2..0498383 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -583,6 +583,16 @@ static void ucma_copy_ib_route(struct 
rdma_ucm_query_route_resp *resp,
}
 }
 
+static void ucma_copy_iw_route(struct rdma_ucm_query_route_resp *resp,

+  struct rdma_route *route)
+{
+   struct rdma_dev_addr *dev_addr;
+
+   dev_addr = &route->addr.dev_addr;
+   rdma_addr_get_dgid(dev_addr, (union ib_gid *) &resp->ib_route[0].dgid);
+   rdma_addr_get_sgid(dev_addr, (union ib_gid *) &resp->ib_route[0].sgid);
+}
+
 static ssize_t ucma_query_route(struct ucma_file *file,
const char __user *inbuf,
int in_len, int out_len)
@@ -621,6 +631,9 @@ static ssize_t ucma_query_route(struct ucma_file *file,
case RDMA_TRANSPORT_IB:
ucma_copy_ib_route(&resp, &ctx->cm_id->route);
break;
+   case RDMA_TRANSPORT_IWARP:
+   ucma_copy_iw_route(&resp, &ctx->cm_id->route);
+   break;
default:
break;
}

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
  


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH v2] libibverbs: add path record definitions to sa.h

2010-05-19 Thread Sean Hefty
>Also, there is an rdmacm kernel change to pass up iWARP L2 addresses once
>a cm_id has resolved the addresses.  I posted it earlier and I think
>Sean is going to integrate it for 2.6.36.

I believe that the patch you posted earlier is sufficient for 2.6.35.  I just
need to update my patch set for AF_IB support, which was written assuming that
iWarp devices did not return L2 addresses.

- Sean

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] libibverbs: add path record definitions to sa.h

2010-05-19 Thread Steve Wise

Steve Wise wrote:

Walukiewicz, Miroslaw wrote:

Hello Steve,
Do you plan some changes in the core code related to RAW_QPT?
  



The only changes I see needed to the kernel core is the mcast change 
you already proposed to allow mcast attach/detach for RAW_ETY qps...





Also, there is an rdmacm kernel change to pass up iWARP L2 addresses once 
a cm_id has resolved the addresses.  I posted it earlier and I think 
Sean is going to integrate it for 2.6.36.


Steve.


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 7/9] dapl-2.0: scm: add option to use other network devices with environment variable DAPL_SCM_NETDEV

2010-05-19 Thread Davis, Arlin R

New environment variable can be used to set the netdev
for sockets to use instead of the default network device
returned using gethostname.

Signed-off-by: Arlin Davis 
---
 dapl/openib_cma/device.c|  152 +--
 dapl/openib_common/dapl_ib_common.h |3 +-
 dapl/openib_common/util.c   |  101 +++-
 dapl/openib_scm/device.c|2 +-
 4 files changed, 141 insertions(+), 117 deletions(-)

diff --git a/dapl/openib_cma/device.c b/dapl/openib_cma/device.c
index 12593cf..c9fc8c3 100644
--- a/dapl/openib_cma/device.c
+++ b/dapl/openib_cma/device.c
@@ -59,46 +59,6 @@ struct dapl_llist_entry *g_hca_list;
 
 static COMP_SET ufds;
 
-static int getipaddr_netdev(char *name, char *addr, int addr_len)
-{
-   IWVProvider *prov;
-   WV_DEVICE_ADDRESS devaddr;
-   struct addrinfo *res, *ai;
-   HRESULT hr;
-   int index;
-
-   if (strncmp(name, "rdma_dev", 8)) {
-   return EINVAL;
-   }
-
-   index = atoi(name + 8);
-
-   hr = WvGetObject(&IID_IWVProvider, (LPVOID *) &prov);
-   if (FAILED(hr)) {
-   return hr;
-   }
-
-   hr = getaddrinfo("..localmachine", NULL, NULL, &res);
-   if (hr) {
-   goto release;
-   }
-
-   for (ai = res; ai; ai = ai->ai_next) {
-   hr = prov->lpVtbl->TranslateAddress(prov, ai->ai_addr, 
&devaddr);
-   if (SUCCEEDED(hr) && (ai->ai_addrlen <= addr_len) && (index-- 
== 0)) {
-   memcpy(addr, ai->ai_addr, ai->ai_addrlen);
-   goto free;
-   }
-   }
-   hr = ENODEV;
-
-free:
-   freeaddrinfo(res);
-release:
-   prov->lpVtbl->Release(prov);
-   return hr;
-}
-
 static int dapls_os_init(void)
 {
return CompSetInit(&ufds);
@@ -133,6 +93,7 @@ static int dapls_thread_signal(void)
return 0;
 }
 #else  // _WIN64 || WIN32
+
 int g_ib_pipe[2];
 
 static int dapls_os_init(void)
@@ -146,43 +107,6 @@ static void dapls_os_release(void)
/* close pipe? */
 }
 
-/* Get IP address using network device name */
-static int getipaddr_netdev(char *name, char *addr, int addr_len)
-{
-   struct ifreq ifr;
-   int skfd, ret, len;
-
-   /* Fill in the structure */
-   snprintf(ifr.ifr_name, IFNAMSIZ, "%s", name);
-   ifr.ifr_hwaddr.sa_family = ARPHRD_INFINIBAND;
-
-   /* Create a socket fd */
-   skfd = socket(PF_INET, SOCK_STREAM, 0);
-   ret = ioctl(skfd, SIOCGIFADDR, &ifr);
-   if (ret)
-   goto bail;
-
-   switch (ifr.ifr_addr.sa_family) {
-#ifdef AF_INET6
-   case AF_INET6:
-   len = sizeof(struct sockaddr_in6);
-   break;
-#endif
-   case AF_INET:
-   default:
-   len = sizeof(struct sockaddr);
-   break;
-   }
-
-   if (len <= addr_len)
-   memcpy(addr, &ifr.ifr_addr, len);
-   else
-   ret = EINVAL;
-
-  bail:
-   close(skfd);
-   return ret;
-}
 
 static int dapls_config_fd(int fd)
 {
@@ -220,43 +144,43 @@ static int dapls_thread_signal(void)
 }
 #endif
 
-/* Get IP address using network name, address, or device name */
-static int getipaddr(char *name, char *addr, int len)
-{
-   struct addrinfo *res;
-
-   /* assume netdev for first attempt, then network and address type */
-   if (getipaddr_netdev(name, addr, len)) {
-   if (getaddrinfo(name, NULL, NULL, &res)) {
-   dapl_log(DAPL_DBG_TYPE_ERR,
-" open_hca: getaddr_netdev ERROR:"
-" %s. Is %s configured?\n",
-strerror(errno), name);
-   return 1;
-   } else {
-   if (len >= res->ai_addrlen)
-   memcpy(addr, res->ai_addr, res->ai_addrlen);
-   else {
-   freeaddrinfo(res);
-   return 1;
-   }
-   freeaddrinfo(res);
-   }
-   }
-
-   dapl_dbg_log(
-   DAPL_DBG_TYPE_UTIL,
-   " getipaddr: family %d port %d addr %d.%d.%d.%d\n",
-   ((struct sockaddr_in *)addr)->sin_family,
-   ((struct sockaddr_in *)addr)->sin_port,
-   ((struct sockaddr_in *)addr)->sin_addr.s_addr >> 0 & 0xff,
-   ((struct sockaddr_in *)addr)->sin_addr.s_addr >> 8 & 0xff,
-   ((struct sockaddr_in *)addr)->sin_addr.s_addr >> 16 & 0xff,
-   ((struct sockaddr_in *)addr)->sin_addr.
-s_addr >> 24 & 0xff);
-
-   return 0;
-}
+/* Get IP address using network name, address, or device name */
+static int getipaddr(char *name, char *addr, int len)
+{
+struct addrinfo *res;
+
+/* assume netdev for first attempt, then network and address type */
+if 

[PATCH 9/9] dapl-2.0: scm: check for hca object before signaling thread

2010-05-19 Thread Davis, Arlin R

There may not be an hca object attached to the cm object
when it is freed during cleanup.

Signed-off-by: Arlin Davis 
---
 dapl/openib_scm/cm.c |3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/dapl/openib_scm/cm.c b/dapl/openib_scm/cm.c
index 975ffd5..ce0d961 100644
--- a/dapl/openib_scm/cm.c
+++ b/dapl/openib_scm/cm.c
@@ -322,7 +322,8 @@ static int dapl_select(struct dapl_fd_set *set)
 
 static void dapli_cm_thread_signal(dp_ib_cm_handle_t cm_ptr) 
 {
-   send(cm_ptr->hca->ib_trans.scm[1], "w", sizeof "w", 0);
+   if (cm_ptr->hca)
+   send(cm_ptr->hca->ib_trans.scm[1], "w", sizeof "w", 0);
 }
 
 static void dapli_cm_free(dp_ib_cm_handle_t cm_ptr) 
-- 
1.5.2.5

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 8/9] dapl-2.0: scm, cma: fini code can be called multiple times and hang via fork

2010-05-19 Thread Davis, Arlin R

The providers should protect against forked child exits and
not clean up until the parent that ran init actually exits. Otherwise,
the child will hang trying to clean up the dapl thread. Modify to
check the process id so that fini cleanup is paired with the proper
init, and limit cleanup to the parent only.

Signed-off-by: Arlin Davis 
---
 dapl/openib_cma/device.c |8 ++--
 dapl/openib_scm/device.c |7 +++
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/dapl/openib_cma/device.c b/dapl/openib_cma/device.c
index c9fc8c3..99b8c55 100644
--- a/dapl/openib_cma/device.c
+++ b/dapl/openib_cma/device.c
@@ -197,9 +197,10 @@ static int getipaddr(char *name, char *addr, int len)
  * 0 success, -1 error
  *
  */
+DAT_UINT32 g_parent = 0;
 int32_t dapls_ib_init(void)
 {
-   dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " dapl_ib_init: \n");
+   g_parent = dapl_os_getpid();
 
/* initialize hca_list lock */
dapl_os_lock_init(&g_hca_lock);
@@ -215,7 +216,10 @@ int32_t dapls_ib_init(void)
 
 int32_t dapls_ib_release(void)
 {
-   dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " dapl_ib_release: \n");
+   /* only parent will cleanup */
+   if (dapl_os_getpid() != g_parent)
+   return 0;
+
dapli_ib_thread_destroy();
if (g_cm_events != NULL)
rdma_destroy_event_channel(g_cm_events);
diff --git a/dapl/openib_scm/device.c b/dapl/openib_scm/device.c
index 03d38a6..a5b0742 100644
--- a/dapl/openib_scm/device.c
+++ b/dapl/openib_scm/device.c
@@ -216,8 +216,11 @@ static void destroy_cr_pipe(IN DAPL_HCA * hca_ptr)
  * 0 success, -1 error
  *
  */
+DAT_UINT32 g_parent = 0;
 int32_t dapls_ib_init(void)
 {
+g_parent = dapl_os_getpid();
+
/* initialize hca_list */
dapl_os_lock_init(&g_hca_lock);
dapl_llist_init_head(&g_hca_list);
@@ -230,6 +233,10 @@ int32_t dapls_ib_init(void)
 
 int32_t dapls_ib_release(void)
 {
+/* only parent init will cleanup */
+if (dapl_os_getpid() != g_parent)
+return 0;
+   
dapli_ib_thread_destroy();
dapls_os_release();
return 0;
-- 
1.5.2.5

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/9] dapl-2.0: ucm: increase default UCM retry count for connect reply to 15

2010-05-19 Thread Davis, Arlin R

On large clusters, UCM is timing out with retries at 10.

Signed-off-by: Arlin Davis 
---
 dapl/openib_common/dapl_ib_common.h |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/dapl/openib_common/dapl_ib_common.h 
b/dapl/openib_common/dapl_ib_common.h
index 3e32fab..cc416d2 100644
--- a/dapl/openib_common/dapl_ib_common.h
+++ b/dapl/openib_common/dapl_ib_common.h
@@ -166,7 +166,7 @@ typedef uint16_tib_hca_port_t;
 #define DCM_TCLASS 0
 
 /* DAPL uCM timers, default queue sizes */
-#define DCM_RETRY_CNT   10 
+#define DCM_RETRY_CNT   15 
 #define DCM_REP_TIME800/* reply timeout in m_secs */
 #define DCM_RTU_TIME400/* rtu timeout in m_secs */
 #define DCM_QP_SIZE 500 /* uCM tx, rx qp size */
-- 
1.5.2.5

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/9] dapl-2.0: cma, ucm: cleanup issues with dat_ep_free on a connected EP without disconnecting.

2010-05-19 Thread Davis, Arlin R

During EP free, when disconnecting with the ABRUPT close flag, the disconnect
should wait for the DISC event to fire so that the CM can be properly destroyed
upon return.

The cma must also release the lock when calling the blocking rdma_destroy_id,
given that the callback thread could attempt to acquire the lock for reference
counting.

Signed-off-by: Arlin Davis 
---
 dapl/openib_cma/cm.c |   56 -
 dapl/openib_ucm/cm.c |   15 -
 2 files changed, 55 insertions(+), 16 deletions(-)

diff --git a/dapl/openib_cma/cm.c b/dapl/openib_cma/cm.c
index 1e846aa..503df96 100644
--- a/dapl/openib_cma/cm.c
+++ b/dapl/openib_cma/cm.c
@@ -209,18 +209,13 @@ void dapls_cm_acquire(dp_ib_cm_handle_t conn)
 void dapls_cm_release(dp_ib_cm_handle_t conn)
 {
dapl_os_lock(&conn->lock);
-   conn->ref_count--;
-   if (conn->ref_count) {
-dapl_os_unlock(&conn->lock);
-   return;
-   }
-   if (conn->cm_id) {
-   if (conn->cm_id->qp)
-   rdma_destroy_qp(conn->cm_id);
-   rdma_destroy_id(conn->cm_id);
-   }
-   dapl_os_unlock(&conn->lock);
-   dapli_cm_dealloc(conn);
+   conn->ref_count--;
+   if (conn->ref_count) {
+dapl_os_unlock(&conn->lock);
+   return;
+   }
+   dapl_os_unlock(&conn->lock);
+   dapli_cm_dealloc(conn);
 }
 
 /* BLOCKING: called from dapl_ep_free, EP link will be last ref */
@@ -235,10 +230,14 @@ void dapls_cm_free(dp_ib_cm_handle_t conn)
/* Destroy cm_id, wait until EP is last ref */
dapl_os_lock(&conn->lock);
if (conn->cm_id) {
-   if (conn->cm_id->qp)
-   rdma_destroy_qp(conn->cm_id);
-   rdma_destroy_id(conn->cm_id);
+   struct rdma_cm_id *cm_id = conn->cm_id;
+
+   if (cm_id->qp)
+   rdma_destroy_qp(cm_id);
conn->cm_id = NULL;
+   dapl_os_unlock(&conn->lock);
+   rdma_destroy_id(cm_id); /* blocking, event processing */
+   dapl_os_lock(&conn->lock);
}
 
/* EP linking is last reference */
@@ -640,6 +639,17 @@ dapls_ib_disconnect(IN DAPL_EP * ep_ptr, IN 
DAT_CLOSE_FLAGS close_flags)
/* no graceful half-pipe disconnect option */
rdma_disconnect(conn->cm_id);
 
+   /* ABRUPT close, wait for callback and DISCONNECTED state */
+   if (close_flags == DAT_CLOSE_ABRUPT_FLAG) {
+   dapl_os_lock(&ep_ptr->header.lock);
+   while (ep_ptr->param.ep_state != DAT_EP_STATE_DISCONNECTED) {
+   dapl_os_unlock(&ep_ptr->header.lock);
+   dapl_os_sleep_usec(1);
+   dapl_os_lock(&ep_ptr->header.lock);
+   }
+   dapl_os_unlock(&ep_ptr->header.lock);
+   }
+
/* 
 * DAT event notification occurs from the callback
 * Note: will fire even if DREQ goes unanswered on timeout 
@@ -759,6 +769,7 @@ dapls_ib_setup_conn_listener(IN DAPL_IA * ia_ptr,
return DAT_SUCCESS;
 
 bail:
+   rdma_destroy_id(conn->cm_id);
dapls_cm_release(conn);
return dat_status;
 }
@@ -791,8 +802,13 @@ dapls_ib_remove_conn_listener(IN DAPL_IA * ia_ptr, IN 
DAPL_SP * sp_ptr)
 
if (conn != IB_INVALID_HANDLE) {
sp_ptr->cm_srvc_handle = NULL;
+   if (conn->cm_id) {
+   rdma_destroy_id(conn->cm_id);
+   conn->cm_id = NULL;
+   }
dapls_cm_release(conn);
}
+   
return DAT_SUCCESS;
 }
 
@@ -869,6 +885,7 @@ dapls_ib_accept_connection(IN DAT_CR_HANDLE cr_handle,
dapl_ep_unlink_cm(ep_ptr, ep_conn);
ep_conn->cm_id->qp = NULL;
ep_conn->ep = NULL;
+   rdma_destroy_id(ep_conn->cm_id);
dapls_cm_release(ep_conn);
 
/* add new CM to EP linking, qp_handle unchanged */
@@ -912,6 +929,7 @@ bail:
rdma_reject(cr_conn->cm_id, NULL, 0);
 
/* no EP linking, ok to destroy */
+   rdma_destroy_id(cr_conn->cm_id);
dapls_cm_release(cr_conn);
return dat_status;
 }
@@ -974,6 +992,7 @@ dapls_ib_reject_connection(IN dp_ib_cm_handle_t cm_handle,
  cm_handle->p_data, offset + private_data_size);
 
/* no EP linking, ok to destroy */
+   rdma_destroy_id(cm_handle->cm_id);
dapls_cm_release(cm_handle);
return dapl_convert_errno(ret, "reject");
 }
@@ -1067,6 +1086,13 @@ void dapli_cma_event_cb(void)
 
dapls_cm_acquire(conn);

+   /* destroying cm_id, consumer thread blocking waiting for ACK */
+   if (conn->cm_id == NULL) {
+   dapls_cm_release(conn);
+   rdma_ack_cm_event(event);
+   return;
+   }
+

[PATCH 6/9] dapl-2.0: scm: cr_thread occasionally segv's when disconnecting all-to-all MPI static connections

2010-05-19 Thread Davis, Arlin R

Note: there is no valid call trace for the segv on cr_thread because
the state was changed by another thread during the switch statement,
so execution jumped to an unknown location.

Program received signal SIGSEGV, Segmentation fault.
[Switching to Thread 0x41a65940 (LWP 1328)]
0x2b2e7d9d5134 in ?? ()

Add cm object locking on all state changes and checks. When
freeing a CM object, wake up cr_thread to process the
state change to DCM_FREE.

Signed-off-by: Arlin Davis 
---
 dapl/openib_scm/cm.c |   39 ---
 1 files changed, 32 insertions(+), 7 deletions(-)

diff --git a/dapl/openib_scm/cm.c b/dapl/openib_scm/cm.c
index 4c8d4a1..975ffd5 100644
--- a/dapl/openib_scm/cm.c
+++ b/dapl/openib_scm/cm.c
@@ -436,6 +436,7 @@ void dapls_cm_free(dp_ib_cm_handle_t cm_ptr)
dapl_os_lock(&cm_ptr->lock);
cm_ptr->state = DCM_FREE;
while (cm_ptr->ref_count != 1) {
+   dapli_cm_thread_signal(cm_ptr);
dapl_os_unlock(&cm_ptr->lock);
dapl_os_sleep_usec(1);
dapl_os_lock(&cm_ptr->lock);
@@ -524,7 +525,9 @@ static void dapli_socket_connected(dp_ib_cm_handle_t 
cm_ptr, int err)
goto bail;
}
 
+   dapl_os_lock(&cm_ptr->lock);
cm_ptr->state = DCM_REP_PENDING;
+   dapl_os_unlock(&cm_ptr->lock);
 
/* send qp info and pdata to remote peer */
exp = sizeof(ib_cm_msg_t) - DCM_MAX_PDATA_SIZE;
@@ -836,7 +839,10 @@ static void dapli_socket_connect_rtu(dp_ib_cm_handle_t 
cm_ptr)
dapl_dbg_log(DAPL_DBG_TYPE_EP, " connect_rtu: send RTU\n");
 
/* complete handshake after final QP state change, Just ver+op */
+   dapl_os_lock(&cm_ptr->lock);
cm_ptr->state = DCM_CONNECTED;
+   dapl_os_unlock(&cm_ptr->lock);
+
cm_ptr->msg.op = ntohs(DCM_RTU);
if (send(cm_ptr->socket, (char *)&cm_ptr->msg, 4, 0) == -1) {
int err = dapl_socket_errno();
@@ -914,7 +920,10 @@ bail:
goto ud_bail;
 #endif
/* close socket, and post error event */
+   dapl_os_lock(&cm_ptr->lock);
cm_ptr->state = DCM_REJECTED;
+   dapl_os_unlock(&cm_ptr->lock);
+
dapl_evd_connection_callback(NULL, event, cm_ptr->msg.p_data,
 DCM_MAX_PDATA_SIZE, ep_ptr);
dapli_cm_free(cm_ptr);
@@ -1093,8 +1102,9 @@ static void dapli_socket_accept_data(ib_cm_srvc_handle_t 
acm_ptr)
}
p_data = acm_ptr->msg.p_data;
}
-
+   dapl_os_lock(&acm_ptr->lock);
acm_ptr->state = DCM_ACCEPTING_DATA;
+   dapl_os_unlock(&acm_ptr->lock);
 
dapl_dbg_log(DAPL_DBG_TYPE_CM,
 " ACCEPT: DST %s %x lid=0x%x, qpn=0x%x, psz=%d\n",
@@ -1235,7 +1245,9 @@ dapli_socket_accept_usr(DAPL_EP * ep_ptr,
dapl_os_memcpy(local.resv, cm_ptr->msg.resv, 4); 
 #endif
cm_ptr->hca = ia_ptr->hca_ptr;
+   dapl_os_lock(&cm_ptr->lock);
cm_ptr->state = DCM_ACCEPTED;
+   dapl_os_unlock(&cm_ptr->lock);
 
/* Link CM to EP, already queued on work thread */
dapl_ep_link_cm(ep_ptr, cm_ptr);
@@ -1305,7 +1317,9 @@ static void dapli_socket_accept_rtu(dp_ib_cm_handle_t 
cm_ptr)
}
 
/* save state and reference to EP, queue for disc event */
+   dapl_os_lock(&cm_ptr->lock);
cm_ptr->state = DCM_CONNECTED;
+   dapl_os_unlock(&cm_ptr->lock);
 
/* final data exchange if remote QP state is good to go */
dapl_dbg_log(DAPL_DBG_TYPE_EP, " PASSIVE: connected!\n");
@@ -1368,7 +1382,10 @@ bail:
if (cm_ptr->msg.saddr.ib.qp_type == IBV_QPT_UD) 
goto ud_bail;
 #endif
+   dapl_os_lock(&cm_ptr->lock);
cm_ptr->state = DCM_REJECTED;
+   dapl_os_unlock(&cm_ptr->lock);
+
dapls_cr_callback(cm_ptr, event, NULL, 0, cm_ptr->sp);
dapli_cm_free(cm_ptr);
 }
@@ -1759,47 +1776,55 @@ void cr_thread(void *arg)
 cr->socket);
 
/* data on listen, qp exchange, and on disc req */
+   dapl_os_lock(&cr->lock);
if ((ret == DAPL_FD_READ) || 
(cr->state != DCM_CONN_PENDING && ret == 
DAPL_FD_ERROR)) {
if (cr->socket != DAPL_INVALID_SOCKET) {
switch (cr->state) {
case DCM_LISTEN:
+   dapl_os_unlock(&cr->lock);
dapli_socket_accept(cr);
-   break;
+break;
case DCM_ACCEPTING:
+   dapl_os_unlock(&cr->lock);
dapli_socket_accept_data(cr);
break;
case DCM_ACCEPTED:
+

[PATCH 5/9] dapl-2.0: scm: SOCKOPT ERR Connection timed out on large clusters

2010-05-19 Thread Davis, Arlin R

With large-scale all-to-all connections on 1500+ cores,
the listen backlog is reached and SYNs are dropped,
which causes the connect to time out. Retry the connect
on timeout errors.

Signed-off-by: Arlin Davis 
---
 dapl/openib_scm/cm.c |   24 ++--
 1 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/dapl/openib_scm/cm.c b/dapl/openib_scm/cm.c
index 7465190..4c8d4a1 100644
--- a/dapl/openib_scm/cm.c
+++ b/dapl/openib_scm/cm.c
@@ -60,6 +60,12 @@
 #include "dapl_ep_util.h"
 #include "dapl_osd.h"
 
+/* forward declarations */
+static DAT_RETURN
+dapli_socket_connect(DAPL_EP * ep_ptr,
+DAT_IA_ADDRESS_PTR r_addr,
+DAT_CONN_QUAL r_qual, DAT_COUNT p_size, DAT_PVOID p_data);
+
 #ifdef DAPL_DBG
 /* Check for EP linking to IA and proper connect state */
 void dapli_ep_check(DAPL_EP *ep)
@@ -494,13 +500,27 @@ static void dapli_socket_connected(dp_ib_cm_handle_t 
cm_ptr, int err)
 
if (err) {
dapl_log(DAPL_DBG_TYPE_ERR,
-" CONN_PENDING: %s ERR %s -> %s %d\n",
+" CONN_PENDING: %s ERR %s -> %s %d - %s\n",
 err == -1 ? "POLL" : "SOCKOPT",
 err == -1 ? strerror(dapl_socket_errno()) : 
strerror(err), 
 inet_ntoa(((struct sockaddr_in *)
&cm_ptr->addr)->sin_addr), 
 ntohs(((struct sockaddr_in *)
-   &cm_ptr->addr)->sin_port));
+   &cm_ptr->addr)->sin_port),
+err == ETIMEDOUT ? "RETRYING...":"ABORTING");
+
+   /* retry a timeout */
+   if (err == ETIMEDOUT) {
+   closesocket(cm_ptr->socket);
+   cm_ptr->socket = DAPL_INVALID_SOCKET;
+   dapli_socket_connect(cm_ptr->ep, 
(DAT_IA_ADDRESS_PTR)&cm_ptr->addr, 
+ntohs(((struct sockaddr_in 
*)&cm_ptr->addr)->sin_port) - 1000,
+ntohs(cm_ptr->msg.p_size), 
&cm_ptr->msg.p_data);
+   dapl_ep_unlink_cm(cm_ptr->ep, cm_ptr);
+   dapli_cm_free(cm_ptr);
+   return;
+   }
+
goto bail;
}
 
-- 
1.5.2.5

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 4/9] dapl-2.0: ucm: UD mode, active side cm object released too soon, the RTU could be lost.

2010-05-19 Thread Davis, Arlin R

Will see following message with DAPL_DBG_TYPE set for Errors & Warnings (0x3):
ucm_recv: NO MATCH op REP 0x120 65487 i0x60005e c0x60005e < 0xd2 19824 0x60006a

The cm object was released on the active side after the connection
was established, RTU sent. This is a problem if the RTU is lost
and the remote side retries the REPLY. The RTU is never resent.
Keep the cm object until the EP is destroyed.

Signed-off-by: Arlin Davis 
---
 dapl/openib_ucm/cm.c |8 
 1 files changed, 0 insertions(+), 8 deletions(-)

diff --git a/dapl/openib_ucm/cm.c b/dapl/openib_ucm/cm.c
index c82147e..94af988 100644
--- a/dapl/openib_ucm/cm.c
+++ b/dapl/openib_ucm/cm.c
@@ -1143,10 +1143,6 @@ ud_bail:
(DAT_COUNT)ntohs(cm->msg.p_size),
(DAT_PVOID *)cm->msg.p_data,
(DAT_PVOID *)&xevent);
-
-   /* release cm_ptr, EP refs will prevent destroy */
-   dapli_cm_free(cm);
-   
} else
 #endif
{
@@ -1310,10 +1306,6 @@ static void ucm_accept_rtu(dp_ib_cm_handle_t cm, 
ib_cm_msg_t *msg)
(DAT_COUNT)ntohs(cm->msg.p_size),
(DAT_PVOID *)cm->msg.p_data,
(DAT_PVOID *)&xevent);
-
-/* done with CM object, EP ref will hold object for pdata */
-   dapli_cm_free(cm);
-   
} else {
 #endif
dapls_cr_callback(cm, IB_CME_CONNECTED, NULL, 0, cm->sp);
-- 
1.5.2.5

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/9] dapl-2.0: scm: remove modify QP to ERR state during disconnect on UD type QP

2010-05-19 Thread Davis, Arlin R

Patch set of bug fixes as a result of scale-out testing on 128 nodes/1538 cores.

1/9 scm: remove modify QP to ERR state during disconnect on UD type QP
2/9 ucm: increase default UCM retry count for connect reply to 15
3/9 cma, ucm: cleanup issues with dat_ep_free on a connected EP without 
disconnecting.
4/9 ucm: UD mode, active side cm object released too soon, the RTU could be lost.
5/9 scm: SOCKOPT ERR Connection timed out on large clusters
6/9 scm: cr_thread occasionally segv's when disconnecting all-to-all MPI static 
connections
7/9 scm: add option to use other network devices with environment variable 
DAPL_SCM_NETDEV
8/9 scm, cma: fini code can be called multiple times and hang via fork
9/9 scm: check for hca object before signaling thread

The disconnect on a UD type QP should not modify QP to error
since this is a shared QP. The disconnect should be treated
as a NOP on the UD type QP and only be transitioned during
the QP destroy (dat_ep_free).

Signed-off-by: Arlin Davis 
---
 dapl/openib_scm/cm.c |6 +++---
 1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/dapl/openib_scm/cm.c b/dapl/openib_scm/cm.c
index afd0d93..7465190 100644
--- a/dapl/openib_scm/cm.c
+++ b/dapl/openib_scm/cm.c
@@ -458,13 +458,13 @@ DAT_RETURN dapli_socket_disconnect(dp_ib_cm_handle_t 
cm_ptr)
dapl_os_unlock(&cm_ptr->lock);

/* send disc date, close socket, schedule destroy */
-   dapl_os_lock(&cm_ptr->ep->header.lock);
-   dapls_modify_qp_state(cm_ptr->ep->qp_handle, IBV_QPS_ERR, 0,0,0);
-   dapl_os_unlock(&cm_ptr->ep->header.lock);
send(cm_ptr->socket, (char *)&disc_data, sizeof(disc_data), 0);
 
/* disconnect events for RC's only */
if (cm_ptr->ep->param.ep_attr.service_type == DAT_SERVICE_TYPE_RC) {
+   dapl_os_lock(&cm_ptr->ep->header.lock);
+   dapls_modify_qp_state(cm_ptr->ep->qp_handle, IBV_QPS_ERR, 
0,0,0);
+   dapl_os_unlock(&cm_ptr->ep->header.lock);
if (cm_ptr->ep->cr_ptr) {
dapls_cr_callback(cm_ptr,
  IB_CME_DISCONNECTED,
-- 
1.5.2.5

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 12/12] dapl-2.0: common: EP links to EVD, PZ incorrectly released before provider CM objects freed.

2010-05-19 Thread Davis, Arlin R

Unlink/clear references after ALL CM objects linked to the EP are freed.
Otherwise, event processing via CM objects could reference the handles
still linked to the EP. After the CM objects are freed (blocking), these
handles linked to the EP are guaranteed not to be referenced by the
underlying provider.

Signed-off-by: Arlin Davis 
---
 dapl/common/dapl_ep_free.c |   28 ++--
 1 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/dapl/common/dapl_ep_free.c b/dapl/common/dapl_ep_free.c
index 8708e6f..3bfc541 100644
--- a/dapl/common/dapl_ep_free.c
+++ b/dapl/common/dapl_ep_free.c
@@ -110,6 +110,20 @@ DAT_RETURN DAT_API dapl_ep_free(IN DAT_EP_HANDLE ep_handle)
 */
(void)dapl_ep_disconnect(ep_ptr, DAT_CLOSE_ABRUPT_FLAG);
 
+   /* Free all CM objects */
+   cm_ptr = (dapl_llist_is_empty(&ep_ptr->cm_list_head)
+ ? NULL : dapl_llist_peek_head(&ep_ptr->cm_list_head));
+   while (cm_ptr != NULL) {
+dapl_log(DAPL_DBG_TYPE_EP,
+"dapl_ep_free: Free CM: EP=%p CM=%p\n",
+ep_ptr, cm_ptr);
+
+   next_cm_ptr = dapl_llist_next_entry(&ep_ptr->cm_list_head,
+   &cm_ptr->list_entry);
+   dapls_cm_free(cm_ptr); /* blocking call */
+   cm_ptr = next_cm_ptr;
+   }
+
/*
 * Do verification of parameters and the state change atomically.
 */
@@ -188,20 +202,6 @@ DAT_RETURN DAT_API dapl_ep_free(IN DAT_EP_HANDLE ep_handle)
}
}
 
-   /* Free all CM objects */
-   cm_ptr = (dapl_llist_is_empty(&ep_ptr->cm_list_head)
- ? NULL : dapl_llist_peek_head(&ep_ptr->cm_list_head));
-   while (cm_ptr != NULL) {
-dapl_log(DAPL_DBG_TYPE_EP,
-"dapl_ep_free: Free CM: EP=%p CM=%p\n",
-ep_ptr, cm_ptr);
-
-   next_cm_ptr = dapl_llist_next_entry(&ep_ptr->cm_list_head,
-   &cm_ptr->list_entry);
-   dapls_cm_free(cm_ptr); /* blocking call */
-   cm_ptr = next_cm_ptr;
-   }
-
/* Free the resource */
dapl_ep_dealloc(ep_ptr);
 
-- 
1.5.2.5

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 11/12] dapl-2.0: ucm: set timer during RTU_PENDING state change

2010-05-19 Thread Davis, Arlin R

The timer thread may pick up an uninitialized timer
value and time out before the reply is sent.

Signed-off-by: Arlin Davis 
---
 dapl/openib_ucm/cm.c |1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/dapl/openib_ucm/cm.c b/dapl/openib_ucm/cm.c
index 8fed8f6..5d5e7d2 100644
--- a/dapl/openib_ucm/cm.c
+++ b/dapl/openib_ucm/cm.c
@@ -1486,6 +1486,7 @@ dapli_accept_usr(DAPL_EP *ep, DAPL_CR *cr, DAT_COUNT 
p_size, DAT_PVOID p_data)
cm->hca = ia->hca_ptr;

dapl_os_lock(&cm->lock);
+   dapl_os_get_time(&cm->timer); /* RTU expected */
cm->state = DCM_RTU_PENDING;
dapl_os_unlock(&cm->lock);
 
-- 
1.5.2.5

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 09/12] dapl-2.0: scm: add EP locking and cm checking to socket cm disconnect

2010-05-19 Thread Davis, Arlin R

Signed-off-by: Arlin Davis 
---
 dapl/openib_scm/cm.c |7 +--
 1 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/dapl/openib_scm/cm.c b/dapl/openib_scm/cm.c
index b6ffbe9..afd0d93 100644
--- a/dapl/openib_scm/cm.c
+++ b/dapl/openib_scm/cm.c
@@ -1410,11 +1410,14 @@ dapls_ib_disconnect(IN DAPL_EP * ep_ptr, IN 
DAT_CLOSE_FLAGS close_flags)
 {
dp_ib_cm_handle_t cm_ptr = dapl_get_cm_from_ep(ep_ptr);
 
+   dapl_os_lock(&ep_ptr->header.lock);
if (ep_ptr->param.ep_state == DAT_EP_STATE_DISCONNECTED ||
-   ep_ptr->param.ep_attr.service_type != DAT_SERVICE_TYPE_RC) {
+   ep_ptr->param.ep_attr.service_type != DAT_SERVICE_TYPE_RC ||
+   cm_ptr == NULL) {
+   dapl_os_unlock(&ep_ptr->header.lock);
return DAT_SUCCESS;
} 
-   
+   dapl_os_unlock(&ep_ptr->header.lock);
return (dapli_socket_disconnect(cm_ptr));
 }
 
-- 
1.5.2.5

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 10/12] dapl-2.0: ucm: fix issues with new EP to CM linking changes

2010-05-19 Thread Davis, Arlin R

Add EP locking around QP modify
Remove release during disconnect event processing
Add check in cm_free to check state and schedule thread if necessary.
Add some additional debugging
Add processing in disconnect_clean for conn_req timeout
Remove extra CR's

Signed-off-by: Arlin Davis 
---
 dapl/openib_ucm/cm.c |  107 -
 1 files changed, 78 insertions(+), 29 deletions(-)

diff --git a/dapl/openib_ucm/cm.c b/dapl/openib_ucm/cm.c
index 85c8b4b..8fed8f6 100644
--- a/dapl/openib_ucm/cm.c
+++ b/dapl/openib_ucm/cm.c
@@ -392,13 +392,13 @@ static void ucm_process_recv(ib_hca_transport_t *tp,
cm->msg.op = htons(DCM_DREP);
ucm_send(&cm->hca->ib_trans, &cm->msg, NULL, 0); 

-   }
-   /* UD reply retried ok to ignore, any other print warning */
-   if (ntohs(msg->op) != DCM_REP) {
+   } else if (ntohs(msg->op) != DCM_DREP){
+   /* DREP ok to ignore, any other print warning */
dapl_log(DAPL_DBG_TYPE_WARN,
-   " ucm_recv: UNKNOWN operation"
-   " <- op %d, %s spsp %d sqpn %d\n", 
-   ntohs(msg->op), dapl_cm_state_str(cm->state),
+   " ucm_recv: UNEXPECTED MSG on cm %p"
+   " <- op %s, st %s spsp %d sqpn %d\n", 
+   cm, dapl_cm_op_str(ntohs(msg->op)),
+   dapl_cm_state_str(cm->state),
ntohs(msg->sport), ntohl(msg->sqpn));
}
dapl_os_unlock(&cm->lock);
@@ -635,11 +635,11 @@ void dapls_cm_acquire(dp_ib_cm_handle_t cm)
 void dapls_cm_release(dp_ib_cm_handle_t cm)
 {
dapl_os_lock(&cm->lock);
-   cm->ref_count--;
-   if (cm->ref_count) {
-dapl_os_unlock(&cm->lock);
-   return;
-   }
+   cm->ref_count--;
+   if (cm->ref_count) {
+dapl_os_unlock(&cm->lock);
+   return;
+   }
/* client, release local conn id port */
if (!cm->sp && cm->msg.sport)
ucm_free_port(&cm->hca->ib_trans, ntohs(cm->msg.sport));
@@ -652,9 +652,9 @@ void dapls_cm_release(dp_ib_cm_handle_t cm)
if (cm->ah) {
ibv_destroy_ah(cm->ah);
cm->ah = NULL;
-   }
-   dapl_os_unlock(&cm->lock);
-   dapli_cm_dealloc(cm);
+   }
+   dapl_os_unlock(&cm->lock);
+   dapli_cm_dealloc(cm);
 }
 
 dp_ib_cm_handle_t dapls_ib_cm_create(DAPL_EP *ep)
@@ -710,6 +710,11 @@ bail:
 /* schedule destruction of CM object */
 void dapli_cm_free(dp_ib_cm_handle_t cm)
 {
+   dapl_log(DAPL_DBG_TYPE_CM,
+" dapli_cm_free: cm %p %s ep %p refs=%d\n", 
+cm, dapl_cm_state_str(cm->state),
+cm->ep, cm->ref_count);
+
dapl_os_lock(&cm->lock);
cm->state = DCM_FREE;
dapls_thread_signal(&cm->hca->ib_trans.signal);
@@ -720,15 +725,18 @@ void dapli_cm_free(dp_ib_cm_handle_t cm)
 void dapls_cm_free(dp_ib_cm_handle_t cm)
 {
dapl_log(DAPL_DBG_TYPE_CM,
-" cm_free: cm %p %s ep %p refs=%d\n", 
+" dapl_cm_free: cm %p %s ep %p refs=%d\n", 
 cm, dapl_cm_state_str(cm->state),
 cm->ep, cm->ref_count);

/* free from internal workq, wait until EP is last ref */
dapl_os_lock(&cm->lock);
-   cm->state = DCM_FREE;
+   if (cm->state != DCM_FREE) 
+   cm->state = DCM_FREE;
+   
while (cm->ref_count != 1) {
dapl_os_unlock(&cm->lock);
+   dapls_thread_signal(&cm->hca->ib_trans.signal);
dapl_os_sleep_usec(1);
dapl_os_lock(&cm->lock);
}
@@ -804,8 +812,6 @@ static void ucm_disconnect_final(dp_ib_cm_handle_t cm)
else
dapl_evd_connection_callback(cm, IB_CME_DISCONNECTED, NULL, 0, 
cm->ep);
 
-   /* free local resources, EP ref will prevent destory until dat_ep_free 
*/
-   dapls_cm_release(cm);
 }
 
 /*
@@ -815,6 +821,7 @@ static void ucm_disconnect_final(dp_ib_cm_handle_t cm)
 DAT_RETURN dapli_cm_disconnect(dp_ib_cm_handle_t cm)
 {
int finalize = 1;
+   int wakeup = 0;
 
dapl_os_lock(&cm->lock);
switch (cm->state) {
@@ -826,8 +833,8 @@ DAT_RETURN dapli_cm_disconnect(dp_ib_cm_handle_t cm)
/* send DREQ, event after DREP or DREQ timeout */
cm->state = DCM_DISC_PENDING;
cm->msg.op = htons(DCM_DREQ);
-   finalize = 0; /* wait for DREP, wakeup timer thread */
-   dapls_thread_signal(&cm->hca->ib_trans.signal);
+   finalize = 0; /* wait for DREP, wakeup timer after DREQ sent */
+   wakeup = 1;
break;
case DCM_DISC_PENDING:
/* DREQ timeo

[PATCH 05/12] dapl-2.0: ibal: changes for EP to CM linking and synchronization

2010-05-19 Thread Davis, Arlin R

Windows IBAL changes to allocate and manage CM objects
and to link them to the EP. This will ensure the CM
IBAL objects and cm_id's are not destroyed before the EP.
Remove the Windows-only ibal_cm_handle in the EP structure.

Signed-off-by: Arlin Davis 
---
 dapl/common/dapl_cr_util.c |2 +-
 dapl/common/dapl_ep_util.c |7 -
 dapl/ibal/dapl_ibal_cm.c   |  274 ++--
 dapl/ibal/dapl_ibal_qp.c   |   14 ++-
 dapl/ibal/dapl_ibal_util.h |   23 -
 dapl/ibal/udapl.rc |4 +-
 dapl/include/dapl.h|1 -
 7 files changed, 224 insertions(+), 101 deletions(-)

diff --git a/dapl/common/dapl_cr_util.c b/dapl/common/dapl_cr_util.c
index 39b61ad..5970fa0 100644
--- a/dapl/common/dapl_cr_util.c
+++ b/dapl/common/dapl_cr_util.c
@@ -81,7 +81,7 @@ DAPL_CR *dapls_cr_alloc(DAPL_IA * ia_ptr)
 /*
  * dapls_cr_free
  *
- * Free the passed in EP structure.
+ * Free the passed in CR structure.
  *
  * Input:
  * entry point pointer
diff --git a/dapl/common/dapl_ep_util.c b/dapl/common/dapl_ep_util.c
index daad78d..9aff242 100644
--- a/dapl/common/dapl_ep_util.c
+++ b/dapl/common/dapl_ep_util.c
@@ -214,13 +214,6 @@ void dapl_ep_dealloc(IN DAPL_EP * ep_ptr)
if (NULL != ep_ptr->cxn_timer) {
dapl_os_free(ep_ptr->cxn_timer, sizeof(DAPL_OS_TIMER));
}
-#if defined(_WIN32) || defined(_WIN64)
-   if (ep_ptr->ibal_cm_handle) {
-   dapl_os_free(ep_ptr->ibal_cm_handle,
-sizeof(*ep_ptr->ibal_cm_handle));
-   ep_ptr->ibal_cm_handle = NULL;
-   }
-#endif

 #ifdef DAPL_COUNTERS
dapl_os_free(ep_ptr->cntrs, sizeof(DAT_UINT64) * DCNT_EP_ALL_COUNTERS);
diff --git a/dapl/ibal/dapl_ibal_cm.c b/dapl/ibal/dapl_ibal_cm.c
index c51faf8..e3c12ff 100644
--- a/dapl/ibal/dapl_ibal_cm.c
+++ b/dapl/ibal/dapl_ibal_cm.c
@@ -94,7 +94,7 @@ void dapli_print_private_data( char *prefix, const uint8_t 
*pd, int len )
 if ( !pd || len <= 0 )
return;

-dapl_log ( DAPL_DBG_TYPE_CM, "--> %s: private_data:\n",prefix);
+dapl_log ( DAPL_DBG_TYPE_CM, "--> %s: private_data(len %d)\n
",prefix,len);

 if (len > IB_MAX_REP_PDATA_SIZE)
 {
@@ -107,13 +107,70 @@ void dapli_print_private_data( char *prefix, const 
uint8_t *pd, int len )
 for ( i = 0 ; i < len; i++ )
 {
dapl_log ( DAPL_DBG_TYPE_CM, "%2x ", pd[i]);
-   if ( ((i+1) % 20) == 0 )
+   if ( ((i+1) % 5) == 0 )
dapl_log ( DAPL_DBG_TYPE_CM, "\n");
 }
dapl_log ( DAPL_DBG_TYPE_CM, "\n");
 }
 #endif

+/* EP-CM linking support */
+dp_ib_cm_handle_t ibal_cm_alloc(void)
+{
+   dp_ib_cm_handle_t cm_ptr;
+
+   /* Allocate CM, init lock, and initialize */
+   if ((cm_ptr = dapl_os_alloc(sizeof(*cm_ptr))) == NULL)
+   return NULL;
+
+   (void)dapl_os_memzero(cm_ptr, sizeof(*cm_ptr));
+   cm_ptr->ref_count = 1;
+
+   if (dapl_os_lock_init(&cm_ptr->lock)) {
+   dapl_os_free(cm_ptr, sizeof(*cm_ptr));
+   return NULL;
+   }
+
+   dapl_llist_init_entry((DAPL_LLIST_ENTRY *)&cm_ptr->list_entry);
+
+   return cm_ptr;
+}
+
+/* free CM object resources */
+static void ibal_cm_dealloc(dp_ib_cm_handle_t cm_ptr)
+{
+   dapl_os_assert(!cm_ptr->ref_count);
+   dapl_os_lock_destroy(&cm_ptr->lock);
+   dapl_os_free(cm_ptr, sizeof(*cm_ptr));
+}
+
+void dapls_cm_acquire(dp_ib_cm_handle_t cm_ptr)
+{
+   dapl_os_lock(&cm_ptr->lock);
+   cm_ptr->ref_count++;
+   dapl_os_unlock(&cm_ptr->lock);
+}
+
+void dapls_cm_release(dp_ib_cm_handle_t cm_ptr)
+{
+   dapl_os_lock(&cm_ptr->lock);
+   cm_ptr->ref_count--;
+   if (cm_ptr->ref_count) {
+dapl_os_unlock(&cm_ptr->lock);
+   return;
+   }
+   dapl_os_unlock(&cm_ptr->lock);
+   ibal_cm_dealloc(cm_ptr);
+}
+
+/* blocking: called from user thread dapl_ep_free() only */
+void dapls_cm_free(dp_ib_cm_handle_t cm_ptr)
+{
+   dapl_ep_unlink_cm(cm_ptr->ep, cm_ptr);
+
+   /* final reference, alloc */
+   dapls_cm_release(cm_ptr);
+}

 static void
 dapli_ib_cm_apr_cb (
@@ -147,6 +204,7 @@ dapli_ib_cm_dreq_cb (
 ib_cm_drep_tcm_drep;
 DAPL_EP *ep_ptr;
 intbail=10;
+dp_ib_cm_handle_t  cm_ptr;

 dapl_os_assert (p_cm_dreq_rec);

@@ -168,6 +226,14 @@ dapli_ib_cm_dreq_cb (
   DAPL_MAGIC_EP );
 return;
 }
+cm_ptr = dapl_get_cm_from_ep(ep_ptr);
+if (!cm_ptr)
+{
+dapl_dbg_log (DAPL_DBG_TYPE_ERR,
+  "--> %s: !CM_PTR on EP %p\n", __FUNCTION__, ep_ptr);
+return;
+}
+dapl_os_assert(cm_ptr->ib_cm.h_qp == p_cm_dreq_rec->h_cm_dreq.h_qp);

 dapl_dbg_log (DAPL_DBG_TYPE_CM,
   "--> %s() EP %p, %s sent_discreq %s\n",
@@ -210,10 +276,8 @@ dapli_ib_cm_dreq_cb (

 if (ep_ptr->cr_ptr)
 {
-dapl_os_assert(ep_ptr->ibal_cm_handle->cid
-  

[PATCH 03/12] dapl-2.0: scm, cma, ucm: consolidate dat event/provider event translation

2010-05-19 Thread Davis, Arlin R

Signed-off-by: Arlin Davis 
---
 dapl/openib_cma/cm.c  |  108 ---
 dapl/openib_common/util.c |  107 +++
 dapl/openib_scm/cm.c  |  112 ++---
 dapl/openib_ucm/cm.c  |   99 ---
 4 files changed, 111 insertions(+), 315 deletions(-)

diff --git a/dapl/openib_cma/cm.c b/dapl/openib_cma/cm.c
index a85e6ae..1e846aa 100644
--- a/dapl/openib_cma/cm.c
+++ b/dapl/openib_cma/cm.c
@@ -1051,114 +1051,6 @@ int dapls_ib_private_data_size(IN DAPL_HCA * hca_ptr)
return RDMA_MAX_PRIVATE_DATA;
 }
 
-/*
- * Map all CMA event codes to the DAT equivelent.
- */
-#define DAPL_IB_EVENT_CNT  13
-
-static struct ib_cm_event_map {
-   const ib_cm_events_t ib_cm_event;
-   DAT_EVENT_NUMBER dat_event_num;
-} ib_cm_event_map[DAPL_IB_EVENT_CNT] = {
-   /* 00 */  {
-   IB_CME_CONNECTED, DAT_CONNECTION_EVENT_ESTABLISHED},
-   /* 01 */  {
-   IB_CME_DISCONNECTED, DAT_CONNECTION_EVENT_DISCONNECTED},
-   /* 02 */  {
-   IB_CME_DISCONNECTED_ON_LINK_DOWN,
-   DAT_CONNECTION_EVENT_DISCONNECTED},
-   /* 03 */  {
-   IB_CME_CONNECTION_REQUEST_PENDING, DAT_CONNECTION_REQUEST_EVENT},
-   /* 04 */  {
-   IB_CME_CONNECTION_REQUEST_PENDING_PRIVATE_DATA,
-   DAT_CONNECTION_REQUEST_EVENT},
-   /* 05 */  {
-   IB_CME_CONNECTION_REQUEST_ACKED, DAT_CONNECTION_REQUEST_EVENT},
-   /* 06 */  {
-   IB_CME_DESTINATION_REJECT,
-   DAT_CONNECTION_EVENT_NON_PEER_REJECTED},
-   /* 07 */  {
-   IB_CME_DESTINATION_REJECT_PRIVATE_DATA,
-   DAT_CONNECTION_EVENT_PEER_REJECTED},
-   /* 08 */  {
-   IB_CME_DESTINATION_UNREACHABLE, DAT_CONNECTION_EVENT_UNREACHABLE},
-   /* 09 */  {
-   IB_CME_TOO_MANY_CONNECTION_REQUESTS,
-   DAT_CONNECTION_EVENT_NON_PEER_REJECTED},
-   /* 10 */  {
-   IB_CME_LOCAL_FAILURE, DAT_CONNECTION_EVENT_BROKEN},
-   /* 11 */  {
-   IB_CME_BROKEN, DAT_CONNECTION_EVENT_BROKEN},
-   /* 12 */  {
-IB_CME_TIMEOUT, DAT_CONNECTION_EVENT_TIMED_OUT},};
-
-/*
- * dapls_ib_get_cm_event
- *
- * Return a DAT connection event given a provider CM event.
- *
- * Input:
- * dat_event_num   DAT event we need an equivelent CM event for
- *
- * Output:
- * none
- *
- * Returns:
- * ib_cm_event of translated DAPL value
- */
-DAT_EVENT_NUMBER
-dapls_ib_get_dat_event(IN const ib_cm_events_t ib_cm_event,
-  IN DAT_BOOLEAN active)
-{
-   DAT_EVENT_NUMBER dat_event_num;
-   int i;
-
-   active = active;
-
-   dat_event_num = 0;
-   for (i = 0; i < DAPL_IB_EVENT_CNT; i++) {
-   if (ib_cm_event == ib_cm_event_map[i].ib_cm_event) {
-   dat_event_num = ib_cm_event_map[i].dat_event_num;
-   break;
-   }
-   }
-   dapl_dbg_log(DAPL_DBG_TYPE_CALLBACK,
-"dapls_ib_get_dat_event: event(%s) ib=0x%x dat=0x%x\n",
-active ? "active" : "passive", ib_cm_event, dat_event_num);
-
-   return dat_event_num;
-}
-
-/*
- * dapls_ib_get_dat_event
- *
- * Return a DAT connection event given a provider CM event.
- * 
- * Input:
- * ib_cm_event event provided to the dapl callback routine
- * active  switch indicating active or passive connection
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_EVENT_NUMBER of translated provider value
- */
-ib_cm_events_t dapls_ib_get_cm_event(IN DAT_EVENT_NUMBER dat_event_num)
-{
-   ib_cm_events_t ib_cm_event;
-   int i;
-
-   ib_cm_event = 0;
-   for (i = 0; i < DAPL_IB_EVENT_CNT; i++) {
-   if (dat_event_num == ib_cm_event_map[i].dat_event_num) {
-   ib_cm_event = ib_cm_event_map[i].ib_cm_event;
-   break;
-   }
-   }
-   return ib_cm_event;
-}
-
 void dapli_cma_event_cb(void)
 {
struct rdma_cm_event *event;
diff --git a/dapl/openib_common/util.c b/dapl/openib_common/util.c
index 3963e1f..704d85a 100644
--- a/dapl/openib_common/util.c
+++ b/dapl/openib_common/util.c
@@ -471,3 +471,110 @@ void dapls_query_provider_specific_attr(IN DAPL_IA * 
ia_ptr,
/* set MTU to actual settings */
ib_attrs[0].value = ia_ptr->hca_ptr->ib_trans.named_attr.value;
 }
+
+/*
+ * Map all socket CM event codes to the DAT equivelent. Common to all providers
+ */
+#define DAPL_IB_EVENT_CNT  13
+
+static struct ib_cm_event_map {
+   const ib_cm_events_t ib_cm_event;
+   DAT_EVENT_NUMBER dat_event_num;
+} ib_cm_event_map[DAPL_IB_EVENT_CNT] = {
+/* 00 */ {IB_CME_CONNECTED, 
+ DAT_CONNECTION_EVENT_ESTABLISHED},
+/* 01 */ {IB_CME_DISCONNECTED, 
+ DAT_CONNECTION_EVENT_DISCONNECTED},
+/* 02 */ {IB_CME_DISCONNECTED_ON_LINK_DOWN,
+ DAT_CONNECTION_EVENT_DI

[PATCH 08/12] dapl-2.0: scm: new cm_ep linking broke UD mode over socket cm

2010-05-19 Thread Davis, Arlin R

Add EP locking around modify_qp for EP state.
Add new dapli_ep_check for debugging EP
Cleanup extra CR's
Change socket errno to dapl_socket_errno() abstraction

Signed-off-by: Arlin Davis 
---
 dapl/openib_scm/cm.c |  177 --
 1 files changed, 128 insertions(+), 49 deletions(-)

diff --git a/dapl/openib_scm/cm.c b/dapl/openib_scm/cm.c
index 6958b67..b6ffbe9 100644
--- a/dapl/openib_scm/cm.c
+++ b/dapl/openib_scm/cm.c
@@ -60,6 +60,48 @@
 #include "dapl_ep_util.h"
 #include "dapl_osd.h"

+#ifdef DAPL_DBG
+/* Check for EP linking to IA and proper connect state */
+void dapli_ep_check(DAPL_EP *ep)
+{
+   DAPL_IA *ia_ptr = ep->header.owner_ia;
+   DAPL_EP *ep_ptr, *next_ep_ptr;
+   int found = 0;
+
+   dapl_os_lock(&ia_ptr->header.lock);
+   ep_ptr = (dapl_llist_is_empty (&ia_ptr->ep_list_head)
+   ? NULL : dapl_llist_peek_head (&ia_ptr->ep_list_head));
+
+   while (ep_ptr != NULL) {
+   next_ep_ptr =
+   dapl_llist_next_entry(&ia_ptr->ep_list_head,
+ &ep_ptr->header.ia_list_entry);
+   if (ep == ep_ptr) {
+   found++;
+   if ((ep->cr_ptr && ep->param.ep_state
+   != DAT_EP_STATE_COMPLETION_PENDING) ||
+   (!ep->cr_ptr && ep->param.ep_state
+   != DAT_EP_STATE_ACTIVE_CONNECTION_PENDING))
+   goto err;
+   else
+   goto match;
+   }
+   ep_ptr = next_ep_ptr;
+   }
+err:
+   dapl_log(DAPL_DBG_TYPE_ERR,
+" dapli_ep_check ERR: %s %s ep=%p state=%d magic=0x%x\n",
+ep->cr_ptr ? "PASSIVE":"ACTIVE",
+found ? "WRONG_STATE":"NOT_FOUND" ,
+ep, ep->param.ep_state, ep->header.magic);
+match:
+   dapl_os_unlock(&ia_ptr->header.lock);
+   return;
+}
+#else
+#define dapli_ep_check(ep)
+#endif
+
 #if defined(_WIN32) || defined(_WIN64)
 enum DAPL_FD_EVENTS {
DAPL_FD_READ = 0x1,
@@ -311,13 +353,13 @@ void dapls_cm_acquire(dp_ib_cm_handle_t cm_ptr)
 void dapls_cm_release(dp_ib_cm_handle_t cm_ptr)
 {
dapl_os_lock(&cm_ptr->lock);
-   cm_ptr->ref_count--;
-   if (cm_ptr->ref_count) {
-dapl_os_unlock(&cm_ptr->lock);
-   return;
-   }
-   dapl_os_unlock(&cm_ptr->lock);
-   dapli_cm_dealloc(cm_ptr);
+   cm_ptr->ref_count--;
+   if (cm_ptr->ref_count) {
+dapl_os_unlock(&cm_ptr->lock);
+   return;
+   }
+   dapl_os_unlock(&cm_ptr->lock);
+   dapli_cm_dealloc(cm_ptr);
 }

 static dp_ib_cm_handle_t dapli_cm_alloc(DAPL_EP *ep_ptr)
@@ -416,7 +458,9 @@ DAT_RETURN dapli_socket_disconnect(dp_ib_cm_handle_t cm_ptr)
dapl_os_unlock(&cm_ptr->lock);

/* send disc date, close socket, schedule destroy */
+   dapl_os_lock(&cm_ptr->ep->header.lock);
dapls_modify_qp_state(cm_ptr->ep->qp_handle, IBV_QPS_ERR, 0,0,0);
+   dapl_os_unlock(&cm_ptr->ep->header.lock);
send(cm_ptr->socket, (char *)&disc_data, sizeof(disc_data), 0);

/* disconnect events for RC's only */
@@ -452,7 +496,7 @@ static void dapli_socket_connected(dp_ib_cm_handle_t 
cm_ptr, int err)
dapl_log(DAPL_DBG_TYPE_ERR,
 " CONN_PENDING: %s ERR %s -> %s %d\n",
 err == -1 ? "POLL" : "SOCKOPT",
-err == -1 ? strerror(errno) : strerror(err),
+err == -1 ? strerror(dapl_socket_errno()) : 
strerror(err),
 inet_ntoa(((struct sockaddr_in *)
&cm_ptr->addr)->sin_addr),
 ntohs(((struct sockaddr_in *)
@@ -475,9 +519,10 @@ static void dapli_socket_connected(dp_ib_cm_handle_t 
cm_ptr, int err)
}

if (len != (exp + ntohs(cm_ptr->msg.p_size))) {
+   int err = dapl_socket_errno();
dapl_log(DAPL_DBG_TYPE_ERR,
-" CONN_PENDING len ERR %s, wcnt=%d(%d) -> %s\n",
-strerror(errno), len,
+" CONN_PENDING len ERR 0x%x %s, wcnt=%d(%d) -> %s\n",
+err, strerror(err), len,
 exp + ntohs(cm_ptr->msg.p_size),
 inet_ntoa(((struct sockaddr_in *)
   ep_ptr->param.
@@ -530,16 +575,19 @@ dapli_socket_connect(DAPL_EP * ep_ptr,
/* create, connect, sockopt, and exchange QP information */
if ((cm_ptr->socket =
 socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) == DAPL_INVALID_SOCKET) 
{
+   int err = dapl_socket_errno();
dapl_log(DAPL_DBG_TYPE_ERR,
-" connect: socket create ERR %s\n", strerror(errno));
+" connect: socket c

[PATCH 07/12] dapl-2.0: common: dat_ep_connect should not set timer UD endpoints

2010-05-19 Thread Davis, Arlin R

Connect for the UD type is simply AH resolution and doesn't
need to be timed. The common code is not designed to handle
multiple timed events on connect requests, so just skip
timing UD AH requests.

Signed-off-by: Arlin Davis 
---
 dapl/common/dapl_ep_connect.c |3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/dapl/common/dapl_ep_connect.c b/dapl/common/dapl_ep_connect.c
index 1f193ae..9b5829e 100755
--- a/dapl/common/dapl_ep_connect.c
+++ b/dapl/common/dapl_ep_connect.c
@@ -327,7 +327,8 @@ dapl_ep_connect(IN DAT_EP_HANDLE ep_handle,
dapl_os_lock(&ep_ptr->header.lock);
if (ep_ptr->param.ep_state ==
DAT_EP_STATE_ACTIVE_CONNECTION_PENDING
-   && timeout != DAT_TIMEOUT_INFINITE) {
+   && timeout != DAT_TIMEOUT_INFINITE &&
+   ep_ptr->param.ep_attr.service_type == DAT_SERVICE_TYPE_RC) {
ep_ptr->cxn_timer =
(DAPL_OS_TIMER *)
dapl_os_alloc(sizeof(DAPL_OS_TIMER));
-- 
1.5.2.5

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 06/12] dapl-2.0: ucm: fix error path during accept_usr reply failure

2010-05-19 Thread Davis, Arlin R

if accept_usr fails when sending reply the EP was
being linked to CM instead of properly unlinked.

Signed-off-by: Arlin Davis 
---
 dapl/openib_ucm/cm.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/dapl/openib_ucm/cm.c b/dapl/openib_ucm/cm.c
index 2cab529..85c8b4b 100644
--- a/dapl/openib_ucm/cm.c
+++ b/dapl/openib_ucm/cm.c
@@ -1454,7 +1454,7 @@ dapli_accept_usr(DAPL_EP *ep, DAPL_CR *cr, DAT_COUNT 
p_size, DAT_PVOID p_data)
dapl_os_unlock(&cm->lock);
 
if (ucm_reply(cm)) {
-   dapl_ep_link_cm(ep, cm);
+   dapl_ep_unlink_cm(ep, cm);
goto bail;
}
dapl_dbg_log(DAPL_DBG_TYPE_CM, " PASSIVE: accepted!\n");
-- 
1.5.2.5

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 02/12] dapl-2.0: common: missed linking changes from atomic to acquire/release

2010-05-19 Thread Davis, Arlin R
 

Signed-off-by: Arlin Davis 
---
 dapl/common/dapl_ep_util.c |7 +++
 1 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/dapl/common/dapl_ep_util.c b/dapl/common/dapl_ep_util.c
index bd7cdd9..daad78d 100644
--- a/dapl/common/dapl_ep_util.c
+++ b/dapl/common/dapl_ep_util.c
@@ -417,7 +417,7 @@ void dapls_ep_timeout(uintptr_t arg)
 * The disconnect_clean interface requires the provided dependent 
 *cm event number.
 */
-   ib_cm_event = dapls_ib_get_cm_event(DAT_CONNECTION_EVENT_DISCONNECTED);
+   ib_cm_event = dapls_ib_get_cm_event(DAT_CONNECTION_EVENT_TIMED_OUT);
dapls_ib_disconnect_clean(ep_ptr, DAT_TRUE, ib_cm_event);
 
(void)dapls_evd_post_connection_event((DAPL_EVD *) ep_ptr->param.
@@ -590,7 +590,6 @@ dapl_ep_legacy_post_disconnect(DAPL_EP * ep_ptr,
  * dp_ib_cm_handle_t *cm_ptr  defined in provider's dapl_util.h
  *
  * CM objects linked with EP using  ->list_entry
- *  CM objects sync'ed with EP using ->ref_count
  * Output:
  * none
  *
@@ -601,7 +600,7 @@ dapl_ep_legacy_post_disconnect(DAPL_EP * ep_ptr,
 void dapl_ep_link_cm(IN DAPL_EP *ep_ptr, IN dp_ib_cm_handle_t cm_ptr)
 {
dapl_os_lock(&ep_ptr->header.lock);
-   dapl_os_atomic_inc(&cm_ptr->ref_count);
+   dapls_cm_acquire(cm_ptr);
dapl_llist_add_tail(&ep_ptr->cm_list_head, &cm_ptr->list_entry, cm_ptr);
dapl_os_unlock(&ep_ptr->header.lock);
 }
@@ -610,7 +609,7 @@ void dapl_ep_unlink_cm(IN DAPL_EP *ep_ptr, IN 
dp_ib_cm_handle_t cm_ptr)
 {
dapl_os_lock(&ep_ptr->header.lock);
dapl_llist_remove_entry(&ep_ptr->cm_list_head, &cm_ptr->list_entry);
-   dapl_os_atomic_dec(&cm_ptr->ref_count);
+   dapls_cm_release(cm_ptr);
dapl_os_unlock(&ep_ptr->header.lock);
 }
 
-- 
1.5.2.5

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 04/12] dapl-2.0: scm: add support for canceling conn request that times out.

2010-05-19 Thread Davis, Arlin R

print warning message during timeout.

Signed-off-by: Arlin Davis 
---
 dapl/common/dapl_ep_connect.c |2 ++
 dapl/openib_scm/cm.c  |   11 ++-
 2 files changed, 12 insertions(+), 1 deletions(-)

diff --git a/dapl/common/dapl_ep_connect.c b/dapl/common/dapl_ep_connect.c
index 4e5e112..1f193ae 100755
--- a/dapl/common/dapl_ep_connect.c
+++ b/dapl/common/dapl_ep_connect.c
@@ -334,6 +334,8 @@ dapl_ep_connect(IN DAT_EP_HANDLE ep_handle,
 
dapls_timer_set(ep_ptr->cxn_timer,
dapls_ep_timeout, ep_ptr, timeout);
+
+   dapl_log(DAPL_DBG_TYPE_WARN, " dapl_ep_connect timeout = %d us\n", timeout);
}
dapl_os_unlock(&ep_ptr->header.lock);
}
diff --git a/dapl/openib_scm/cm.c b/dapl/openib_scm/cm.c
index 957066a..6958b67 100644
--- a/dapl/openib_scm/cm.c
+++ b/dapl/openib_scm/cm.c
@@ -1366,7 +1366,16 @@ dapls_ib_disconnect_clean(IN DAPL_EP * ep_ptr,
  IN DAT_BOOLEAN active,
  IN const ib_cm_events_t ib_cm_event)
 {
-   /* nothing to cleanup */
+   if (ib_cm_event == IB_CME_TIMEOUT) {
+   dp_ib_cm_handle_t cm_ptr = dapl_get_cm_from_ep(ep_ptr);
+
+   dapl_log(DAPL_DBG_TYPE_WARN,
+   "dapls_ib_disc_clean: CONN_TIMEOUT ep %p cm %p %s\n",
+   ep_ptr, cm_ptr, dapl_cm_state_str(cm_ptr->state));
+   
+   /* schedule release of socket and local resources */
+   dapli_cm_free(cm_ptr);
+   }
 }
 
 /*
-- 
1.5.2.5

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: InfiniBand/RDMA merge plans for 2.6.35

2010-05-19 Thread Jason Gunthorpe
On Mon, May 17, 2010 at 12:58:59PM -0700, Sean Hefty wrote:
> which has been updated to 2.6.34-rc7.  I tried to keep the patches small, to
> make review a little easier.
> 
> Jason, you've been most involved in reviewing the patches so far.
> Any chance I can ask you to assist Roland with a more formal review?

Sure, as time permits.. :)

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] dapl-2.0: common: remove unnecessary lmr lkey hashing and duplicate lkey checking

2010-05-19 Thread Davis, Arlin R


lmr lkey hashing is too restrictive given the returned lkey could be
the same value for different regions on some rdma devices. Actually,
this checking is really unnecessary and requires considerable overhead
for hashing so just remove hashing of lmr lkey's. Let verbs device
level do the checking and validation.

Signed-off-by: Arlin Davis 
---
 dapl/common/dapl_hca_util.c  |   16 +---
 dapl/common/dapl_lmr_free.c  |   19 +-
 dapl/common/dapl_rmr_bind.c  |   15 ++-
 dapl/udapl/dapl_lmr_create.c |   87 --
 4 files changed, 7 insertions(+), 130 deletions(-)

diff --git a/dapl/common/dapl_hca_util.c b/dapl/common/dapl_hca_util.c
index b8f068c..4a8a400 100644
--- a/dapl/common/dapl_hca_util.c
+++ b/dapl/common/dapl_hca_util.c
@@ -38,7 +38,6 @@
 #include "dapl_adapter_util.h"
 #include "dapl_provider.h"
 #include "dapl_hca_util.h"
-#include "dapl_hash.h"
 
 /*
  * dapl_hca_alloc
@@ -66,13 +65,6 @@ DAPL_HCA *dapl_hca_alloc(char *name, char *port)
}
 
dapl_os_memzero(hca_ptr, sizeof(DAPL_HCA));
-
-   if (DAT_SUCCESS !=
-   dapls_hash_create(DAPL_HASH_TABLE_DEFAULT_CAPACITY,
- &hca_ptr->lmr_hash_table)) {
-   goto bail;
-   }
-
dapl_os_lock_init(&hca_ptr->lock);
dapl_llist_init_head(&hca_ptr->ia_list_head);
 
@@ -87,13 +79,8 @@ DAPL_HCA *dapl_hca_alloc(char *name, char *port)
return (hca_ptr);
 
   bail:
-   if (NULL != hca_ptr) {
-   if (NULL != hca_ptr->lmr_hash_table) {
-   dapls_hash_free(hca_ptr->lmr_hash_table);
-   }
-
+   if (NULL != hca_ptr) 
dapl_os_free(hca_ptr, sizeof(DAPL_HCA));
-   }
 
return NULL;
 }
@@ -115,7 +102,6 @@ DAPL_HCA *dapl_hca_alloc(char *name, char *port)
  */
 void dapl_hca_free(DAPL_HCA * hca_ptr)
 {
-   (void)dapls_hash_free(hca_ptr->lmr_hash_table);
dapl_os_free(hca_ptr->name, dapl_os_strlen(hca_ptr->name) + 1);
dapl_os_free(hca_ptr, sizeof(DAPL_HCA));
 }
diff --git a/dapl/common/dapl_lmr_free.c b/dapl/common/dapl_lmr_free.c
index e72824a..5f9336f 100644
--- a/dapl/common/dapl_lmr_free.c
+++ b/dapl/common/dapl_lmr_free.c
@@ -90,29 +90,12 @@ DAT_RETURN DAT_API dapl_lmr_free(IN DAT_LMR_HANDLE 
lmr_handle)
return DAT_INVALID_STATE;
}
 
-   dat_status =
-   dapls_hash_remove(lmr->header.owner_ia->hca_ptr->
- lmr_hash_table,
- lmr->param.lmr_context, NULL);
-   if (dat_status != DAT_SUCCESS) {
-   goto bail;
-   }
-
dat_status = dapls_ib_mr_deregister(lmr);
 
if (dat_status == DAT_SUCCESS) {
dapl_os_atomic_dec(&pz->pz_ref_count);
dapl_lmr_dealloc(lmr);
-   } else {
-   /*
-* Deregister failed; put it back in the
-* hash table.
-*/
-   dapls_hash_insert(lmr->header.owner_ia->
- hca_ptr->lmr_hash_table,
- lmr->param.lmr_context, lmr);
-   }
-
+   } 
break;
}
 #if defined(__KDAPL__)
diff --git a/dapl/common/dapl_rmr_bind.c b/dapl/common/dapl_rmr_bind.c
index 9793f38..ecb190b 100755
--- a/dapl/common/dapl_rmr_bind.c
+++ b/dapl/common/dapl_rmr_bind.c
@@ -48,7 +48,8 @@
 
 STATIC _INLINE_ DAT_RETURN
 dapli_rmr_bind_fuse(IN DAPL_RMR * rmr,
-   IN const DAT_LMR_TRIPLET * lmr_triplet,
+   IN DAT_LMR_HANDLE lmr_handle,
+   IN const DAT_LMR_TRIPLET * lmr_triplet,
IN DAT_MEM_PRIV_FLAGS mem_priv,
IN DAPL_EP * ep_ptr,
IN DAT_RMR_COOKIE user_cookie,
@@ -69,6 +70,7 @@ dapli_rmr_bind_unfuse(IN DAPL_RMR * rmr,
 
 DAT_RETURN
 dapli_rmr_bind_fuse(IN DAPL_RMR * rmr,
+   IN DAT_LMR_HANDLE lmr_handle,
IN const DAT_LMR_TRIPLET * lmr_triplet,
IN DAT_MEM_PRIV_FLAGS mem_priv,
IN DAPL_EP * ep_ptr,
@@ -80,16 +82,8 @@ dapli_rmr_bind_fuse(IN DAPL_RMR * rmr,
DAPL_COOKIE *cookie;
DAT_RETURN dat_status;
DAT_BOOLEAN is_signaled;
-   DAPL_HASH_DATA hash_lmr;
 
-   dat_status =
-   dapls_hash_search(rmr->header.owner_ia->hca_ptr->lmr_hash_table,
- lmr_triplet->lmr_context, &hash_lmr);
-   if (DAT_SUCCESS != dat_status) {
-   dat_status = DAT_ERROR(DAT_INVALID_PARAMETER, DAT_INVALID_ARG2);
-   go

[PATCH] dapl-2.0: common: add some debug prints to help isolate QP type issues

2010-05-19 Thread Davis, Arlin R
 

Signed-off-by: Arlin Davis 
---
 dapl/openib_common/qp.c |9 ++---
 1 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/dapl/openib_common/qp.c b/dapl/openib_common/qp.c
index 15c1dae..473604b 100644
--- a/dapl/openib_common/qp.c
+++ b/dapl/openib_common/qp.c
@@ -182,8 +182,8 @@ dapls_ib_qp_alloc(IN DAPL_IA * ia_ptr,
}
 #endif
dapl_dbg_log(DAPL_DBG_TYPE_EP,
-" qp_alloc: qpn %p sq %d,%d rq %d,%d\n",
-ep_ptr->qp_handle->qp_num,
+" qp_alloc: qpn %p type %d sq %d,%d rq %d,%d\n",
+ep_ptr->qp_handle->qp_num, ep_ptr->qp_handle->qp_type,
 qp_create.cap.max_send_wr, qp_create.cap.max_send_sge,
 qp_create.cap.max_recv_wr, qp_create.cap.max_recv_sge);
 
@@ -561,8 +561,11 @@ dapls_create_ah(IN DAPL_HCA*hca,
struct ibv_qp_attr qp_attr;
ib_ah_handle_t  ah;
 
-   if (qp->qp_type != IBV_QPT_UD)
+   if (qp->qp_type != IBV_QPT_UD) {
+   dapl_log(DAPL_DBG_TYPE_ERR, 
+" create_ah ERR: QP_type != UD\n");
return NULL;
+   }
 
dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr));
qp_attr.qp_state = IBV_QP_STATE;
-- 
1.5.2.5

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] dapl-2.0: ibal: add missing windows makefile in git tree

2010-05-19 Thread Davis, Arlin R


Signed-off-by: Arlin Davis 
---
 dapl/ibal/makefile |7 +++
 1 files changed, 7 insertions(+), 0 deletions(-)
 create mode 100644 dapl/ibal/makefile

diff --git a/dapl/ibal/makefile b/dapl/ibal/makefile
new file mode 100644
index 000..a0c0627
--- /dev/null
+++ b/dapl/ibal/makefile
@@ -0,0 +1,7 @@
+#
+# DO NOT EDIT THIS FILE!!!  Edit .\sources. if you want to add a new source
+# file to this component.  This file merely indirects to the real make file
+# that is shared by all the driver components of the OpenIB Windows project.
+#
+
+!INCLUDE ..\..\..\..\inc\openib.def
-- 
1.5.2.5

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] dapl-2.0: ibal: output completion code in decimal & hex as intended

2010-05-19 Thread Davis, Arlin R
 

sign-off-by: stan smith 
---
 dapl/ibal/dapl_ibal_util.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/dapl/ibal/dapl_ibal_util.c b/dapl/ibal/dapl_ibal_util.c
index 513d7c9..7f9b819 100644
--- a/dapl/ibal/dapl_ibal_util.c
+++ b/dapl/ibal/dapl_ibal_util.c
@@ -2011,7 +2011,7 @@ dapls_ib_get_dto_status(
 
 default:
 #ifdef DAPL_DBG
-dapl_dbg_log (DAPL_DBG_TYPE_ERR,"%s() unknown IB_COMP_ST %x(0x%x)\n",
+dapl_dbg_log (DAPL_DBG_TYPE_ERR,"%s() unknown IB_COMP_ST %d(0x%x)\n",
   __FUNCTION__,ib_status,ib_status);
 #endif
return DAT_DTO_FAILURE;
-- 
1.5.2.5

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] dapl-2.0: windows: remove static paths from dapltest scripts

2010-05-19 Thread Davis, Arlin R
 

signed-off-by: stan smith 
---
 test/dapltest/scripts/dt-cli.bat |   11 +--
 test/dapltest/scripts/dt-svr.bat |   12 +---
 2 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/test/dapltest/scripts/dt-cli.bat b/test/dapltest/scripts/dt-cli.bat
index f1eb23b..441b975 100644
--- a/test/dapltest/scripts/dt-cli.bat
+++ b/test/dapltest/scripts/dt-cli.bat
@@ -17,13 +17,12 @@ if not "!F!" == "off" (
 rem set DAT_OVERRIDE=D:\dapl2\dat.conf
 rem favor DAT 2.0 (dapl2test.exe) over DAT 1.1 (dapltest.exe)
 
-set PF="%ProgramFiles%\WinOF"
-
-if NOT EXIST %PF%\dapl2test.exe (
-echo Missing file %PF%\dapl2test.exe ?
-exit /B 1
-)
 set DT=dapl2test.exe
+%DT% -h > Nul 2>&1
+if not "%ERRORLEVEL%" == "1" (
+echo %0: ERR - %DT% not in exec path?
+exit /B %ERRORLEVEL%
+)
 rem To debug dapl2test - use dapl2testd.exe with ibnic0v2d
 
 rem setup DAPL provider name: translate shorthand name or use name from 
dat.conf.
diff --git a/test/dapltest/scripts/dt-svr.bat b/test/dapltest/scripts/dt-svr.bat
index abd17fb..9974d23 100644
--- a/test/dapltest/scripts/dt-svr.bat
+++ b/test/dapltest/scripts/dt-svr.bat
@@ -5,15 +5,13 @@ rem
 SETLOCAL
 
 rem set DAT_OVERRIDE=C:\DAT\dat.conf
+set DT=dapl2test.exe
 
-set PF="%ProgramFiles%\WinOF"
-
-if NOT EXIST %PF%\dapl2test.exe (
-echo Missing file %PF%\dapl2test.exe ?
-exit /B 1
+%DT% -h > Nul 2>&1
+if not "%ERRORLEVEL%" == "1" (
+echo %0: ERR - %DT% not in exec path?
+exit /B %ERRORLEVEL%
 )
-
-set DT=dapl2test.exe
 rem To debug dapl2test - use dapl2testd.exe with ibnic0v2d
 
 rem which Dapl provider?
-- 
1.5.2.5

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] dapl-2.0: common: dapl_event_str function missing 2 IB extended events

2010-05-19 Thread Davis, Arlin R


Add all IB extended events in event string print function

Signed-off-by: Arlin Davis 
---
 dapl/common/dapl_evd_util.c |4 
 1 files changed, 4 insertions(+), 0 deletions(-)

diff --git a/dapl/common/dapl_evd_util.c b/dapl/common/dapl_evd_util.c
index 14a10c7..cb3eb1b 100644
--- a/dapl/common/dapl_evd_util.c
+++ b/dapl/common/dapl_evd_util.c
@@ -96,6 +96,10 @@ char *dapl_event_str(IN DAT_EVENT_NUMBER event_num)
 DAT_IB_EXTENSION_RANGE_BASE + 1},
{"DAT_IB_UD_CONNECTION_EVENT_ESTABLISHED",
 DAT_IB_EXTENSION_RANGE_BASE + 2},
+   {"DAT_IB_UD_CONNECTION_REJECT_EVENT",
+DAT_IB_EXTENSION_RANGE_BASE + 3},
+   {"DAT_IB_UD_CONNECTION_ERROR_EVENT",
+DAT_IB_EXTENSION_RANGE_BASE + 4},
{"DAT_IW_EXTENSION_RANGE_BASE", DAT_IW_EXTENSION_RANGE_BASE},
 #endif /* DAT_EXTENSIONS */
{NULL, 0},
-- 
1.5.2.5

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/2] compat-dapl-1.2: cma: memory leak of verbs CQ and completion channels created during dat_ia_open

2010-05-19 Thread Davis, Arlin R
 
check/cleanup CQ and completion channels during dat_ia_close

Signed-off-by: Arlin Davis 
---
 dapl/openib_cma/dapl_ib_util.c |   22 --
 1 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/dapl/openib_cma/dapl_ib_util.c b/dapl/openib_cma/dapl_ib_util.c
index 9d97ae1..00aa5fb 100755
--- a/dapl/openib_cma/dapl_ib_util.c
+++ b/dapl/openib_cma/dapl_ib_util.c
@@ -373,12 +373,6 @@ DAT_RETURN dapls_ib_close_hca(IN DAPL_HCA *hca_ptr)
dapl_dbg_log(DAPL_DBG_TYPE_UTIL," close_hca: %p->%p\n",
 hca_ptr,hca_ptr->ib_hca_handle);
 
-   if (hca_ptr->ib_hca_handle != IB_INVALID_HANDLE) {
-   if (rdma_destroy_id(hca_ptr->ib_trans.cm_id)) 
-   return(dapl_convert_errno(errno,"ib_close_device"));
-   hca_ptr->ib_hca_handle = IB_INVALID_HANDLE;
-   }
-
dapl_os_lock(&g_hca_lock);
if (g_ib_thread_state != IB_THREAD_RUN) {
dapl_os_unlock(&g_hca_lock);
@@ -410,6 +404,22 @@ DAT_RETURN dapls_ib_close_hca(IN DAPL_HCA *hca_ptr)
nanosleep (&sleep, &remain);
}
 bail:
+   if (hca_ptr->ib_trans.ib_cq)
+   ibv_destroy_comp_channel(hca_ptr->ib_trans.ib_cq);
+
+   if (hca_ptr->ib_trans.ib_cq_empty) {
+   struct ibv_comp_channel *channel;
+   channel = hca_ptr->ib_trans.ib_cq_empty->channel;
+   ibv_destroy_cq(hca_ptr->ib_trans.ib_cq_empty);
+   ibv_destroy_comp_channel(channel);
+   }
+
+   if (hca_ptr->ib_hca_handle != IB_INVALID_HANDLE) {
+   if (rdma_destroy_id(hca_ptr->ib_trans.cm_id))
+   return (dapl_convert_errno(errno, "ib_close_device"));
+   hca_ptr->ib_hca_handle = IB_INVALID_HANDLE;
+   }
+
return (DAT_SUCCESS);
 }
   
-- 
1.5.2.5

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/2] compat-dapl-1.2: cma: memory leak of FD's (pipe) created during dat_evd_create

2010-05-19 Thread Davis, Arlin R

Add checking for pipe FD's during destroy and clean them up with close.

Signed-off-by: Arlin Davis 
---
 dapl/openib_cma/dapl_ib_cq.c |8 +++-
 1 files changed, 7 insertions(+), 1 deletions(-)

diff --git a/dapl/openib_cma/dapl_ib_cq.c b/dapl/openib_cma/dapl_ib_cq.c
index cf19f38..c54bbaf 100644
--- a/dapl/openib_cma/dapl_ib_cq.c
+++ b/dapl/openib_cma/dapl_ib_cq.c
@@ -462,8 +462,11 @@ dapls_ib_wait_object_create(IN DAPL_EVD *evd_ptr,
ibv_create_comp_channel(
evd_ptr->header.owner_ia->hca_ptr->ib_hca_handle);  

-   if ((*p_cq_wait_obj_handle)->events == NULL)
+   if ((*p_cq_wait_obj_handle)->events == NULL) {
+   close((*p_cq_wait_obj_handle)->pipe[0]);
+   close((*p_cq_wait_obj_handle)->pipe[1]);
goto bail;
+   }
 
return DAT_SUCCESS;
 bail:
@@ -483,6 +486,9 @@ dapls_ib_wait_object_destroy(IN ib_wait_obj_handle_t 
p_cq_wait_obj_handle)

ibv_destroy_comp_channel(p_cq_wait_obj_handle->events);
 
+   close(p_cq_wait_obj_handle->pipe[0]);
+   close(p_cq_wait_obj_handle->pipe[1]);
+
dapl_os_free(p_cq_wait_obj_handle, 
 sizeof(struct _ib_wait_obj_handle));
 
-- 
1.5.2.5

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] libibverbs: add path record definitions to sa.h

2010-05-19 Thread Steve Wise

Walukiewicz, Miroslaw wrote:
> Hello Steve, 
>
> Do you plan some changes in the core code related to RAW_QPT? 

  



The only changes I see needed to the kernel core is the mcast change you 
already proposed to allow mcast attach/detach for RAW_ETY qps...




> Could you explain me better what means "priviledged interface" for you?

  



I just mean that allocating these raw qps should only be allowed by 
effective UID 0.  This is analogous to PF_PACKET sockets which are 
privileged as well.




Steve.

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH v2] libibverbs: add path record definitions to sa.h

2010-05-19 Thread Walukiewicz, Miroslaw
Hello Steve, 

Do you plan some changes in the core code related to RAW_QPT? 

Could you explain me better what means "priviledged interface" for you?

Regards, 

Mirek
-Original Message-
From: linux-rdma-ow...@vger.kernel.org 
[mailto:linux-rdma-ow...@vger.kernel.org] On Behalf Of Steve Wise
Sent: Tuesday, May 18, 2010 4:04 PM
To: Roland Dreier
Cc: Hefty, Sean; linux-rdma
Subject: Re: [PATCH v2] libibverbs: add path record definitions to sa.h

Roland Dreier wrote:
>  > Can you add the RAW_ETY qp type in this release as well?
>
> To be honest I haven't looked at the iWARP datagram stuff at all.  I'm
> not sure overloading the RAW_ETY QP type is necessarily the right thing
> to do -- it has quite different (never implemented) semantics in the IB
> case.  Is there any overview of what you guys are planning as far as
> how work requests are created for such QPs?
>   

The RAW_ETY qp would be just that:  A kernel-bypass/user mode qp that 
allows sending/receiving ethernet packets.   It would also provide a way 
for user applications to join/leave ethernet mcast groups (which 
requires an rdma core kernel change that Intel posted too).  What the 
iWARP vendors are doing on top of that is implementing some form of UDP 
in user mode.  The main goal here is to provide an ultra low latency UDP 
multicast and unicast channel for important market segments that desire 
this paradigm.  Also, due to the nature of this (send/recv raw eth 
frames), the interface would be privileged.

If you want to wait, then later I'll post patches on how this is being 
done for cxgb4.  But I thought adding the RAW_ETY was definitely a 
common requirement for Intel and Chelsio.


Steve.



--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH V2 4/4] mlx4_ib: XRC RCV qp implementation.

2010-05-19 Thread Jack Morgenstein
RE-sending with correct subject line (should be V2).

Support for XRC RCV-only QP (requested by userspace,
but resides in kernel space).

Implements create_xrc_rcv_qp, modify_xrc_rcv_qp, query_xrc_rcv_qp,
and destroy_xrc_rcv_qp.

Since query_xrc_rcv_qp and query_qp are very similar, the common portions
have been placed in a helper function which is invoked by both query methods.

Signed-off-by: Jack Morgenstein 
---
 drivers/infiniband/hw/mlx4/cq.c  |4 +-
 drivers/infiniband/hw/mlx4/main.c|   60 --
 drivers/infiniband/hw/mlx4/mlx4_ib.h |   13 ++
 drivers/infiniband/hw/mlx4/qp.c  |  208 +-
 4 files changed, 272 insertions(+), 13 deletions(-)

Index: infiniband/drivers/infiniband/hw/mlx4/cq.c
===
--- infiniband.orig/drivers/infiniband/hw/mlx4/cq.c
+++ infiniband/drivers/infiniband/hw/mlx4/cq.c
@@ -176,7 +176,7 @@ struct ib_cq *mlx4_ib_create_cq(struct i
if (entries < 1 || entries > dev->dev->caps.max_cqes)
return ERR_PTR(-EINVAL);
 
-   cq = kmalloc(sizeof *cq, GFP_KERNEL);
+   cq = kzalloc(sizeof *cq, GFP_KERNEL);
if (!cq)
return ERR_PTR(-ENOMEM);
 
@@ -545,7 +545,7 @@ static int mlx4_ib_poll_one(struct mlx4_
struct mlx4_cqe *cqe;
struct mlx4_qp *mqp;
struct mlx4_ib_wq *wq;
-   struct mlx4_ib_srq *srq;
+   struct mlx4_ib_srq *uninitialized_var(srq);
struct mlx4_srq *msrq;
int is_send;
int is_error;
Index: infiniband/drivers/infiniband/hw/mlx4/main.c
===
--- infiniband.orig/drivers/infiniband/hw/mlx4/main.c
+++ infiniband/drivers/infiniband/hw/mlx4/main.c
@@ -420,7 +420,7 @@ static struct ib_pd *mlx4_ib_alloc_pd(st
struct mlx4_ib_pd *pd;
int err;
 
-   pd = kmalloc(sizeof *pd, GFP_KERNEL);
+   pd = kzalloc(sizeof *pd, GFP_KERNEL);
if (!pd)
return ERR_PTR(-ENOMEM);
 
@@ -462,12 +462,18 @@ static int mlx4_ib_mcg_detach(struct ib_
 &to_mqp(ibqp)->mqp, gid->raw);
 }
 
+static void mlx4_dummy_comp_handler(struct ib_cq *cq, void *cq_context)
+{
+}
+
 static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev,
  struct ib_ucontext *context,
  struct ib_udata *udata)
 {
struct mlx4_ib_xrcd *xrcd;
struct mlx4_ib_dev *mdev = to_mdev(ibdev);
+   struct ib_pd *pd;
+   struct ib_cq *cq;
int err;
 
if (!(mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
@@ -478,23 +484,51 @@ static struct ib_xrcd *mlx4_ib_alloc_xrc
return ERR_PTR(-ENOMEM);
 
err = mlx4_xrcd_alloc(mdev->dev, &xrcd->xrcdn);
-   if (err) {
-   kfree(xrcd);
-   return ERR_PTR(err);
+   if (err)
+   goto err_xrcd;
+
+   pd = mlx4_ib_alloc_pd(ibdev, NULL, NULL);
+   if (IS_ERR(pd)) {
+   err = PTR_ERR(pd);
+   goto err_pd;
}
+   pd->device  = ibdev;
+
+   cq = mlx4_ib_create_cq(ibdev, 1, 0, NULL, NULL);
+   if (IS_ERR(cq)) {
+   err = PTR_ERR(cq);
+   goto err_cq;
+   }
+   cq->device= ibdev;
+   cq->comp_handler  = mlx4_dummy_comp_handler;
 
if (context)
if (ib_copy_to_udata(udata, &xrcd->xrcdn, sizeof(__u32))) {
-   mlx4_xrcd_free(mdev->dev, xrcd->xrcdn);
-   kfree(xrcd);
-   return ERR_PTR(-EFAULT);
+   err = -EFAULT;
+   goto err_copy;
}
 
+   xrcd->cq = cq;
+   xrcd->pd = pd;
return &xrcd->ibxrcd;
+
+err_copy:
+   mlx4_ib_destroy_cq(cq);
+err_cq:
+   mlx4_ib_dealloc_pd(pd);
+err_pd:
+   mlx4_xrcd_free(mdev->dev, xrcd->xrcdn);
+err_xrcd:
+   kfree(xrcd);
+   return ERR_PTR(err);
 }
 
 static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd)
 {
+   struct mlx4_ib_xrcd *mxrcd = to_mxrcd(xrcd);
+
+   mlx4_ib_destroy_cq(mxrcd->cq);
+   mlx4_ib_dealloc_pd(mxrcd->pd);
mlx4_xrcd_free(to_mdev(xrcd->device)->dev, to_mxrcd(xrcd)->xrcdn);
kfree(xrcd);
 
@@ -700,18 +734,28 @@ static void *mlx4_ib_add(struct mlx4_dev
ibdev->ib_dev.create_xrc_srq = mlx4_ib_create_xrc_srq;
ibdev->ib_dev.alloc_xrcd = mlx4_ib_alloc_xrcd;
ibdev->ib_dev.dealloc_xrcd = mlx4_ib_dealloc_xrcd;
+   ibdev->ib_dev.create_xrc_rcv_qp = mlx4_ib_create_xrc_rcv_qp;
+   ibdev->ib_dev.modify_xrc_rcv_qp = mlx4_ib_modify_xrc_rcv_qp;
+   ibdev->ib_dev.query_xrc_rcv_qp = mlx4_ib_query_xrc_rcv_qp;
+   ibdev->ib_dev.destroy_xrc_rcv_qp = mlx4_ib_destroy_xrc_rcv_qp;
ibdev->ib_dev.uverbs_cmd_mask |=
(1ull << IB_USER_VERBS_CMD_CR

[PATCH V3 2/4] ib_uverbs: XRC RCV qp implementation

2010-05-19 Thread Jack Morgenstein
Implement XRC target QPs (xrc rcv qps) for userspace.
The basic verbs are: create/modify/query/destroy.

In addition, added two additional verbs -- register and unregister.

The motivation for register/unregister comes from MPI.
MPI requires XRC receive QPs which are not destroyed when the creating
process terminates (but persist so that other processes may still use
them as XRC targets).

Solution:  Userspace requests that a QP be created in kernel space.
Each userspace process using that QP (i.e. receiving packets on an XRC SRQ
via the qp), registers with that QP. When the last userspace process
unregisters with the QP, it is destroyed.  Unregistration is also part of
userspace process cleanup, so there is no leakage.

This patch implements the kernel procedures to implement the following
(new) libibverbs API:
ibv_create_xrc_rcv_qp
ibv_modify_xrc_rcv_qp
ibv_query_xrc_rcv_qp
ibv_destroy_xrc_rcv_qp

ibv_reg_xrc_rcv_qp
ibv_unreg_xrc_rcv_qp

Note that users who wish to make use of the reg/unreg capability should
never call the destroy verb -- the XRC RCV qp is automatically destroyed
when all registered processes have unregistered (or terminated).
In this case, the process which called "create" may also unregister (or do
nothing, and when it terminates, its reference to that QP is removed).

Thus, usage is:
Either: create/modify/query/destroy_xrc_rcv_qp
Or: create/modify/query/reg/unreg_xrc_rcv_qp

V3: renamed ib_xrc_rcv_table_cleanup to ib_xrc_rcv_qp_table_cleanup for 
consistency.

>From 6571bf63dbbbca0e95faabd81a9e57f908d7df17 Mon Sep 17 00:00:00 2001
From: Jack Morgenstein 
Date: Mon, 10 May 2010 20:23:24 +0300
Subject: [PATCH 4/4] ib_uverbs: XRC RCV qp implementation.

Signed-off-by: Jack Morgenstein 
---
 drivers/infiniband/core/uverbs.h  |8 +
 drivers/infiniband/core/uverbs_cmd.c  |  304 +
 drivers/infiniband/core/uverbs_main.c |   15 ++
 drivers/infiniband/hw/mlx4/main.c |4 +-
 include/rdma/ib_user_verbs.h  |   87 +-
 5 files changed, 416 insertions(+), 2 deletions(-)

Index: infiniband/drivers/infiniband/core/uverbs.h
===
--- infiniband.orig/drivers/infiniband/core/uverbs.h
+++ infiniband/drivers/infiniband/core/uverbs.h
@@ -175,6 +175,8 @@ void ib_uverbs_qp_event_handler(struct i
 void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr);
 void ib_uverbs_event_handler(struct ib_event_handler *handler,
 struct ib_event *event);
+void ib_uverbs_xrc_rcv_qp_event_handler(struct ib_event *event,
+   void *context_ptr);
 void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev,
struct ib_xrcd *xrcd);
 
@@ -214,5 +216,11 @@ IB_UVERBS_DECLARE_CMD(destroy_srq);
 IB_UVERBS_DECLARE_CMD(create_xrc_srq);
 IB_UVERBS_DECLARE_CMD(open_xrcd);
 IB_UVERBS_DECLARE_CMD(close_xrcd);
+IB_UVERBS_DECLARE_CMD(create_xrc_rcv_qp);
+IB_UVERBS_DECLARE_CMD(modify_xrc_rcv_qp);
+IB_UVERBS_DECLARE_CMD(query_xrc_rcv_qp);
+IB_UVERBS_DECLARE_CMD(destroy_xrc_rcv_qp);
+IB_UVERBS_DECLARE_CMD(reg_xrc_rcv_qp);
+IB_UVERBS_DECLARE_CMD(unreg_xrc_rcv_qp);
 
 #endif /* UVERBS_H */
Index: infiniband/drivers/infiniband/core/uverbs_cmd.c
===
--- infiniband.orig/drivers/infiniband/core/uverbs_cmd.c
+++ infiniband/drivers/infiniband/core/uverbs_cmd.c
@@ -2661,3 +2661,311 @@ void ib_uverbs_dealloc_xrcd(struct ib_uv
if (inode)
xrcd_table_delete(dev, inode);
 }
+
+ssize_t ib_uverbs_create_xrc_rcv_qp(struct ib_uverbs_file *file,
+   const char __user *buf, int in_len,
+   int out_len)
+{
+   struct ib_uverbs_create_xrc_rcv_qp  cmd;
+   struct ib_uverbs_create_xrc_rcv_qp_resp resp;
+   struct ib_qp_init_attr  init_attr = {0};
+   struct ib_xrcd *xrcd;
+   struct ib_uobject  *xrcd_uobj;
+   u32 qp_num;
+   int err;
+
+   if (out_len < sizeof resp)
+   return -ENOSPC;
+
+   if (copy_from_user(&cmd, buf, sizeof cmd))
+   return -EFAULT;
+
+   xrcd = idr_read_xrcd(cmd.xrcd_handle, file->ucontext, &xrcd_uobj);
+   if (!xrcd)
+   return -EINVAL;
+
+   init_attr.event_handler = ib_uverbs_xrc_rcv_qp_event_handler;
+   init_attr.qp_context= file;
+   init_attr.xrcd  = xrcd;
+
+   err = ib_create_xrc_rcv_qp(xrcd, &init_attr, &qp_num);
+   if (err)
+   goto err_put;
+
+   memset(&resp, 0, sizeof resp);
+   resp.qpn = qp_num;
+
+   if (copy_to_user((void __user *) (unsigned long) cmd.response,
+&resp, sizeof resp)) {
+   err = -EFAULT;
+   goto err_destroy;
+   }
+
+   put_uobj_read(xrcd_uobj);
+
+   ret

[PATCH V3 1/4] ib_core: changes to support XRC RCV qp's

2010-05-19 Thread Jack Morgenstein
The core layer does reference counting on XRC RCV qp's,
and also is responsible for distributing async events generated
for XRC RCV qp's to all registered processes.

Methods:  ib_create_xrc_rcv_qp, ib_destroy_xrc_rcv_qp, ib_reg_xrc_rcv_qp
and ib_unreg_xrc_rcv_qp -- and a XRC RCV QP cleanup function called
when a process terminates (this function removes all registrations for
that process, and destroys any XRC RCV QPs which have no processes registered
after the cleanup).

All other functions serve to support keeping track of the XRC RCV qp's
(in a radix tree), and to distribute the async events.

V3: Fixed bug reported by Sean, remove EXPORT_SYMBOL from internal procedures
ib_xrc_rcv_qp_table_xxx, and renamed ib_xrc_rcv_table_cleanup to
ib_xrc_rcv_qp_table_cleanup for consistency.

Signed-off-by: Jack Morgenstein 
---
 drivers/infiniband/core/device.c |6 +-
 drivers/infiniband/core/verbs.c  |  275 ++
 include/rdma/ib_verbs.h  |   45 ++
 3 files changed, 325 insertions(+), 1 deletions(-)

Index: infiniband/drivers/infiniband/core/device.c
===
--- infiniband.orig/drivers/infiniband/core/device.c
+++ infiniband/drivers/infiniband/core/device.c
@@ -172,9 +172,13 @@ static int end_port(struct ib_device *de
  */
 struct ib_device *ib_alloc_device(size_t size)
 {
+   struct ib_device *ibdev;
BUG_ON(size < sizeof (struct ib_device));
 
-   return kzalloc(size, GFP_KERNEL);
+   ibdev = kzalloc(size, GFP_KERNEL);
+   if (ibdev)
+   ib_xrc_rcv_qp_table_init(ibdev);
+   return ibdev;
 }
 EXPORT_SYMBOL(ib_alloc_device);
 
Index: infiniband/drivers/infiniband/core/verbs.c
===
--- infiniband.orig/drivers/infiniband/core/verbs.c
+++ infiniband/drivers/infiniband/core/verbs.c
@@ -39,6 +39,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 #include 
 #include 
@@ -1030,3 +1032,276 @@ int ib_dealloc_xrcd(struct ib_xrcd *xrcd
return xrcd->device->dealloc_xrcd(xrcd);
 }
 EXPORT_SYMBOL(ib_dealloc_xrcd);
+
+/*
+ * Prepare the per-device XRC RCV QP table: an empty radix tree whose
+ * nodes are allocated with GFP_ATOMIC, and the spinlock taken around
+ * updates to that tree.
+ */
+void ib_xrc_rcv_qp_table_init(struct ib_device *dev)
+{
+	INIT_RADIX_TREE(&dev->xrc_rcv_qp_table, GFP_ATOMIC);
+	spin_lock_init(&dev->xrc_rcv_qp_table_lock);
+}
+
+/*
+ * Look up the table entry keyed by @qpn in @dev's XRC RCV QP radix tree.
+ * Returns whatever radix_tree_lookup() yields; callers treat a NULL
+ * result as "no such QP".
+ */
+struct ib_xrc_rcv_qp_table_entry *
+ib_xrc_rcv_tbl_find(struct ib_device *dev, u32 qpn)
+{
+	struct ib_xrc_rcv_qp_table_entry *entry;
+
+	entry = radix_tree_lookup(&dev->xrc_rcv_qp_table, qpn);
+	return entry;
+}
+
+/*
+ * ib_xrc_rcv_qp_table_new - start tracking a newly created XRC RCV QP.
+ * @dev:           device whose xrc_rcv_qp_table receives the new entry
+ * @event_handler: stored in the new table entry
+ * @qpn:           QP number; used as the radix-tree key
+ * @xrcd:          XRC domain stored in the entry; its usecnt is
+ *                 incremented once the entry has been inserted
+ * @context:       stored in the first registration entry on the QP's list
+ *
+ * Returns 0 on success, -ENOMEM if either allocation fails, or the error
+ * code returned by radix_tree_insert().
+ */
+int ib_xrc_rcv_qp_table_new(struct ib_device *dev, void *event_handler,
+			    u32 qpn, struct ib_xrcd *xrcd, void *context)
+{
+	struct ib_xrc_rcv_qp_table_entry *qp;
+	struct ib_xrc_rcv_reg_entry *reg_entry;
+	unsigned long flags;
+	int rc = -ENOMEM;
+
+	/*
+	 * NOTE(review): qp is allocated with GFP_ATOMIC while reg_entry
+	 * just below uses GFP_KERNEL -- confirm which flag the calling
+	 * context actually requires and make the two consistent.
+	 */
+	qp = kzalloc(sizeof *qp, GFP_ATOMIC);
+	if (!qp)
+		return -ENOMEM;
+
+	reg_entry = kzalloc(sizeof *reg_entry, GFP_KERNEL);
+	if (!reg_entry)
+		goto out_alloc;
+
+	INIT_LIST_HEAD(&qp->list);
+	qp->event_handler = event_handler;
+	qp->xrcd = xrcd;
+	qp->qpn = qpn;
+
+	/* The creating context becomes the first registration on the QP. */
+	reg_entry->context = context;
+	list_add_tail(&reg_entry->list, &qp->list);
+
+	spin_lock_irqsave(&dev->xrc_rcv_qp_table_lock, flags);
+	rc = radix_tree_insert(&dev->xrc_rcv_qp_table, qpn, qp);
+	spin_unlock_irqrestore(&dev->xrc_rcv_qp_table_lock, flags);
+	if (rc)
+		goto out_radix;
+	atomic_inc(&xrcd->usecnt);
+	return 0;
+
+out_radix:
+	/* qp never became visible in the table, so both objects can go. */
+	kfree(reg_entry);
+out_alloc:
+	kfree(qp);
+	return rc;
+}
+
+
+/*
+ * ib_xrc_rcv_qp_table_add_reg_entry - register @context with an XRC RCV QP.
+ * @dev:     device whose xrc_rcv_qp_table is searched
+ * @qpn:     QP number to look up
+ * @context: registration key; at most one list entry is kept per context
+ *
+ * Returns 0 if the context was added or was already registered, -EINVAL
+ * if @qpn is not in the table, or -ENOMEM if the entry cannot be
+ * allocated. A new registration also increments the usecnt of the QP's
+ * XRC domain.
+ */
+int ib_xrc_rcv_qp_table_add_reg_entry(struct ib_device *dev, u32 qpn,
+				      void *context)
+{
+	struct ib_xrc_rcv_reg_entry *reg_entry, *tmp;
+	struct ib_xrc_rcv_qp_table_entry *qp;
+	unsigned long flags;
+	int err = -EINVAL;
+
+	/* Allocate up front, before the spinlock is taken. */
+	reg_entry = kzalloc(sizeof *reg_entry, GFP_KERNEL);
+	if (!reg_entry)
+		return -ENOMEM;
+	reg_entry->context = context;
+
+	spin_lock_irqsave(&dev->xrc_rcv_qp_table_lock, flags);
+	qp = ib_xrc_rcv_tbl_find(dev, qpn);
+	if (unlikely(!qp))
+		goto free_out;
+
+	/* add only a single entry per user context */
+	list_for_each_entry(tmp, &qp->list, list) {
+		if (tmp->context == context) {
+			/* Already registered: succeed, drop the spare entry. */
+			err = 0;
+			goto free_out;
+		}
+	}
+
+	atomic_inc(&qp->xrcd->usecnt);
+	list_add_tail(&reg_entry->list, &qp->list);
+	spin_unlock_irqrestore(&dev->xrc_rcv_qp_table_lock, flags);
+	return 0;
+
+free_out:
+	spin_unlock_irqrestore(&dev->xrc_rcv_qp_table_lock, flags);
+	kfree(reg_entry);
+	return err;
+}
+
+int ib_xrc_rcv_qp_table_remove_reg_entry(struct ib_device *dev, u32 qpn,
+void *