Re: [openib-general] Kernel Oops related to IPoIB (multicast module?)
Sean Hefty wrote: > > I am working on trying to resolve this as my top priority at the > moment, but I have not been able to reproduce this on my systems. I > want to understand why ib_sa was not unloaded as part of modprobe -r > ib_ipoib, but why ib_multicast apparently was. I will examine the > script that you mentioned, but I typically do not run the OFED release. > > - Sean > No need to run the OFED release, just take openibd script from https://openib.org/svn/gen2/branches/1.0/ofed/openib/scripts/ and use it: openibd start and openibd stop. In order for it to load/unload modules you need also to have the file openib.conf under /etc/infiniband directory with this content: # Start HCA driver upon boot ONBOOT=yes # Load MTHCA MTHCA_LOAD=yes # Load IPoIB IPOIB_LOAD=yes Tziporet ___ openib-general mailing list openib-general@openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
Re: [openib-general] ucma into kernel.org
Sean Hefty wrote: > Sure, I can consider doing that. There would just be some logistics > to work out, like the location of the git tree. > > Would a patch series in Roland's git tree work? Once he returns, we > can start queuing up patches for 2.6.19, which could include any or > all of the following: > > userspace support for the RDMA CM > iWarp support > latest changes for IB (UD QP and multicast) > > - Sean > For OFED 1.1 we need only userspace support for the RDMA CM Tziporet ___ openib-general mailing list openib-general@openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
[openib-general] [PATCH] ib_addr: fix get/set gid alignment issues
The device address contains unsigned character arrays, which contain raw GID addresses. The GIDs may not be naturally aligned, so do not cast them to structures or unions. Signed-off-by: Sean Hefty <[EMAIL PROTECTED]> --- This fixes an alignment issue pointed out by Michael when adding MGID support to the ib_addr module. Index: include/rdma/ib_addr.h === --- include/rdma/ib_addr.h (revision 8224) +++ include/rdma/ib_addr.h (working copy) @@ -89,14 +89,16 @@ static inline void ib_addr_set_pkey(stru dev_addr->broadcast[9] = (unsigned char) pkey; } -static inline union ib_gid *ib_addr_get_mgid(struct rdma_dev_addr *dev_addr) +static inline void ib_addr_get_mgid(struct rdma_dev_addr *dev_addr, + union ib_gid *gid) { - return (union ib_gid *) (dev_addr->broadcast + 4); + memcpy(gid, dev_addr->broadcast + 4, sizeof *gid); } -static inline union ib_gid *ib_addr_get_sgid(struct rdma_dev_addr *dev_addr) +static inline void ib_addr_get_sgid(struct rdma_dev_addr *dev_addr, + union ib_gid *gid) { - return (union ib_gid *) (dev_addr->src_dev_addr + 4); + memcpy(gid, dev_addr->src_dev_addr + 4, sizeof *gid); } static inline void ib_addr_set_sgid(struct rdma_dev_addr *dev_addr, @@ -105,9 +107,10 @@ static inline void ib_addr_set_sgid(stru memcpy(dev_addr->src_dev_addr + 4, gid, sizeof *gid); } -static inline union ib_gid *ib_addr_get_dgid(struct rdma_dev_addr *dev_addr) +static inline void ib_addr_get_dgid(struct rdma_dev_addr *dev_addr, + union ib_gid *gid) { - return (union ib_gid *) (dev_addr->dst_dev_addr + 4); + memcpy(gid, dev_addr->dst_dev_addr + 4, sizeof *gid); } static inline void ib_addr_set_dgid(struct rdma_dev_addr *dev_addr, Index: core/ucma_ib.c === --- core/ucma_ib.c (revision 8224) +++ core/ucma_ib.c (working copy) @@ -40,27 +40,27 @@ static int ucma_get_paths(struct rdma_cm struct ib_sa_cursor *cursor; struct ib_sa_path_rec *path; struct ib_user_path_rec user_path; - union ib_gid *gid; + union ib_gid gid; int left, ret = 0; u16 pkey; if (!id->device) return 
-ENODEV; - gid = ib_addr_get_dgid(&id->route.addr.dev_addr); + ib_addr_get_dgid(&id->route.addr.dev_addr, &gid); pkey = ib_addr_get_pkey(&id->route.addr.dev_addr); - cursor = ib_create_path_cursor(id->device, id->port_num, gid); + cursor = ib_create_path_cursor(id->device, id->port_num, &gid); if (IS_ERR(cursor)) return PTR_ERR(cursor); - gid = ib_addr_get_sgid(&id->route.addr.dev_addr); + ib_addr_get_sgid(&id->route.addr.dev_addr, &gid); left = *len; *len = 0; for (path = ib_get_next_sa_attr(&cursor); path; path = ib_get_next_sa_attr(&cursor)) { if (pkey == path->pkey && - !memcmp(gid, path->sgid.raw, sizeof *gid)) { + !memcmp(&gid, path->sgid.raw, sizeof gid)) { if (paths) { ib_copy_path_rec_to_user(&user_path, path); if (copy_to_user(paths, &user_path, Index: core/cma.c === --- core/cma.c (revision 8224) +++ core/cma.c (working copy) @@ -278,14 +278,14 @@ static void cma_detach_from_dev(struct r static int cma_acquire_ib_dev(struct rdma_id_private *id_priv) { struct cma_device *cma_dev; - union ib_gid *gid; + union ib_gid gid; int ret = -ENODEV; - gid = ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr); + ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid), mutex_lock(&lock); list_for_each_entry(cma_dev, &dev_list, list) { - ret = ib_find_cached_gid(cma_dev->device, gid, + ret = ib_find_cached_gid(cma_dev->device, &gid, &id_priv->id.port_num, NULL); if (!ret) { cma_attach_to_dev(id_priv, cma_dev); @@ -1266,8 +1266,8 @@ static int cma_query_ib_route(struct rdm struct ib_sa_path_rec path_rec; memset(&path_rec, 0, sizeof path_rec); - path_rec.sgid = *ib_addr_get_sgid(addr); - path_rec.dgid = *ib_addr_get_dgid(addr); + ib_addr_get_sgid(addr, &path_rec.sgid); + ib_addr_get_dgid(addr, &path_rec.dgid); path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(addr)); path_rec.numb_path = 1; @@ -1326,8 +1326,10 @@ static int cma_resolve_ib_route(struct r goto err1; } + ib_addr_get_sgid(addr, &route->path_rec->sgid); + ib_addr_get_dgid(addr, &route->path_rec->dgid); ret = 
ib_get_pat
Re: [openib-general] ucma into kernel.org
Quoting r. Sean Hefty <[EMAIL PROTECTED]>: > Subject: Re: ucma into kernel.org > > Michael S. Tsirkin wrote: > > Would you consider making a git repository available with just > > the CMA code appropriate for OFED 1.1? Mixing git and SVN code > > to build OFED is really painful for us. > > Sure, I can consider doing that. There would just be some logistics to work > out, like the location of the git tree. Oh, there's no reason to decide this up front: as I learned, hosting a clone of a git tree is *really* trivial. For example, we can arrange to host a clone of your tree at mellanox.co.il if you like, and let you push there. And it's also trivial to clone and switch to another location whenever you like. > Would a patch series in Roland's git tree work? You mean a head there, like for-ofed-1.1? Why not. But it does mean you'll need Roland to apply your patches to his tree. > Once he returns, we can start > queuing up patches for 2.6.19, which could include any or all of the > following: > > userspace support for the RDMA CM > iWarp support > latest changes for IB (UD QP and multicast) And hopefully the retry/timeout options which started this discussion? :) It is probably best to take whatever is needed in OFED and have a branch with these things, separate from for-2.6.19. -- MST ___ openib-general mailing list openib-general@openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
Re: [openib-general] ucma into kernel.org
Michael S. Tsirkin wrote: > Would you consider making a git repository available with just > the CMA code appropriate for OFED 1.1? Mixing git and SVN code > to build OFED is really painful for us. Sure, I can consider doing that. There would just be some logistics to work out, like the location of the git tree. Would a patch series in Roland's git tree work? Once he returns, we can start queuing up patches for 2.6.19, which could include any or all of the following: userspace support for the RDMA CM iWarp support latest changes for IB (UD QP and multicast) - Sean ___ openib-general mailing list openib-general@openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
Re: [openib-general] ucma into kernel.org
Quoting r. Sean Hefty <[EMAIL PROTECTED]>: > > Can you drive these enhancements only to 2.6.18. > > I would like these features in OFED 1.1 as well. Would you consider making a git repository available with just the CMA code appropriate for OFED 1.1? Mixing git and SVN code to build OFED is really painful for us. -- MST ___ openib-general mailing list openib-general@openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
Re: [openib-general] ucma into kernel.org
Quoting r. Sean Hefty <[EMAIL PROTECTED]>: > > Can you drive these enhancements only to 2.6.18. > > I would like these features in OFED 1.1 as well. However, there are no users > of those new interfaces in 2.6.18 that would justify their inclusion. I think setting the number of retries and timeout in CMA might be useful for iSER as well. Or, what do you think? -- MST ___ openib-general mailing list openib-general@openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
Re: [openib-general] RFC: CMA backlog (was Re: CMA backlog)
Michael S. Tsirkin wrote: > Looks good to me. Please go ahead, then I'll use this in SDP and test this > way. Committed in 8261. ___ openib-general mailing list openib-general@openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
Re: [openib-general] RFC: CMA backlog (was Re: CMA backlog)
Quoting r. Sean Hefty <[EMAIL PROTECTED]>: > This is a slightly modified version of the patch. I passed the return > code directly to the destroy function for future flexibility, and > limited the behavior change to REQ processing only. > > I ran some basic tests to make sure that this didn't break anything. > If this looks okay to you, I can commit this to SVN. Looks good to me. Please go ahead, then I'll use this in SDP and test this way. -- MST ___ openib-general mailing list openib-general@openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
Re: [openib-general] ucma into kernel.org
Tziporet Koren wrote: > These features are needed for uDAPL and were requested by Woody and > Arlin for Intel MPI scalability. > Since in OFED 1.1 we are going to take CMA from kernel 2.6.18 we need > them upstream. > > Can you drive these enhancements only to 2.6.18. I would like these features in OFED 1.1 as well. However, there are no users of those new interfaces in 2.6.18 that would justify their inclusion. I can target userspace support of the RDMA CM for 2.6.19, but I don't think it makes sense to try for 2.6.18. - Sean ___ openib-general mailing list openib-general@openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
Re: [openib-general] max_send_sge < max_sge
Quoting r. Pete Wyckoff <[EMAIL PROTECTED]>: > Subject: Re: max_send_sge < max_sge > > [EMAIL PROTECTED] wrote on Tue, 27 Jun 2006 09:42 +0300: > > Quoting r. Pete Wyckoff <[EMAIL PROTECTED]>: > > > Is this a known issue? > > > > Yes. The fact that ibv_query_device returns some value in hca_cap can not > > guarantee that ibv_create_qp with these parameters will succeed. For > > example, system administrator might have imposed a limit on the amount of > > memory you can pin down, and you will get ENOMEM. > > I was hoping to get a guaranteed maximum number from ibv_query_device so that > I would know that calls to ibv_create_qp would not fail due to my asking for > too many CQ entries. My code has some idea of how many it wants (16), and > compares that to the hca_cap values to settle for what it can get. I only > happened to notice that 30 wouldn't work even though it was so claimed when > debugging. Ah. I see. Unfortunately I don't think ibv_query_device currently provides this guarantee, and it's not something easy to fix. What are you doing if the hca cap is below the values you want? Also, please see below for ideas about extending the API in a way that might be useful to you. > > > Should I always subtract 1 from the reported max on the send side? Just > > > for this hardware? > > > > Unless you use it, passing the absolute maximum value supported by hardware > > does not seem, to me, to make sense - it will just slow you down, and waste > > resources. Is there a protocol out there that actually has a use for 30 > > sge? > > Perhaps I don't understand what is more resource-costly about using > 29 sge when they are supported by the hardware. Well, more SGEs per WR does mean more resources are consumed for the same number of WRs per QP. OK? > I'm using them on the send side to avoid having to either: > 1. memcpy 29 little buffers into one big buffer > or > 2. 
send 29 rdma writes instead of a single rdma write with 29 sges > The buffer on the receiver is contiguous and big enough to hold > everything. It's the same thing. Seems I'm not being clear. I was just saying that large SGE and WR values have cost so one should use the smallest SGE and WR numbers that still give good performance, not maximum thinkable values. But you probably know this :) > > In my opinion, for the application to be robust it has to either use small > > values that empirically work on most systems, or be able to scale down to > > require less resources if an allocation fails. > > Scale down? So if ibv_create_qp fails, you think I should look at > the return value (which is NULL, not ENOMEM or EINVAL or anything > informative), and then gradually reduce the values for max_recv_sge, > max_send_sge, max_recv_wr, max_send_wr, max_inline_data below the > reported HCA maximum until I find something that works? Well, if there's no bug I see no reason for ibv_create_qp to fail except that you are asking for too many WRs/SGEs. So yes, the trick you describe will work I think. At some point, I tried to think about extending the API in such a way that verbs like ibv_create_qp would round the parameters down to whatever does work. Would something like this be useful to you? Further, if the given SGE/WR pair can't be satisfied, will you want to scale down the number of SGEs or the number of WRs? > I'll subtract 1 from the hca_cap.max_sge for Mellanox hardware > before doing the comparison against how many SGEs I'd like to get. > Otherwise I can't see much alternative to trusting the hca_cap > values that are returned. If this works for you, great. I was just trying to point out query device cannot guarantee that QP allocation will always succeed even if you stay within limits it reports. For example, are you using a large number of WRs per QP as well? 
If so, after allocating a couple of QPs you might exceed the locked-memory limit allowed per user, depending on your system setup. QP allocation will then fail, even if you use the hcacap - 1 heuristic. -- MST ___ openib-general mailing list openib-general@openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
Re: [openib-general] RFC: CMA backlog (was Re: CMA backlog)
If a user of the IB CM returns -ENOMEM from their connection callback, simply drop the incoming REQ. Do not send a reject, which should allow the sender to retry the request. This is necessary for SDP to support a backlog. Signed-off-by: Michael S. Tsirkin <[EMAIL PROTECTED]> Signed-off-by: Sean Hefty <[EMAIL PROTECTED]> --- This is a slightly modified version of the patch. I passed the return code directly to the destroy function for future flexibility, and limited the behavior change to REQ processing only. I ran some basic tests to make sure that this didn't break anything. If this looks okay to you, I can commit this to SVN. Index: cm.c === --- cm.c(revision 8224) +++ cm.c(working copy) @@ -702,7 +702,7 @@ static void cm_reset_to_idle(struct cm_i } } -void ib_destroy_cm_id(struct ib_cm_id *cm_id) +static void cm_destroy_id(struct ib_cm_id *cm_id, int err) { struct cm_id_private *cm_id_priv; struct cm_work *work; @@ -736,12 +736,22 @@ retest: sizeof cm_id_priv->av.port->cm_dev->ca_guid, NULL, 0); break; + case IB_CM_REQ_RCVD: + if (err == -ENOMEM) { + /* Do not reject to allow future retries. 
*/ + cm_reset_to_idle(cm_id_priv); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + } else { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, + NULL, 0, NULL, 0); + } + break; case IB_CM_MRA_REQ_RCVD: case IB_CM_REP_SENT: case IB_CM_MRA_REP_RCVD: ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); /* Fall through */ - case IB_CM_REQ_RCVD: case IB_CM_MRA_REQ_SENT: case IB_CM_REP_RCVD: case IB_CM_MRA_REP_SENT: @@ -776,6 +786,11 @@ retest: kfree(cm_id_priv->private_data); kfree(cm_id_priv); } + +void ib_destroy_cm_id(struct ib_cm_id *cm_id) +{ + cm_destroy_id(cm_id, 0); +} EXPORT_SYMBOL(ib_destroy_cm_id); int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask, @@ -1164,7 +1179,7 @@ static void cm_process_work(struct cm_id } cm_deref_id(cm_id_priv); if (ret) - ib_destroy_cm_id(&cm_id_priv->id); + cm_destroy_id(&cm_id_priv->id, ret); } static void cm_format_mra(struct cm_mra_msg *mra_msg, ___ openib-general mailing list openib-general@openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
[openib-general] [PATCH] trivial white space clean up in libipathverbs
This patch just corrects some white space code conventions. Signed-off-by: Ralph Campbell <[EMAIL PROTECTED]> Index: src/userspace/libipathverbs/src/ipathverbs.h === --- src/userspace/libipathverbs/src/ipathverbs.h(old) +++ src/userspace/libipathverbs/src/ipathverbs.h(new) @@ -122,7 +122,6 @@ int ipath_destroy_srq(struct ibv_srq *srq); - struct ibv_ah *ipath_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr); int ipath_destroy_ah(struct ibv_ah *ah); Index: src/userspace/libipathverbs/src/verbs.c === --- src/userspace/libipathverbs/src/verbs.c (old) +++ src/userspace/libipathverbs/src/verbs.c (new) @@ -83,11 +83,11 @@ struct ibv_pd*pd; pd = malloc(sizeof *pd); - if(!pd) + if (!pd) return NULL; - if(ibv_cmd_alloc_pd(context, pd, &cmd, sizeof cmd, - &resp, sizeof resp)) { + if (ibv_cmd_alloc_pd(context, pd, &cmd, sizeof cmd, +&resp, sizeof resp)) { free(pd); return NULL; } @@ -232,7 +232,7 @@ int ret; srq = malloc(sizeof *srq); - if(srq == NULL) + if (srq == NULL) return NULL; ret = ibv_cmd_create_srq(pd, srq, attr, &cmd, sizeof cmd, @@ -278,10 +278,10 @@ struct ibv_ah *ah; ah = malloc(sizeof *ah); - if(ah == NULL) + if (ah == NULL) return NULL; - if(ibv_cmd_create_ah(pd, ah, attr)) { + if (ibv_cmd_create_ah(pd, ah, attr)) { free(ah); return NULL; } ___ openib-general mailing list openib-general@openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
[openib-general] [PATCH] add support for ibv_query_qp(), ibv_query_srq() to libipathverbs
This patch adds support for ibv_query_qp() and ibv_query_srq() to libipathverbs which are new in libibverbs.so.2. Note that it layers on top of my previous patch. Signed-off-by: Ralph Campbell <[EMAIL PROTECTED]> Index: src/userspace/libipathverbs/src/ipathverbs.h === --- src/userspace/libipathverbs/src/ipathverbs.h(old) +++ src/userspace/libipathverbs/src/ipathverbs.h(new) @@ -96,6 +96,10 @@ struct ibv_qp *ipath_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr); +int ipath_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, + enum ibv_qp_attr_mask attr_mask, + struct ibv_qp_init_attr *init_attr); + int ipath_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, enum ibv_qp_attr_mask attr_mask); @@ -114,6 +118,8 @@ struct ibv_srq_attr *attr, enum ibv_srq_attr_mask attr_mask); +int ipath_query_srq(struct ibv_srq *srq, struct ibv_srq_attr *attr); + int ipath_destroy_srq(struct ibv_srq *srq); Index: src/userspace/libipathverbs/src/verbs.c === --- libipathverbs/src/verbs.c (old) +++ libipathverbs/src/verbs.c (new) @@ -40,7 +40,7 @@ #include #include -#include +#include #include #include @@ -193,6 +193,16 @@ return qp; } +int ipath_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, + enum ibv_qp_attr_mask attr_mask, + struct ibv_qp_init_attr *init_attr) +{ + struct ibv_query_qp cmd; + + return ibv_cmd_query_qp(qp, attr, attr_mask, init_attr, + &cmd, sizeof cmd); +} + int ipath_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, enum ibv_qp_attr_mask attr_mask) { @@ -244,6 +254,13 @@ return ibv_cmd_modify_srq(srq, attr, attr_mask, &cmd, sizeof cmd); } +int ipath_query_srq(struct ibv_srq *srq, struct ibv_srq_attr *attr) +{ + struct ibv_query_srq cmd; + + return ibv_cmd_query_srq(srq, attr, &cmd, sizeof cmd); +} + int ipath_destroy_srq(struct ibv_srq *srq) { int ret; ___ openib-general mailing list openib-general@openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
Re: [openib-general] max_send_sge < max_sge
Quoting r. Pete Wyckoff <[EMAIL PROTECTED]>: > It would not be a major problem to avoid using more than a couple of > SGEs; however, I didn't see any reason to avoid them. Please let me > know if you see a problem with this approach. A QP with a large number of SGEs per WQE enabled uses up more resources and might also be slower if typical WR has a small number of SGEs. So you should anticipate the typical number of SGEs for best performance. As I mentioned previously, even if you do want a large number of SGEs, but you want your application to be robust and scalable, you should scale your parameters down if QP allocation fails since device query does not guarantee the allocation will always succeed. -- MST ___ openib-general mailing list openib-general@openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
Re: [openib-general] Kernel Oops related to IPoIB (multicast module?)
Sean Hefty wrote: > The SA query interface always invokes a callback, regardless if a call > succeeds. > So if a call to ib_sa_mcmmember_rec_set() fails (which happens in this case > because the SM is down), the user's callback is still invoked. The multicast > module is coded assuming that an immediate failure does not result in a > callback, so the callback is unexpected, which throws off the reference > counting. I've committed a patch that should hopefully fix this problem. The problem was that a return code of 0 from the SA query calls should have been treated as valid, rather than an error. - Sean ___ openib-general mailing list openib-general@openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
[openib-general] [PATCH] change libipathverbs to use the new initialization convention
The libibverbs.so.2 has a different device plug-in module initialization convention from libibverbs.so.1. This patch updates the InfiniPath libipathverbs module to conform to the new convention. Signed-off-by: Ralph Campbell <[EMAIL PROTECTED]> Index: src/userspace/libipathverbs/src/ipathverbs.map === --- src/userspace/libipathverbs/src/ipathverbs.map (revision 8255) +++ src/userspace/libipathverbs/src/ipathverbs.map (working copy) @@ -1,4 +1,4 @@ { - global: openib_driver_init; + global: ibv_driver_init; local: *; }; Index: src/userspace/libipathverbs/src/ipathverbs.c === --- src/userspace/libipathverbs/src/ipathverbs.c(revision 8255) +++ src/userspace/libipathverbs/src/ipathverbs.c(working copy) @@ -145,30 +145,24 @@ .free_context = ipath_free_context }; -struct ibv_device *openib_driver_init(struct sysfs_class_device *sysdev) +struct ibv_device *ibv_driver_init(const char *uverbs_sys_path, + int abi_version) { - struct sysfs_device*pcidev; - struct sysfs_attribute *attr; + charvalue[8]; struct ipath_device*dev; - unsignedvendor, device; - int i; + unsignedvendor, device; + int i; - pcidev = sysfs_get_classdev_device(sysdev); - if (!pcidev) + if (ibv_read_sysfs_file(uverbs_sys_path, "device/vendor", + value, sizeof value) < 0) return NULL; + sscanf(value, "%i", &vendor); - attr = sysfs_get_device_attr(pcidev, "vendor"); - if (!attr) + if (ibv_read_sysfs_file(uverbs_sys_path, "device/device", + value, sizeof value) < 0) return NULL; - sscanf(attr->value, "%i", &vendor); - sysfs_close_attribute(attr); + sscanf(value, "%i", &device); - attr = sysfs_get_device_attr(pcidev, "device"); - if (!attr) - return NULL; - sscanf(attr->value, "%i", &device); - sysfs_close_attribute(attr); - for (i = 0; i < sizeof hca_table / sizeof hca_table[0]; ++i) if (vendor == hca_table[i].vendor && device == hca_table[i].device) @@ -180,13 +174,12 @@ dev = malloc(sizeof *dev); if (!dev) { fprintf(stderr, PFX "Fatal: couldn't allocate device for %s\n", - sysdev->name); - abort(); + 
uverbs_sys_path); + return NULL; } dev->ibv_dev.ops = ipath_dev_ops; dev->hca_type= hca_table[i].type; - dev->page_size = sysconf(_SC_PAGESIZE); return &dev->ibv_dev; } Index: src/userspace/libipathverbs/src/ipathverbs.h === --- src/userspace/libipathverbs/src/ipathverbs.h(revision 8255) +++ src/userspace/libipathverbs/src/ipathverbs.h(working copy) @@ -57,7 +57,6 @@ struct ipath_device { struct ibv_device ibv_dev; enum ipath_hca_type hca_type; - int page_size; }; struct ipath_context { ___ openib-general mailing list openib-general@openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
Re: [openib-general] max_send_sge < max_sge
[EMAIL PROTECTED] wrote on Tue, 27 Jun 2006 09:06 -0400: > At 02:42 AM 6/27/2006, Michael S. Tsirkin wrote: > >Unless you use it, passing the absolute maximum value supported by > >hardware does > >not seem, to me, to make sense - it will just slow you down, and waste > >resources. Is there a protocol out there that actually has a use for 30 sge? > > It's not a protocol thing, it's a memory registration thing. But I agree, > that's a huge number of segments for send and receive. 2-4 is more > typical. I'd be interested to know what wants 30 as well... This is the OpenIB port of pvfs2: http://www.pvfs.org/pvfs2/download.html See pvfs2/src/io/bmi/bmi_ib/openib.c for the bottom of the transport stack. The max_sge-1 aspect I'm complaining about isn't checked in yet. It's a file system application. The MPI-IO interface provides datatypes and file views that let a client write complex subsets of the in-memory data to a file with a single call. One case that happens is contiguous-in-file but discontiguous-in-memory, where the file system client writes data from multiple addresses to a single region in a file. The application calls MPI_File_write or a variant, and this complex buffer description filters all the way down to the OpenIB transport, which then has to figure out how to get the data to the server. These separate data regions may have been allocated all at once using MPI_Alloc_mem (rarely), or may have been used previously for file system operations so are already pinned in the registration cache. Are you implying there is more memory registration work that has to happen beyond making sure each of the SGE buffers is pinned and has a valid lkey? It would not be a major problem to avoid using more than a couple of SGEs; however, I didn't see any reason to avoid them. Please let me know if you see a problem with this approach. 
-- Pete ___ openib-general mailing list openib-general@openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
Re: [openib-general] max_send_sge < max_sge
[EMAIL PROTECTED] wrote on Tue, 27 Jun 2006 09:42 +0300: > Quoting r. Pete Wyckoff <[EMAIL PROTECTED]>: > > Is this a known issue? > > Yes. The fact that ibv_query_device returns some value in hca_cap can not > guarantee that ibv_create_qp with these parameters will succeed. For example, > system administrator might have imposed a limit on the amount of memory you > can > pin down, and you will get ENOMEM. I was hoping to get a guaranteed maximum number from ibv_query_device so that I would know that calls to ibv_create_qp would not fail due to my asking for too many CQ entries. My code has some idea of how many it wants (16), and compares that to the hca_cap values to settle for what it can get. I only happened to notice that 30 wouldn't work even though it was so claimed when debugging. > > Should I always subtract 1 from the reported max on the send side? Just for > > this hardware? > > Unless you use it, passing the absolute maximum value supported by hardware > does > not seem, to me, to make sense - it will just slow you down, and waste > resources. Is there a protocol out there that actually has a use for 30 sge? Perhaps I don't understand what is more resource-costly about using 29 sge when they are supported by the hardware. I'm using them on the send side to avoid having to either: 1. memcpy 29 little buffers into one big buffer or 2. send 29 rdma writes instead of a single rdma write with 29 sges The buffer on the receiver is contiguous and big enough to hold everything. > In my opinion, for the application to be robust it has to either use small > values that empirically work on most systems, or be able to scale down to > require less resources if an allocation fails. Scale down? 
So if ibv_create_qp fails, you think I should look at the return value (which is NULL, not ENOMEM or EINVAL or anything informative), and then gradually reduce the values for max_recv_sge, max_send_sge, max_recv_wr, max_send_wr, max_inline_data below the reported HCA maximum until I find something that works? I'll subtract 1 from the hca_cap.max_sge for Mellanox hardware before doing the comparison against how many SGEs I'd like to get. Otherwise I can't see much alternative to trusting the hca_cap values that are returned. -- Pete ___ openib-general mailing list openib-general@openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
[openib-general] new uDAPL co-maintainer
In recognition of his many contributions to the DAPL project, Arlin Davis is joining the project as an official co-maintainer. Arlin and I will collaborate on DAPL maintenance and development decisions. james -- James Lentini | Network Appliance | 781-768-5359 | [EMAIL PROTECTED] ___ openib-general mailing list openib-general@openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
Re: [openib-general] Kernel Oops related to IPoIB (multicast module?)
Jack Morgenstein wrote: > Evidently, ipoib was still attempting to connect with an SA, when the ipoib > module was unloaded (modprobe -r). After the ipoib module was unloaded (or at > least rendered inaccessible), the ib_sa module attempted to invoke > "ib_sa_mcmember_rec_callback" (for a callback address that was part of the > unloaded ipoib module). Hence, the Oops below. I still haven't been able to reproduce this, but I _think_ I understand what's likely happening. The SA query interface always invokes a callback, regardless of whether a call succeeds. So if a call to ib_sa_mcmember_rec_set() fails (which happens in this case because the SM is down), the user's callback is still invoked. The multicast module is coded assuming that an immediate failure does not result in a callback, so the callback is unexpected, which throws off the reference counting. I should have a patch for this shortly, but since I can't reproduce the problem, my testing of it will be limited. - Sean ___ openib-general mailing list openib-general@openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
[openib-general] [PATCH]OpenSM/osm_lid_mgr.c: In __osm_lid_mgr_init_sweep, support enhanced switch port 0 for LMC > 0
OpenSM/osm_lid_mgr.c: In __osm_lid_mgr_init_sweep, support enhanced switch port 0 for LMC > 0 Base port 0 is constrained to have LMC of 0 whereas enhanced switch port 0 is not. Support enhanced switch port 0 is more like CA and router ports in terms of this. Signed-off-by: Hal Rosenstock <[EMAIL PROTECTED]> Index: opensm/osm_lid_mgr.c === --- opensm/osm_lid_mgr.c(revision 8239) +++ opensm/osm_lid_mgr.c(working copy) @@ -94,6 +94,7 @@ #include #include #include +#include #include #include #include @@ -351,6 +352,8 @@ __osm_lid_mgr_init_sweep( osm_lid_mgr_range_t *p_range = NULL; osm_port_t *p_port; cl_qmap_t *p_port_guid_tbl; + osm_switch_t*p_sw; + ib_switch_info_t*p_si; uint8_t lmc_num_lids = (uint8_t)(1 << p_mgr->p_subn->opt.lmc); uint16_t lmc_mask; uint16_t req_lid, num_lids; @@ -436,7 +439,20 @@ __osm_lid_mgr_init_sweep( IB_NODE_TYPE_SWITCH ) num_lids = lmc_num_lids; else -num_lids = 1; + { +/* Determine if enhanced switch port 0 */ +p_sw = osm_get_switch_by_guid(p_mgr->p_subn, + osm_node_get_node_guid(osm_port_get_parent_node(p_port))); +if (p_sw && (p_si = osm_switch_get_si_ptr(p_sw)) && +ib_switch_info_is_enhanced_port0(p_si)) +{ + num_lids = lmc_num_lids; +} +else +{ + num_lids = 1; +} + } if ((num_lids != 1) && (((db_min_lid & lmc_mask) != db_min_lid) || ___ openib-general mailing list openib-general@openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
Re: [openib-general] RFC: CMA backlog (was Re: CMA backlog)
Quoting r. Sean Hefty <[EMAIL PROTECTED]>: > Subject: Re: RFC: CMA backlog (was Re: CMA backlog) > > Michael S. Tsirkin wrote: > > I'm just saying that we can use exactly the code in ib_destroy_cm_id, but > > avoid calling ib_send_cm_rej in this one case: > > Ah... yes, something like that should work. Like this then (untested)? Signed-off-by: Michael S. Tsirkin <[EMAIL PROTECTED]> Index: linux-2.6.17-2.6.18/drivers/infiniband/core/cm.c === --- linux-2.6.17-2.6.18.orig/drivers/infiniband/core/cm.c 2006-06-27 12:21:34.0 +0300 +++ linux-2.6.17-2.6.18/drivers/infiniband/core/cm.c2006-06-27 21:16:49.0 +0300 @@ -701,7 +701,7 @@ static void cm_reset_to_idle(struct cm_i } } -void ib_destroy_cm_id(struct ib_cm_id *cm_id) +static void cm_destroy_id(struct ib_cm_id *cm_id, int reject) { struct cm_id_private *cm_id_priv; struct cm_work *work; @@ -731,9 +731,9 @@ retest: ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); spin_unlock_irqrestore(&cm_id_priv->lock, flags); ib_send_cm_rej(cm_id, IB_CM_REJ_TIMEOUT, - &cm_id_priv->av.port->cm_dev->ca_guid, - sizeof cm_id_priv->av.port->cm_dev->ca_guid, - NULL, 0); + &cm_id_priv->av.port->cm_dev->ca_guid, + sizeof cm_id_priv->av.port->cm_dev->ca_guid, + NULL, 0); break; case IB_CM_MRA_REQ_RCVD: case IB_CM_REP_SENT: @@ -744,9 +744,14 @@ retest: case IB_CM_MRA_REQ_SENT: case IB_CM_REP_RCVD: case IB_CM_MRA_REP_SENT: - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, - NULL, 0, NULL, 0); + if (reject) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, + NULL, 0, NULL, 0); + } else { + cm_reset_to_idle(cm_id_priv); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + } break; case IB_CM_ESTABLISHED: spin_unlock_irqrestore(&cm_id_priv->lock, flags); @@ -775,6 +780,12 @@ retest: kfree(cm_id_priv->private_data); kfree(cm_id_priv); } + +void ib_destroy_cm_id(struct ib_cm_id *cm_id) +{ + cm_destroy_id(cm_id, 1); +} + 
EXPORT_SYMBOL(ib_destroy_cm_id); int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask, @@ -1163,7 +1174,7 @@ static void cm_process_work(struct cm_id } cm_deref_id(cm_id_priv); if (ret) - ib_destroy_cm_id(&cm_id_priv->id); + cm_destroy_id(&cm_id_priv->id, ret != -ENOMEM); } static void cm_format_mra(struct cm_mra_msg *mra_msg, -- MST ___ openib-general mailing list openib-general@openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
Re: [openib-general] [IWARP BRANCH] [PATCH 0/3] Fix rdma_lat and rdma_bw to work with the new stack and libraries
Committed in the iwarp branch. r8254. Thanks, Steve. On Tue, 2006-06-27 at 23:21 +0530, Pradipta Kumar Banerjee wrote: > The present rdma_lat and rdma_bw utilizing the RDMA CM is broken and doesn't > work with the latest libraries. The present code breaks because of using the > old > signature for the function rdma_get_cm_event. > > old function signature - int rdma_get_cm_event(struct rdma_cm_event **event) > new function signature - int rdma_get_cm_event(struct rdma_event_channel > *channel, > struct rdma_cm_event **event) > > This patchset consists changes for rdma_lat, rdma_bw and Makefile. > > 1 - rdma_lat.c changes > 2 - rdma_bw.c changes > 3 - Makefile changes > > Signed-off-by: Pradipta Kumar Banerjee <[EMAIL PROTECTED]> > > --- > > Thanks, > Pradipta Kumar. ___ openib-general mailing list openib-general@openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
[openib-general] [IWARP BRANCH] [PATCH 3/3] Fix rdma_lat and rdma_bw to work with the new stack and libraries
This fixes the Makefile to properly build rdma_lat and rdma_bw Includes the librdmacm library. Signed-off-by: Pradipta Kumar Banerjee <[EMAIL PROTECTED]> --- Index: Makefile = --- bkp/Makefile2006-06-22 10:18:58.0 +0530 +++ Makefile2006-06-22 10:26:55.0 +0530 @@ -10,7 +10,7 @@ EXTRA_HEADERS = get_clock.h LOADLIBES += LDFLAGS += -${TESTS}: LOADLIBES += -libverbs +${TESTS}: LOADLIBES += -libverbs -lrdmacm ${TESTS} ${UTILS}: %: %.c ${EXTRA_FILES} ${EXTRA_HEADERS} $(CC) $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $< ${EXTRA_FILES} $(LOADLIBES) $(LDLIBS) -o $@ ___ openib-general mailing list openib-general@openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
[openib-general] [IWARP BRANCH] [PATCH 2/3] Fix rdma_lat and rdma_bw to work with the new stack and libraries
This patch fixes the broken rdma_bw by using the correct function signature for rdma_get_cm_event. old function signature - int rdma_get_cm_event(struct rdma_cm_event **event) new function signature - int rdma_get_cm_event(struct rdma_event_channel *channel, struct rdma_cm_event **event) Signed-off-by: Pradipta Kumar Banerjee <[EMAIL PROTECTED]> --- Index: rdma_bw.c = --- ../perftest-org/rdma_bw.c 2006-06-22 18:28:13.0 +0530 +++ rdma_bw.c 2006-06-22 18:40:01.0 +0530 @@ -51,6 +51,7 @@ #include #include #include +#include #include #include @@ -75,6 +76,7 @@ struct pingpong_context { struct ibv_sge list; struct ibv_send_wr wr; struct rdma_cm_id *cm_id; + struct rdma_event_channel *cm_channel; }; struct pingpong_dest { @@ -545,11 +547,12 @@ static void pp_close_cma(struct pingpong } } - rdma_get_cm_event(&event); + rdma_get_cm_event(ctx->cm_channel, &event); if (event->event != RDMA_CM_EVENT_DISCONNECTED) printf("unexpected event during disconnect %d\n", event->event); rdma_ack_cm_event(event); rdma_destroy_id(ctx->cm_id); + rdma_destroy_event_channel(ctx->cm_channel); } static struct pingpong_context *pp_server_connect_cma(unsigned short port, int size, int tx_depth, @@ -562,13 +565,22 @@ static struct pingpong_context *pp_serve int ret; struct sockaddr_in sin; struct rdma_cm_id *child_cm_id; + struct rdma_event_channel *channel; struct pingpong_context *ctx; printf("%s starting server\n", __FUNCTION__); - ret = rdma_create_id(&listen_id, NULL); + channel = rdma_create_event_channel(); + if (!channel) { + ret = errno; + fprintf(stderr, "%s rdma_create_event_channel failed with error %d\n", +__FUNCTION__, ret); + return NULL; +} + + ret = rdma_create_id(channel, &listen_id, NULL); if (ret) { fprintf(stderr, "%s rdma_create_id failed %d\n", __FUNCTION__, ret); - return NULL; + goto err3; } sin.sin_addr.s_addr = 0; @@ -586,7 +598,7 @@ static struct pingpong_context *pp_serve goto err2; } - ret = rdma_get_cm_event(&event); + ret = rdma_get_cm_event(channel, &event); if 
(ret) goto err2; @@ -612,6 +624,7 @@ static struct pingpong_context *pp_serve goto err0; } + ctx->cm_channel = channel; my_dest->qpn = 0; my_dest->psn = 0xbb; my_dest->rkey = ctx->mr->rkey; @@ -627,7 +640,7 @@ static struct pingpong_context *pp_serve goto err0; } rdma_ack_cm_event(event); - ret = rdma_get_cm_event(&event); + ret = rdma_get_cm_event(channel, &event); if (ret) { fprintf(stderr,"rdma_get_cm_event error %d\n", ret); rdma_destroy_id(child_cm_id); @@ -646,8 +659,10 @@ err0: err1: rdma_ack_cm_event(event); err2: - rdma_destroy_id(listen_id); fprintf(stderr,"%s NOT connected!\n", __FUNCTION__); + rdma_destroy_id(listen_id); +err3: + rdma_destroy_event_channel(channel); return NULL; } @@ -683,6 +698,7 @@ static struct pingpong_context *pp_clien int ret; struct sockaddr_in sin; struct rdma_cm_id *cm_id; + struct rdma_event_channel *channel; struct pingpong_context *ctx; fprintf(stderr,"%s starting client\n", __FUNCTION__); @@ -691,10 +707,18 @@ static struct pingpong_context *pp_clien return NULL; } - ret = rdma_create_id(&cm_id, NULL); + channel = rdma_create_event_channel(); + if (!channel) { + ret = errno; + fprintf(stderr, "%s rdma_create_event_channel failed with error %d\n", + __FUNCTION__, ret); + return NULL; + } + + ret = rdma_create_id(channel, &cm_id, NULL); if (ret) { fprintf(stderr,"%s rdma_create_id failed %d\n", __FUNCTION__, ret); - return NULL; + goto err3; } sin.sin_family = PF_INET; @@ -705,7 +729,7 @@ static struct pingpong_context *pp_clien goto err2; } - ret = rdma_get_cm_event(&event); + ret = rdma_get_cm_event(channel, &event); if (ret) goto err2; @@ -722,7 +746,7 @@ static struct pingpong_context *pp_clien goto err2; } - ret = rdma_get_cm_event(&event); + ret = rdma_get_cm_event(channel, &event); if (ret) goto err2; @@ -740,6 +764,7 @@ static struct pingpong
[openib-general] [IWARP BRANCH] [PATCH 1/3] Fix rdma_lat and rdma_bw to work with the new stack and libraries
This patch fixes the broken rdma_lat by using the correct function signature for rdma_get_cm_event. old function signature - int rdma_get_cm_event(struct rdma_cm_event **event) new function signature - int rdma_get_cm_event(struct rdma_event_channel *channel, struct rdma_cm_event **event) Signed-off-by: Pradipta Kumar Banerjee <[EMAIL PROTECTED]> --- Index: rdma_lat.c = --- ../perftest-org/rdma_lat.c 2006-06-22 18:28:13.0 +0530 +++ rdma_lat.c 2006-06-22 18:36:12.0 +0530 @@ -51,6 +51,7 @@ #include #include #include +#include #include #include @@ -83,6 +84,7 @@ struct pingpong_context { struct ibv_sge list; struct ibv_send_wr wr; struct rdma_cm_id *cm_id; + struct rdma_event_channel *cm_channel; }; struct pingpong_dest { @@ -612,11 +614,12 @@ static void pp_close_cma(struct pingpong } } - rdma_get_cm_event(&event); + rdma_get_cm_event(ctx->cm_channel, &event); if (event->event != RDMA_CM_EVENT_DISCONNECTED) printf("unexpected event during disconnect %d\n", event->event); rdma_ack_cm_event(event); rdma_destroy_id(ctx->cm_id); + rdma_destroy_event_channel(ctx->cm_channel); } static struct pingpong_context *pp_server_connect_cma(unsigned short port, int size, int tx_depth, @@ -629,17 +632,26 @@ static struct pingpong_context *pp_serve int ret; struct sockaddr_in sin; struct rdma_cm_id *child_cm_id; + struct rdma_event_channel *channel; struct pingpong_context *ctx; - + printf("%s starting server\n", __FUNCTION__); - ret = rdma_create_id(&listen_id, NULL); - if (ret) { - fprintf(stderr, "%s rdma_create_id failed %d\n", __FUNCTION__, ret); + channel = rdma_create_event_channel(); + if (!channel) { + ret = errno; + fprintf(stderr, "%s rdma_create_event_channel failed with error %d\n", + __FUNCTION__, ret); return NULL; } + ret = rdma_create_id(channel, &listen_id, NULL); + if (ret) { + fprintf(stderr, "%s rdma_create_id failed %d\n", __FUNCTION__, ret); + goto err3; + } + memset(&sin, 0, sizeof(sin)); sin.sin_addr.s_addr = 0; - sin.sin_family = PF_INET; + sin.sin_family = 
AF_INET; sin.sin_port = htons(port); ret = rdma_bind_addr(listen_id, (struct sockaddr *)&sin); if (ret) { @@ -653,7 +665,7 @@ static struct pingpong_context *pp_serve goto err2; } - ret = rdma_get_cm_event(&event); + ret = rdma_get_cm_event(channel, &event); if (ret) goto err2; @@ -678,7 +690,8 @@ static struct pingpong_context *pp_serve fprintf(stderr,"%s pp_init_cma_ctx failed\n", __FUNCTION__); goto err0; } - + + ctx->cm_channel = channel; my_dest->qpn = 0; my_dest->psn = 0xbb; my_dest->rkey = ctx->mr->rkey; @@ -694,7 +707,7 @@ static struct pingpong_context *pp_serve goto err0; } rdma_ack_cm_event(event); - ret = rdma_get_cm_event(&event); + ret = rdma_get_cm_event(channel, &event); if (ret) { fprintf(stderr,"rdma_get_cm_event error %d\n", ret); rdma_destroy_id(child_cm_id); @@ -713,8 +726,10 @@ err0: err1: rdma_ack_cm_event(event); err2: - rdma_destroy_id(listen_id); fprintf(stderr,"%s NOT connected!\n", __FUNCTION__); + rdma_destroy_id(listen_id); +err3: + rdma_destroy_event_channel(channel); return NULL; } @@ -750,6 +765,7 @@ static struct pingpong_context *pp_clien int ret; struct sockaddr_in sin; struct rdma_cm_id *cm_id; + struct rdma_event_channel *channel; struct pingpong_context *ctx; fprintf(stderr,"%s starting client\n", __FUNCTION__); @@ -758,10 +774,18 @@ static struct pingpong_context *pp_clien return NULL; } - ret = rdma_create_id(&cm_id, NULL); + channel = rdma_create_event_channel(); + if (!channel) { + ret = errno; + fprintf(stderr, "%s rdma_create_event_channel failed with error %d\n", + __FUNCTION__, ret); + return NULL; + } + + ret = rdma_create_id(channel, &cm_id, NULL); if (ret) { fprintf(stderr,"%s rdma_create_id failed %d\n", __FUNCTION__, ret); - return NULL; + goto err3; } sin.sin_family = PF_INET; @@ -772,7 +796,7 @@ static struct pingpong_context *pp_clien goto err2; } -
[openib-general] [IWARP BRANCH] [PATCH 0/3] Fix rdma_lat and rdma_bw to work with the new stack and libraries
The present rdma_lat and rdma_bw utilizing the RDMA CM are broken and do not work with the latest libraries. The present code breaks because it uses the old signature for the function rdma_get_cm_event. old function signature - int rdma_get_cm_event(struct rdma_cm_event **event) new function signature - int rdma_get_cm_event(struct rdma_event_channel *channel, struct rdma_cm_event **event) This patchset consists of changes for rdma_lat, rdma_bw and the Makefile. 1 - rdma_lat.c changes 2 - rdma_bw.c changes 3 - Makefile changes Signed-off-by: Pradipta Kumar Banerjee <[EMAIL PROTECTED]> --- Thanks, Pradipta Kumar. ___ openib-general mailing list openib-general@openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
Re: [openib-general] [PATCH] opensm: libibmad: match MAD TransactionID
On Tue, 2006-06-27 at 13:07, Sasha Khapyorsky wrote: > Hi Eitan, > > On 09:48 Tue 27 Jun , Eitan Behave wrote: > > Hi Sasha > > > > Can you provide a little more info on the cause and impact of the > > issue you are solving with this patch? > > umad_recv() uses poll(); when it times out, umad_recv() returns an error > and _do_madrpc() returns with an error too. The next _do_madrpc() session > will get the previous response MAD, and so on. One more note to add to this: This only affects the OpenIB diagnostics and not OpenSM as the latter does not use this library; it uses umad directly not via rpc. -- Hal > > How is it related to work on the thread: > > "mad: add GID/class checking for matching received to sent MADs"? > > It is not related. > > Sasha > > > > > Thanks > > > > Sasha Khapyorsky wrote: > > >Match MAD TransactionID on receiving. This prevents request/response MADs > > >mixing - reproducible when poll() (in libibumad) returns timeout. > > > ___ openib-general mailing list openib-general@openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
[openib-general] [PATCH] OpenSM/osm_port_info_rcv.c: In __osm_pi_rcv_process_switch_port, better BSP0 handling
OpenSM/osm_port_info_rcv.c: In __osm_pi_rcv_process_switch_port, better BSP0 handling In __osm_pi_rcv_process_switch_port, if base switch port 0, then copy the received PortInfo attribute into the physp structure regardless of the port state. On BSP0, the port state is not used so this protects against an SMA which set this to LINK_DOWN. This makes the code for BSP0 more similar to how it originally was at the cost of an extra copy of the PortInfo attribute. Signed-off-by: Hal Rosenstock <[EMAIL PROTECTED]> Index: opensm/osm_port_info_rcv.c === --- opensm/osm_port_info_rcv.c (revision 8252) +++ opensm/osm_port_info_rcv.c (working copy) @@ -239,6 +239,8 @@ __osm_pi_rcv_process_switch_port( uint8_t port_num; uint8_t remote_port_num; osm_dr_path_t path; + osm_switch_t *p_sw; + ib_switch_info_t *p_si; OSM_LOG_ENTER( p_rcv->p_log, __osm_pi_rcv_process_switch_port ); @@ -350,6 +352,15 @@ __osm_pi_rcv_process_switch_port( "__osm_pi_rcv_process_switch_port: ERR 0F04: " "Invalid base LID 0x%x corrected\n", cl_ntoh16( orig_lid ) ); + /* Determine if base switch port 0 */ + p_sw = osm_get_switch_by_guid(p_rcv->p_subn, + osm_node_get_node_guid( p_node )); + if (p_sw && (p_si = osm_switch_get_si_ptr(p_sw)) && + !ib_switch_info_is_enhanced_port0(p_si)) +{ + /* PortState is not used on BSP0 but just in case it is DOWN */ + p_physp->port_info = *p_pi; +} __osm_pi_rcv_process_endport(p_rcv, p_physp, p_pi); } ___ openib-general mailing list openib-general@openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
[openib-general] [PATCH][TRIVIAL] OpenSM/osm_pkey_mgr.c: In pkey_mgr_get_physp_max_blocks, use routine rather than accessing structure member directly
OpenSM/osm_pkey_mgr.c: In pkey_mgr_get_physp_max_blocks, use routine rather than accessing structure member directly Signed-off-by: Hal Rosenstock <[EMAIL PROTECTED]> Index: opensm/osm_pkey_mgr.c === --- opensm/osm_pkey_mgr.c (revision 8220) +++ opensm/osm_pkey_mgr.c (working copy) @@ -81,7 +81,7 @@ pkey_mgr_get_physp_max_blocks( num_pkeys = cl_ntoh16( p_node->node_info.partition_cap ); else { -p_sw = osm_get_switch_by_guid( p_subn, p_node->node_info.node_guid ); +p_sw = osm_get_switch_by_guid( p_subn, osm_node_get_node_guid( p_node ) ); if (p_sw) num_pkeys = cl_ntoh16( p_sw->switch_info.enforce_cap ); } ___ openib-general mailing list openib-general@openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
Re: [openib-general] [PATCH] opensm: libibmad: match MAD TransactionID
Hi Eitan, On 09:48 Tue 27 Jun , Eitan Behave wrote: > Hi Sasha > > Can you provide a little more info on the cause and impact of the > issue you are solving with this patch? umad_recv() uses poll(); when it times out, umad_recv() returns an error and _do_madrpc() returns with an error too. The next _do_madrpc() session will get the previous response MAD, and so on. > > How is it related to work on the thread: > "mad: add GID/class checking for matching received to sent MADs"? It is not related. Sasha > > Thanks > > Sasha Khapyorsky wrote: > >Match MAD TransactionID on receiving. This prevents request/response MADs > >mixing - reproducible when poll() (in libibumad) returns timeout. > > ___ openib-general mailing list openib-general@openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
Re: [openib-general] Kernel Oops related to IPoIB (multicast module?)
Tziporet Koren wrote: > Resolving this issue is critical for us since it prevents us from any > usage of the new multicast module. > An easy way to reproduce it is to use the OFED "openibd" script. Just > run "openibd start" and then "openibd stop" and you will see the > problem. This script is available within OFED release. I am working on trying to resolve this as my top priority at the moment, but I have not been able to reproduce this on my systems. I want to understand why ib_sa was not unloaded as part of modprobe -r ib_ipoib, but why ib_multicast apparently was. I will examine the script that you mentioned, but I typically do not run the OFED release. - Sean ___ openib-general mailing list openib-general@openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
Re: [openib-general] Local QP operation error
Quoting r. Ramachandra K <[EMAIL PROTECTED]>: > Just to clarify, I am writing the kernel module that is getting the local > QP operation error. I guess I am missing something in my code that > is causing the error. But I am unable to pinpoint the cause of the error. > > Does this error point to some issue with the DMA address specified > in the work request SGE ? Yes, it seems hardware could not read (gather) data when executing the work request SGE. -- MST ___ openib-general mailing list openib-general@openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
Re: [openib-general] Local QP operation error
At 09:21 AM 6/27/2006, Ramachandra K wrote: >Does this error point to some issue with the DMA address specified >in the work request SGE ? Ding Ding Ding Ding! :-) We recently identified the exact issue in the NFS/RDMA server, which happened only when running on ia64. If you're not using the dma_map_* api, that's maybe something to look at. ;-) Tom. ___ openib-general mailing list openib-general@openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
Re: [openib-general] Local QP operation error
Michael S. Tsirkin wrote: The IB specification says that this error indicates an internal QP consistency error. What are the possible reasons for this and is there any way I can pinpoint the inconsistency? This normally indicates some kind of driver bug, or memory corruption. What is the value of the vendor_err field? The vendor_err field value is 115 (0x73). Just to clarify, I am writing the kernel module that is getting the local QP operation error. I guess I am missing something in my code that is causing the error. But I am unable to pinpoint the cause of the error. Does this error point to some issue with the DMA address specified in the work request SGE? Regards, Ram ___ openib-general mailing list openib-general@openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
Re: [openib-general] ucma into kernel.org
> > How about the cma changes required by ucma to get/set options? I think they > are > not upstream yet. Could these go upstream, to make building ucma > out-of-kernel > possible, without kernel patches? > > Hi Sean, These features are needed for uDAPL and were requested by Woody and Arlin for Intel MPI scalability. Since in OFED 1.1 we are going to take CMA from kernel 2.6.18 we need them upstream. Can you drive these enhancements only to 2.6.18. Thanks, Tziporet ___ openib-general mailing list openib-general@openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
Re: [openib-general] max_send_sge < max_sge
At 02:42 AM 6/27/2006, Michael S. Tsirkin wrote: >Unless you use it, passing the absolute maximum value supported by >hardware does >not seem, to me, to make sense - it will just slow you down, and waste >resources. Is there a protocol out there that actually has a use for 30 sge? It's not a protocol thing, it's a memory registration thing. But I agree, that's a huge number of segments for send and receive. 2-4 is more typical. I'd be interested to know what wants 30 as well... Tom. ___ openib-general mailing list openib-general@openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
Re: [openib-general] Local QP operation error
Quoting r. Ramachandra K <[EMAIL PROTECTED]>: > Subject: Local QP operation error > > In a kernel module, on polling the CQ, I am getting a local QP > operation error (IB_WC_LOC_QP_OP_ERR). Work request > posted was of type IB_WR_SEND and the QP was moved to > IB_QPS_RTS state before posting the send work request. > > The IB specification says that this error indicates an internal QP consistency > error. What are the possible reasons for this and is there any way I can pin > point the inconsistency ? This normally indicates some kind of driver bug, or memory corruption. What is the value of the vendor_err field? -- MST ___ openib-general mailing list openib-general@openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
Re: [openib-general] Kernel Oops related to IPoIB (multicast module?)
Sean Hefty wrote: > Thanks for the additional information. I've been trying to reproduce this, > but > haven't been able to yet. I did notice that there's a several second delay > when > calling modprobe -r ib_ipoib, but only if I've tried to configure ib0 first. > (No SM was running.) > > I am confused on one area. After executing modprobe -r ib_ipoib, what kept > ib_sa loaded? (Why was modprobe -r ib_sa necessary?) I would have expected > it > to be unloaded at the same time. > > - Sean > > Hi Sean, Resolving this issue is critical for us since it prevents us from any usage of the new multicast module. An easy way to reproduce it is to use the OFED "openibd" script. Just run "openibd start" and then "openibd stop" and you will see the problem. This script is available within the OFED release. Thanks, Tziporet ___ openib-general mailing list openib-general@openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
[openib-general] Local QP operation error
In a kernel module, on polling the CQ, I am getting a local QP operation error (IB_WC_LOC_QP_OP_ERR). Work request posted was of type IB_WR_SEND and the QP was moved to IB_QPS_RTS state before posting the send work request. The IB specification says that this error indicates an internal QP consistency error. What are the possible reasons for this and is there any way I can pinpoint the inconsistency? I would appreciate any hints to resolve this error. Regards, Ram ___ openib-general mailing list openib-general@openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
[openib-general] [PATCH] OpenSM/SA: Eliminate some no longer needed code
OpenSM/SA: Eliminate some no longer needed code No longer a need to check whether the LID is beyond the vector table size. In fact, this turns an edge case into an error (when LMC > 0 and a non base LID is requested which is above the last base LID but within that port's LID range). In any case, osm_get_port_by_base_lid uses cl_ptr_vector_get_at which does this check at the proper time. Signed-off-by: Hal Rosenstock <[EMAIL PROTECTED]> Index: opensm/osm_sa_pkey_record.c === --- opensm/osm_sa_pkey_record.c (revision 8236) +++ opensm/osm_sa_pkey_record.c (working copy) @@ -419,25 +419,14 @@ osm_pkey_rec_rcv_process( CL_ASSERT( cl_ptr_vector_get_size(p_tbl) < 0x1 ); -if ((uint16_t)cl_ptr_vector_get_size(p_tbl) > cl_ntoh16(p_rcvd_rec->lid)) +status = osm_get_port_by_base_lid( p_rcv->p_subn, p_rcvd_rec->lid, &p_port ); +if ( ( status != IB_SUCCESS ) || ( p_port == NULL ) ) { - status = osm_get_port_by_base_lid( p_rcv->p_subn, p_rcvd_rec->lid, &p_port ); - if ( ( status != IB_SUCCESS ) || ( p_port == NULL ) ) - { -status = IB_NOT_FOUND; -osm_log( p_rcv->p_log, OSM_LOG_ERROR, - "osm_pkey_rec_rcv_process: ERR 460B: " - "No port found with LID 0x%x\n", - cl_ntoh16(p_rcvd_rec->lid) ); - } -} -else -{ /* LID out of range */ status = IB_NOT_FOUND; osm_log( p_rcv->p_log, OSM_LOG_ERROR, - "osm_pkey_rec_rcv_process: ERR 4609: " - "Given LID (0x%X) is out of range:0x%X\n", - cl_ntoh16(p_rcvd_rec->lid), cl_ptr_vector_get_size(p_tbl) ); + "osm_pkey_rec_rcv_process: ERR 460B: " + "No port found with LID 0x%x\n", + cl_ntoh16(p_rcvd_rec->lid) ); } } Index: opensm/osm_sa_portinfo_record.c === --- opensm/osm_sa_portinfo_record.c (revision 8236) +++ opensm/osm_sa_portinfo_record.c (working copy) @@ -677,25 +677,14 @@ osm_pir_rcv_process( */ if( comp_mask & IB_PIR_COMPMASK_LID ) { -if ((uint16_t)cl_ptr_vector_get_size(p_tbl) > cl_ntoh16(p_rcvd_rec->lid)) -{ - status = osm_get_port_by_base_lid( p_rcv->p_subn, p_rcvd_rec->lid, &p_port ); - if ( ( status != IB_SUCCESS ) || ( p_port == NULL ) 
) - { -status = IB_NOT_FOUND; -osm_log( p_rcv->p_log, OSM_LOG_ERROR, - "osm_pir_rcv_process: ERR 2109: " - "No port found with LID 0x%x\n", - cl_ntoh16(p_rcvd_rec->lid) ); - } -} -else +status = osm_get_port_by_base_lid( p_rcv->p_subn, p_rcvd_rec->lid, &p_port ); +if ( ( status != IB_SUCCESS ) || ( p_port == NULL ) ) { status = IB_NOT_FOUND; osm_log( p_rcv->p_log, OSM_LOG_ERROR, - "osm_pir_rcv_process: ERR 2101: " - "Given LID (0x%X) is out of range:0x%X\n", - cl_ntoh16(p_rcvd_rec->lid), cl_ptr_vector_get_size(p_tbl) ); + "osm_pir_rcv_process: ERR 2109: " + "No port found with LID 0x%x\n", + cl_ntoh16(p_rcvd_rec->lid) ); } } else Index: opensm/osm_sa_slvl_record.c === --- opensm/osm_sa_slvl_record.c (revision 8236) +++ opensm/osm_sa_slvl_record.c (working copy) @@ -387,25 +387,14 @@ osm_slvl_rec_rcv_process( CL_ASSERT( cl_ptr_vector_get_size(p_tbl) < 0x1 ); -if ((uint16_t)cl_ptr_vector_get_size(p_tbl) > cl_ntoh16(p_rcvd_rec->lid)) +status = osm_get_port_by_base_lid( p_rcv->p_subn, p_rcvd_rec->lid, &p_port ); +if ( ( status != IB_SUCCESS ) || ( p_port == NULL ) ) { - status = osm_get_port_by_base_lid( p_rcv->p_subn, p_rcvd_rec->lid, &p_port ); - if ( ( status != IB_SUCCESS ) || ( p_port == NULL ) ) - { -status = IB_NOT_FOUND; -osm_log( p_rcv->p_log, OSM_LOG_ERROR, - "osm_slvl_rec_rcv_process: ERR 2608: " - "No port found with LID 0x%x\n", - cl_ntoh16(p_rcvd_rec->lid) ); - } -} -else -{ /* LID out of range */ status = IB_NOT_FOUND; osm_log( p_rcv->p_log, OSM_LOG_ERROR, - "osm_slvl_rec_rcv_process: ERR 2601: " - "Given LID (0x%X) is out of range:0x%X\n", - cl_ntoh16(p_rcvd_rec->lid), cl_ptr_vector_get_size(p_tbl)); + "osm_slvl_rec_rcv_process: ERR 2608: " + "No port found with LID 0x%x\n", + cl_ntoh16(p_rcvd_rec->lid) ); } } Index: opensm/osm_sa_vlarb_record.c === --- opensm/osm_sa_vlarb_record.c(revision 8236) +++ opensm/osm_sa_vlarb_record.c(working copy) @@ -407,25 +407,14 @@ osm_vlarb_rec_rcv_process( CL_ASSERT( cl_ptr_vector_get_size(p_tbl) < 0x1 ); -if ((uin