Here is an updated kernel patch and a matching libibverbs patch (both
against your branch).  Still compile-tested only.  I'll get my
PathScale system set up to do some testing of this code tomorrow
morning and see if this stuff actually works.

 - R.

Index: infiniband/include/rdma/ib_user_verbs.h
===================================================================
--- infiniband/include/rdma/ib_user_verbs.h	(revision 3742)
+++ infiniband/include/rdma/ib_user_verbs.h	(working copy)
@@ -89,8 +89,11 @@ enum {
  * Make sure that all structs defined in this file remain laid out so
  * that they pack the same way on 32-bit and 64-bit architectures (to
  * avoid incompatibility between 32-bit userspace and 64-bit kernels).
- * In particular do not use pointer types -- pass pointers in __u64
- * instead.
+ * Specifically:
+ *  - Do not use pointer types -- pass pointers in __u64 instead.
+ *  - Make sure that any structure larger than 4 bytes is padded to a
+ *    multiple of 8 bytes.  Otherwise the structure size will be
+ *    different between 32-bit and 64-bit architectures.
  */
 
 struct ib_uverbs_async_event_desc {
@@ -284,12 +287,12 @@ struct ib_uverbs_wc {
 	__u8 sl;
 	__u8 dlid_path_bits;
 	__u8 port_num;
-	__u8 reserved; /* Align struct to 8 bytes */
+	__u8 reserved;
 };
 
 struct ib_uverbs_poll_cq_resp {
 	__u32 count;
-	__u32 reserved; /* Align struct to 8 bytes */
+	__u32 reserved;
 	struct ib_uverbs_wc wc[];
 };
 
@@ -417,20 +420,20 @@ struct ib_uverbs_send_wr {
 		struct {
 			__u64 remote_addr;
 			__u32 rkey;
-			__u32 reserved; /* Align struct to 8 bytes */
+			__u32 reserved;
 		} rdma;
 		struct {
 			__u64 remote_addr;
 			__u64 compare_add;
 			__u64 swap;
 			__u32 rkey;
-			__u32 reserved; /* Align struct to 8 bytes */
+			__u32 reserved;
 		} atomic;
 		struct {
 			__u32 ah;
 			__u32 remote_qpn;
 			__u32 remote_qkey;
-			__u32 reserved; /* Align struct to 8 bytes */
+			__u32 reserved;
 		} ud;
 	} wr;
 };
@@ -440,8 +443,7 @@ struct ib_uverbs_post_send {
 	__u32 qp_handle;
 	__u32 wr_count;
 	__u32 sge_count;
-	__u32 reserved; /* Align struct to 8 bytes */
-	__u64 wr;
+	__u32 wqe_size;
 };
 
 struct ib_uverbs_post_send_resp {
@@ -451,7 +453,7 @@ struct ib_uverbs_post_send_resp {
 struct ib_uverbs_recv_wr {
 	__u64 wr_id;
 	__u32 num_sge;
-	__u32 reserved; /* Align struct to 8 bytes */
+	__u32 reserved;
 };
 
 struct ib_uverbs_post_recv {
@@ -459,8 +461,7 @@ struct ib_uverbs_post_recv {
 	__u32 qp_handle;
 	__u32 wr_count;
 	__u32 sge_count;
-	__u32 reserved; /* Align struct to 8 bytes */
-	__u64 wr;
+	__u32 wqe_size;
 };
 
 struct ib_uverbs_post_recv_resp {
@@ -472,47 +473,38 @@ struct ib_uverbs_post_srq_recv {
 	__u32 srq_handle;
 	__u32 wr_count;
 	__u32 sge_count;
-	__u32 reserved; /* Align struct to 8 bytes */
-	__u64 wr;
+	__u32 wqe_size;
 };
 
 struct ib_uverbs_post_srq_recv_resp {
 	__u32 bad_wr;
 };
 
-union ib_uverbs_gid {
-	__u8 raw[16]; 
-	struct {
-		__u64 subnet_prefix;
-		__u64 interface_id;
-	} global;
-};
-
-struct ibv_m_global_route {
-	union ib_uverbs_gid dgid;
+struct ib_uverbs_global_route {
+	__u8  dgid[16];
 	__u32 flow_label;    
 	__u8  sgid_index;
 	__u8  hop_limit;
 	__u8  traffic_class;
-	__u8  reserved; /* Align struct to 8 bytes */
+	__u8  reserved;
 };
 
 struct ib_uverbs_ah_attr {
-	struct ibv_m_global_route grh;
+	struct ib_uverbs_global_route grh;
 	__u16 dlid;
 	__u8  sl;
 	__u8  src_path_bits;
 	__u8  static_rate;
 	__u8  is_global;
 	__u8  port_num;
-	__u8  reserved; /* Align struct to 8 bytes */
+	__u8  reserved;
 };
 
 struct ib_uverbs_create_ah {
 	__u64 response;
 	__u64 user_handle;
 	__u32 pd_handle;
-	__u32 reserved; /* Align struct to 8 bytes */
+	__u32 reserved;
 	struct ib_uverbs_ah_attr attr;
 };
 
Index: infiniband/core/uverbs_cmd.c
===================================================================
--- infiniband/core/uverbs_cmd.c	(revision 3742)
+++ infiniband/core/uverbs_cmd.c	(working copy)
@@ -699,24 +699,25 @@ ssize_t ib_uverbs_poll_cq(struct ib_uver
 	}
 
 	resp->count = ib_poll_cq(cq, cmd.ne, wc);
-	for(i = 0; i < cmd.ne; i++) {
-		resp->wc[i].wr_id = wc[i].wr_id;
-		resp->wc[i].status = wc[i].status;
-		resp->wc[i].opcode = wc[i].opcode;
-		resp->wc[i].vendor_err = wc[i].vendor_err;
-		resp->wc[i].byte_len = wc[i].byte_len;
-		resp->wc[i].imm_data = wc[i].imm_data;
-		resp->wc[i].qp_num = wc[i].qp_num;
-		resp->wc[i].src_qp = wc[i].src_qp;
-		resp->wc[i].wc_flags = wc[i].wc_flags;
-		resp->wc[i].pkey_index = wc[i].pkey_index;
-		resp->wc[i].slid = wc[i].slid;
-		resp->wc[i].sl = wc[i].sl;
+	for (i = 0; i < resp->count; i++) {
+		resp->wc[i].wr_id 	   = wc[i].wr_id;
+		resp->wc[i].status 	   = wc[i].status;
+		resp->wc[i].opcode 	   = wc[i].opcode;
+		resp->wc[i].vendor_err 	   = wc[i].vendor_err;
+		resp->wc[i].byte_len 	   = wc[i].byte_len;
+		resp->wc[i].imm_data 	   = wc[i].imm_data;
+		resp->wc[i].qp_num 	   = wc[i].qp_num;
+		resp->wc[i].src_qp 	   = wc[i].src_qp;
+		resp->wc[i].wc_flags 	   = wc[i].wc_flags;
+		resp->wc[i].pkey_index 	   = wc[i].pkey_index;
+		resp->wc[i].slid 	   = wc[i].slid;
+		resp->wc[i].sl 		   = wc[i].sl;
 		resp->wc[i].dlid_path_bits = wc[i].dlid_path_bits;
-		resp->wc[i].port_num = wc[i].port_num;
+		resp->wc[i].port_num 	   = wc[i].port_num;
 	}
 
-	if (copy_to_user((void __user *)cmd.response, resp, rsize))
+	if (copy_to_user((void __user *) (unsigned long) cmd.response,
+			 resp, rsize))
 		ret = -EFAULT;
 
 out:
@@ -741,15 +742,12 @@ ssize_t ib_uverbs_req_notify_cq(struct i
 
 	down(&ib_uverbs_idr_mutex);
 	cq = idr_find(&ib_uverbs_cq_idr, cmd.cq_handle);
-	if (!cq || cq->uobject->context != file->ucontext)
-		goto out;
-
-	ib_req_notify_cq(cq, cmd.solicited ? IB_CQ_SOLICITED : IB_CQ_NEXT_COMP);
-
-	ret = in_len;
-
-out:
+	if (cq && cq->uobject->context == file->ucontext) {
+		ib_req_notify_cq(cq, cmd.solicited ? IB_CQ_SOLICITED : IB_CQ_NEXT_COMP);
+		ret = in_len;
+	}
 	up(&ib_uverbs_idr_mutex);
+
 	return ret;
 }
 
@@ -1097,195 +1095,275 @@ ssize_t ib_uverbs_post_send(struct ib_uv
 {
 	struct ib_uverbs_post_send      cmd;
 	struct ib_uverbs_post_send_resp resp;
-	struct ib_uverbs_send_wr       *m_wr, *j;
-	struct ib_send_wr              *wr, *i, *bad_wr;
-	struct ib_sge                  *s;
+	struct ib_uverbs_send_wr       *user_wr;
+	struct ib_send_wr              *wr = NULL, *last, *next, *bad_wr;
 	struct ib_qp                   *qp;
-	int                             size;
-	int                             count;
+	int                             i, sg_ind;
 	ssize_t                         ret = -EINVAL;
 
-	resp.bad_wr = 0;
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
+	if (in_len < sizeof cmd + cmd.wqe_size * cmd.wr_count +
+	    cmd.sge_count * sizeof (struct ib_uverbs_sge))
+		return -EINVAL;
+
+	if (cmd.wqe_size < sizeof (struct ib_uverbs_send_wr))
+		return -EINVAL;
+
+	user_wr = kmalloc(cmd.wqe_size, GFP_KERNEL);
+	if (!user_wr)
+		return -ENOMEM;
+
 	down(&ib_uverbs_idr_mutex);
 
 	qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle);
 	if (!qp || qp->uobject->context != file->ucontext)
 		goto out;
 
-	size = (cmd.wr_count * sizeof *wr) + (cmd.sge_count * sizeof *s);
-	m_wr = kmalloc(size, GFP_KERNEL);
-	if (!m_wr) {
-		ret = -ENOMEM;
-		goto out;
-	}
-
-	if (copy_from_user(m_wr, (void __user *)cmd.wr, size)) {
-		ret = -EFAULT;
-		goto wrout;
-	}
-
-	wr = kmalloc(cmd.wr_count * sizeof *wr, GFP_KERNEL);
-	if (!wr) {
-		ret = -ENOMEM;
-		goto wrout;
-	}
+	sg_ind = 0;
+	last = NULL;
+	for (i = 0; i < cmd.wr_count; ++i) {
+		if (copy_from_user(user_wr,
+				   buf + sizeof cmd + i * cmd.wqe_size,
+				   cmd.wqe_size)) {
+			ret = -EFAULT;
+			goto out;
+		}
 
-	s = (struct ib_sge *)(m_wr + cmd.wr_count);
+		if (user_wr->num_sge + sg_ind > cmd.sge_count) {
+			ret = -EINVAL;
+			goto out;
+		}
 
-	i = wr;
-	j = m_wr;
-	count = 0;
-	while (count++ < cmd.wr_count) {
-		struct ib_send_wr *t = i++;
-		struct ib_uverbs_send_wr *u = j++;
+		next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) +
+			       user_wr->num_sge * sizeof (struct ib_sge),
+			       GFP_KERNEL);
+		if (!next) {
+			ret = -ENOMEM;
+			goto out;
+		}
 
-		if (count < cmd.wr_count)
-			t->next = i;
+		if (!last)
+			wr = next;
 		else
-			t->next = NULL;
+			last->next = next;
+		last = next;
 
-		t->wr_id = u->wr_id;
-		t->num_sge = u->num_sge;
-		t->opcode = u->opcode;
-		t->send_flags = u->send_flags;
-		t->imm_data = u->imm_data;
+		next->next       = NULL;
+		next->wr_id      = user_wr->wr_id;
+		next->num_sge    = user_wr->num_sge;
+		next->opcode     = user_wr->opcode;
+		next->send_flags = user_wr->send_flags;
+		next->imm_data   = user_wr->imm_data;
 
 		if (qp->qp_type == IB_QPT_UD) {
-			t->wr.ud.ah = idr_find(&ib_uverbs_ah_idr, u->wr.ud.ah);
-			if (!t->wr.ud.ah)
-				goto kwrout;
-			t->wr.ud.remote_qpn = u->wr.ud.remote_qpn;
-			t->wr.ud.remote_qkey = u->wr.ud.remote_qkey;
+			next->wr.ud.ah = idr_find(&ib_uverbs_ah_idr,
+						  user_wr->wr.ud.ah);
+			if (!next->wr.ud.ah) {
+				ret = -EINVAL;
+				goto out;
+			}
+			next->wr.ud.remote_qpn  = user_wr->wr.ud.remote_qpn;
+			next->wr.ud.remote_qkey = user_wr->wr.ud.remote_qkey;
 		} else {
-			switch (t->opcode) {
+			switch (next->opcode) {
 			case IB_WR_RDMA_WRITE:
 			case IB_WR_RDMA_WRITE_WITH_IMM:
 			case IB_WR_RDMA_READ:
-				t->wr.rdma.remote_addr = u->wr.rdma.remote_addr;
-				t->wr.rdma.rkey = u->wr.rdma.rkey;
+				next->wr.rdma.remote_addr =
+					user_wr->wr.rdma.remote_addr;
+				next->wr.rdma.rkey        =
+					user_wr->wr.rdma.rkey;
 				break;
 			case IB_WR_ATOMIC_CMP_AND_SWP:
 			case IB_WR_ATOMIC_FETCH_AND_ADD:
-				t->wr.atomic.remote_addr =
-					u->wr.atomic.remote_addr;
-				t->wr.atomic.compare_add =
-					u->wr.atomic.compare_add;
-				t->wr.atomic.swap = u->wr.atomic.swap;
-				t->wr.atomic.rkey = u->wr.atomic.rkey;
+				next->wr.atomic.remote_addr =
+					user_wr->wr.atomic.remote_addr;
+				next->wr.atomic.compare_add =
+					user_wr->wr.atomic.compare_add;
+				next->wr.atomic.swap = user_wr->wr.atomic.swap;
+				next->wr.atomic.rkey = user_wr->wr.atomic.rkey;
 				break;
 			default:
 				break;
 			}
 		}
 
-		if (t->num_sge) {
-			t->sg_list = s;
-			s += t->num_sge;
+		if (next->num_sge) {
+			next->sg_list = (void *) next +
+				ALIGN(sizeof *next, sizeof (struct ib_sge));
+			if (copy_from_user(next->sg_list,
+					   buf + sizeof cmd +
+					   cmd.wr_count * cmd.wqe_size +
+					   sg_ind * sizeof (struct ib_sge),
+					   next->num_sge * sizeof (struct ib_sge))) {
+				ret = -EFAULT;
+				goto out;
+			}
+			sg_ind += next->num_sge;
 		} else
-			t->sg_list = NULL;
+			next->sg_list = NULL;
 	}
 
+	resp.bad_wr = 0;
 	ret = qp->device->post_send(qp, wr, &bad_wr);
-	resp.bad_wr = ret ? (bad_wr - wr) + 1 : 0;
-
-kwrout:
-	kfree(wr);
+	if (ret)
+		for (next = wr; next; next = next->next) {
+			++resp.bad_wr;
+			if (next == bad_wr)
+				break;
+		}
 
-wrout:
-	kfree(m_wr);
+	if (copy_to_user((void __user *) (unsigned long) cmd.response,
+			 &resp, sizeof resp))
+		ret = -EFAULT;
 
 out:
 	up(&ib_uverbs_idr_mutex);
 
-	if (copy_to_user((void __user *) (unsigned long) cmd.response,
-			 &resp, sizeof resp))
-		ret = -EFAULT;
+	while (wr) {
+		next = wr->next;
+		kfree(wr);
+		wr = next;
+	}
+
+	kfree(user_wr);
 
 	return ret ? ret : in_len;
 }
 
+static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf,
+						    int in_len,
+						    u32 wr_count,
+						    u32 sge_count,
+						    u32 wqe_size)
+{
+	struct ib_uverbs_recv_wr *user_wr;
+	struct ib_recv_wr        *wr = NULL, *last, *next;
+	int                       sg_ind;
+	int                       i;
+	int                       ret;
+
+	if (in_len < wqe_size * wr_count +
+	    sge_count * sizeof (struct ib_uverbs_sge))
+		return ERR_PTR(-EINVAL);
+
+	if (wqe_size < sizeof (struct ib_uverbs_recv_wr))
+		return ERR_PTR(-EINVAL);
+
+	user_wr = kmalloc(wqe_size, GFP_KERNEL);
+	if (!user_wr)
+		return ERR_PTR(-ENOMEM);
+
+	sg_ind = 0;
+	last = NULL;
+	for (i = 0; i < wr_count; ++i) {
+		if (copy_from_user(user_wr, buf + i * wqe_size,
+				   wqe_size)) {
+			ret = -EFAULT;
+			goto err;
+		}
+
+		if (user_wr->num_sge + sg_ind > sge_count) {
+			ret = -EINVAL;
+			goto err;
+		}
+
+		next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) +
+			       user_wr->num_sge * sizeof (struct ib_sge),
+			       GFP_KERNEL);
+		if (!next) {
+			ret = -ENOMEM;
+			goto err;
+		}
+
+		if (!last)
+			wr = next;
+		else
+			last->next = next;
+		last = next;
+
+		next->next       = NULL;
+		next->wr_id      = user_wr->wr_id;
+		next->num_sge    = user_wr->num_sge;
+
+		if (next->num_sge) {
+			next->sg_list = (void *) next +
+				ALIGN(sizeof *next, sizeof (struct ib_sge));
+			if (copy_from_user(next->sg_list,
+					   buf + wr_count * wqe_size +
+					   sg_ind * sizeof (struct ib_sge),
+					   next->num_sge * sizeof (struct ib_sge))) {
+				ret = -EFAULT;
+				goto err;
+			}
+			sg_ind += next->num_sge;
+		} else
+			next->sg_list = NULL;
+	}
+
+	kfree(user_wr);
+	return wr;
+
+err:
+	kfree(user_wr);
+
+	while (wr) {
+		next = wr->next;
+		kfree(wr);
+		wr = next;
+	}
+
+	return ERR_PTR(ret);
+}
+
 ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
                             const char __user *buf, int in_len,
                             int out_len)
 {
 	struct ib_uverbs_post_recv      cmd;
 	struct ib_uverbs_post_recv_resp resp;
-	struct ib_uverbs_recv_wr       *m_wr, *j;
-	struct ib_recv_wr              *wr, *i, *bad_wr;
-	struct ib_sge                  *s;
+	struct ib_recv_wr              *wr, *next, *bad_wr;
 	struct ib_qp                   *qp;
-	int                             size;
-	int                             count;
 	ssize_t                         ret = -EINVAL;
 
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
+	wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd,
+				       in_len - sizeof cmd, cmd.wr_count,
+				       cmd.sge_count, cmd.wqe_size);
+	if (IS_ERR(wr))
+		return PTR_ERR(wr);
+
 	down(&ib_uverbs_idr_mutex);
 
 	qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle);
 	if (!qp || qp->uobject->context != file->ucontext)
 		goto out;
 
-	size = (cmd.wr_count * sizeof *m_wr) + (cmd.sge_count * sizeof *s);
-	m_wr = kmalloc(size, GFP_KERNEL);
-	if (!m_wr) {
-		ret = -ENOMEM;
-		goto out;
-	}
-
-	if (copy_from_user(m_wr, (void __user *)cmd.wr, size)) {
-		ret = -EFAULT;
-		goto wrout;
-	}
-
-	wr = kmalloc(cmd.wr_count * sizeof *wr, GFP_KERNEL);
-	if (!wr) {
-		ret = -ENOMEM;
-		goto wrout;
-	}
-
-	s = (struct ib_sge *)(m_wr + cmd.wr_count);
-
-	i = wr;
-	j = m_wr;
-	count = 0;
-	while (count++ < cmd.wr_count) {
-		struct ib_recv_wr *t = i++;
-		struct ib_uverbs_recv_wr *u = j++;
-
-		if (count < cmd.wr_count)
-			t->next = i;
-		else
-			t->next = NULL;
-
-		t->wr_id = u->wr_id;
-		t->num_sge = u->num_sge;
-
-		if (t->num_sge) {
-			t->sg_list = s;
-			s += t->num_sge;
-		} else
-			t->sg_list = NULL;
-	}
-
+	resp.bad_wr = 0;
 	ret = qp->device->post_recv(qp, wr, &bad_wr);
-	resp.bad_wr = ret ? (bad_wr - wr) + 1 : 0;
+	if (ret)
+		for (next = wr; next; next = next->next) {
+			++resp.bad_wr;
+			if (next == bad_wr)
+				break;
+		}
 
 	if (copy_to_user((void __user *) (unsigned long) cmd.response,
 			 &resp, sizeof resp))
 		ret = -EFAULT;
 
-	kfree(wr);
-
-wrout:
-	kfree(m_wr);
-
 out:
 	up(&ib_uverbs_idr_mutex);
+
+	while (wr) {
+		next = wr->next;
+		kfree(wr);
+		wr = next;
+	}
 
 	return ret ? ret : in_len;
 }
@@ -1294,80 +1372,48 @@ ssize_t ib_uverbs_post_srq_recv(struct i
                             const char __user *buf, int in_len,
                             int out_len)
 {
-	struct ib_uverbs_post_srq_recv cmd;
+	struct ib_uverbs_post_srq_recv      cmd;
 	struct ib_uverbs_post_srq_recv_resp resp;
-	struct ib_uverbs_recv_wr       *m_wr, *j;
-	struct ib_recv_wr              *wr, *i, *bad_wr;
-	struct ib_sge                  *s;
-	struct ib_srq                  *srq;
-	int                             size;
-	int                             count;
-	ssize_t                         ret = -EFAULT;
+	struct ib_recv_wr                  *wr, *next, *bad_wr;
+	struct ib_srq                      *srq;
+	ssize_t                             ret = -EINVAL;
 
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
+	wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd,
+				       in_len - sizeof cmd, cmd.wr_count,
+				       cmd.sge_count, cmd.wqe_size);
+	if (IS_ERR(wr))
+		return PTR_ERR(wr);
+
 	down(&ib_uverbs_idr_mutex);
 
 	srq = idr_find(&ib_uverbs_srq_idr, cmd.srq_handle);
 	if (!srq || srq->uobject->context != file->ucontext)
 		goto out;
 
-	size = (cmd.wr_count * sizeof *m_wr) + (cmd.sge_count * sizeof *s);
-	m_wr = kmalloc(size, GFP_KERNEL);
-	if (!m_wr) {
-		ret = -ENOMEM;
-		goto out;
-	}
-
-	if (copy_from_user(m_wr, (void __user *)cmd.wr, size)) {
-		goto wrout;
-	}
-
-	wr = kmalloc(cmd.wr_count * sizeof *wr, GFP_KERNEL);
-	if (!wr) {
-		ret = -ENOMEM;
-		goto wrout;
-	}
-
-	s = (struct ib_sge *)(m_wr + cmd.wr_count);
-
-	i = wr;
-	j = m_wr;
-	count = 0;
-	while (count++ < cmd.wr_count) {
-		struct ib_recv_wr *t = i++;
-		struct ib_uverbs_recv_wr *u = j++;
-
-		if (count < cmd.wr_count)
-			t->next = i;
-		else
-			t->next = NULL;
-
-		t->wr_id = u->wr_id;
-		t->num_sge = u->num_sge;
-
-		if (t->num_sge) {
-			t->sg_list = s;
-			s += t->num_sge;
-		} else
-			t->sg_list = NULL;
-	}
-
+	resp.bad_wr = 0;
 	ret = srq->device->post_srq_recv(srq, wr, &bad_wr);
-	resp.bad_wr = ret ? (bad_wr - wr) + 1 : 0;
+	if (ret)
+		for (next = wr; next; next = next->next) {
+			++resp.bad_wr;
+			if (next == bad_wr)
+				break;
+		}
 
 	if (copy_to_user((void __user *) (unsigned long) cmd.response,
 			 &resp, sizeof resp))
 		ret = -EFAULT;
 
-	kfree(wr);
-
-wrout:
-	kfree(m_wr);
-
 out:
 	up(&ib_uverbs_idr_mutex);
+
+	while (wr) {
+		next = wr->next;
+		kfree(wr);
+		wr = next;
+	}
 
 	return ret ? ret : in_len;
 }
@@ -1405,19 +1451,16 @@ ssize_t ib_uverbs_create_ah(struct ib_uv
 	uobj->user_handle = cmd.user_handle;
 	uobj->context     = file->ucontext;
 
-	attr.dlid = cmd.attr.dlid;
-	attr.sl = cmd.attr.sl;
-	attr.src_path_bits = cmd.attr.src_path_bits;
-	attr.static_rate = cmd.attr.static_rate;
-	attr.port_num = cmd.attr.port_num;
-	attr.grh.flow_label = cmd.attr.grh.flow_label;
-	attr.grh.sgid_index = cmd.attr.grh.sgid_index;
-	attr.grh.hop_limit = cmd.attr.grh.hop_limit;
+	attr.dlid 	       = cmd.attr.dlid;
+	attr.sl 	       = cmd.attr.sl;
+	attr.src_path_bits     = cmd.attr.src_path_bits;
+	attr.static_rate       = cmd.attr.static_rate;
+	attr.port_num 	       = cmd.attr.port_num;
+	attr.grh.flow_label    = cmd.attr.grh.flow_label;
+	attr.grh.sgid_index    = cmd.attr.grh.sgid_index;
+	attr.grh.hop_limit     = cmd.attr.grh.hop_limit;
 	attr.grh.traffic_class = cmd.attr.grh.traffic_class;
-	attr.grh.dgid.global.subnet_prefix =
-		cmd.attr.grh.dgid.global.subnet_prefix;
-	attr.grh.dgid.global.interface_id =
-		cmd.attr.grh.dgid.global.interface_id;
+	memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16);
 
 	ah = ib_create_ah(pd, &attr);
 	if (IS_ERR(ah)) {
Index: libibverbs/include/infiniband/kern-abi.h
===================================================================
--- libibverbs/include/infiniband/kern-abi.h	(revision 3739)
+++ libibverbs/include/infiniband/kern-abi.h	(working copy)
@@ -94,8 +94,11 @@ enum {
  * Make sure that all structs defined in this file remain laid out so
  * that they pack the same way on 32-bit and 64-bit architectures (to
  * avoid incompatibility between 32-bit userspace and 64-bit kernels).
- * In particular do not use pointer types -- pass pointers in __u64
- * instead.
+ * Specifically:
+ *  - Do not use pointer types -- pass pointers in __u64 instead.
+ *  - Make sure that any structure larger than 4 bytes is padded to a
+ *    multiple of 8 bytes.  Otherwise the structure size will be
+ *    different between 32-bit and 64-bit architectures.
  */
 
 struct ibv_kern_async_event {
@@ -314,7 +317,7 @@ struct ibv_kern_wc {
         __u8   sl;
         __u8   dlid_path_bits;
 	__u8   port_num;
-	__u8   reserved; /* Align struct to 8 bytes */
+	__u8   reserved;
 };
 
 struct ibv_poll_cq {
@@ -328,7 +331,7 @@ struct ibv_poll_cq {
 
 struct ibv_poll_cq_resp {
 	__u32 count;
-	__u32 reserved; /* Align struct to 8 bytes */
+	__u32 reserved;
 	struct ibv_kern_wc wc[];
 };
 
@@ -452,20 +455,20 @@ struct ibv_kern_send_wr {
 		struct {
 			__u64 remote_addr;
 			__u32 rkey;
-			__u32 reserved; /* Align struct to 8 bytes */
+			__u32 reserved;
 		} rdma;
 		struct {
 			__u64 remote_addr;
 			__u64 compare_add;
 			__u64 swap;
 			__u32 rkey;
-			__u32 reserved; /* Align struct to 8 bytes */
+			__u32 reserved;
 		} atomic;
 		struct {
 			__u32 ah;
 			__u32 remote_qpn;
 			__u32 remote_qkey;
-			__u32 reserved; /* Align struct to 8 bytes */
+			__u32 reserved;
 		} ud;
 	} wr;
 };
@@ -478,8 +481,7 @@ struct ibv_post_send {
 	__u32 qp_handle;
 	__u32 wr_count;
 	__u32 sge_count;
-	__u32 reserved; /* Align struct to 8 bytes */
-	__u64 wr;
+	__u32 wqe_size;
 };
 
 struct ibv_post_send_resp {
@@ -489,7 +491,7 @@ struct ibv_post_send_resp {
 struct ibv_kern_recv_wr {
 	__u64 wr_id;
 	__u32 num_sge;
-	__u32 reserved; /* Align struct to 8 bytes */
+	__u32 reserved;
 };
 
 struct ibv_post_recv {
@@ -500,8 +502,7 @@ struct ibv_post_recv {
 	__u32 qp_handle;
 	__u32 wr_count;
 	__u32 sge_count;
-	__u32 reserved; /* Align struct to 8 bytes */
-	__u64 wr;
+	__u32 wqe_size;
 };
 
 struct ibv_post_recv_resp {
@@ -516,29 +517,20 @@ struct ibv_post_srq_recv {
 	__u32 srq_handle;
 	__u32 wr_count;
 	__u32 sge_count;
-	__u32 reserved; /* Align struct to 8 bytes */
-	__u64 wr;
+	__u32 wqe_size;
 };
 
 struct ibv_post_srq_recv_resp {
 	__u32 bad_wr;
 };
 
-union ibv_kern_gid {
-	__u8 raw[16];
-	struct {
-		__u64 subnet_prefix;
-		__u64 interface_id;
-	} global;
-};
-
 struct ibv_kern_global_route {
-	union ibv_kern_gid dgid;
+	__u8  dgid[16];
 	__u32 flow_label;
 	__u8  sgid_index;
 	__u8  hop_limit;
 	__u8  traffic_class;
-	__u8  reserved; /* Align struct to 8 bytes */
+	__u8  reserved;
 };
 
 struct ibv_kern_ah_attr {
@@ -549,7 +541,7 @@ struct ibv_kern_ah_attr {
 	__u8  static_rate;
 	__u8  is_global;
 	__u8  port_num;
-	__u8  reserved; /* Align struct to 8 bytes */
+	__u8  reserved;
 };
 
 struct ibv_create_ah {
@@ -559,7 +551,7 @@ struct ibv_create_ah {
 	__u64 response;
 	__u64 user_handle;
 	__u32 pd_handle;
-	__u32 reserved; /* Align struct to 8 bytes */
+	__u32 reserved;
 	struct ibv_kern_ah_attr attr;
 };
 
Index: libibverbs/src/cmd.c
===================================================================
--- libibverbs/src/cmd.c	(revision 3739)
+++ libibverbs/src/cmd.c	(working copy)
@@ -327,19 +327,19 @@ int ibv_cmd_poll_cq(struct ibv_cq *ibcq,
 		goto out;
 	}
 
-	for(i = 0; i < ne; i++) {
-		wc[i].wr_id = resp->wc[i].wr_id;
-		wc[i].status = resp->wc[i].status;
-		wc[i].opcode = resp->wc[i].opcode;
-		wc[i].vendor_err = resp->wc[i].vendor_err;
-		wc[i].byte_len = resp->wc[i].byte_len;
-		wc[i].imm_data = resp->wc[i].imm_data;
-		wc[i].qp_num = resp->wc[i].qp_num;
-		wc[i].src_qp = resp->wc[i].src_qp;
-		wc[i].wc_flags = resp->wc[i].wc_flags;
-		wc[i].pkey_index = resp->wc[i].pkey_index;
-		wc[i].slid = resp->wc[i].slid;
-		wc[i].sl = resp->wc[i].sl;
+	for (i = 0; i < ne; i++) {
+		wc[i].wr_id 	     = resp->wc[i].wr_id;
+		wc[i].status 	     = resp->wc[i].status;
+		wc[i].opcode 	     = resp->wc[i].opcode;
+		wc[i].vendor_err     = resp->wc[i].vendor_err;
+		wc[i].byte_len 	     = resp->wc[i].byte_len;
+		wc[i].imm_data 	     = resp->wc[i].imm_data;
+		wc[i].qp_num 	     = resp->wc[i].qp_num;
+		wc[i].src_qp 	     = resp->wc[i].src_qp;
+		wc[i].wc_flags 	     = resp->wc[i].wc_flags;
+		wc[i].pkey_index     = resp->wc[i].pkey_index;
+		wc[i].slid 	     = resp->wc[i].slid;
+		wc[i].sl 	     = resp->wc[i].sl;
 		wc[i].dlid_path_bits = resp->wc[i].dlid_path_bits;
 	}
 
@@ -582,42 +582,43 @@ static int ibv_cmd_destroy_qp_v1(struct 
 int ibv_cmd_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
 		      struct ibv_send_wr **bad_wr)
 {
-	struct ibv_post_send      cmd;
+	struct ibv_post_send     *cmd;
 	struct ibv_post_send_resp resp;
 	struct ibv_send_wr       *i;
 	struct ibv_kern_send_wr  *n, *tmp;
 	struct ibv_sge           *s;
 	unsigned                  wr_count = 0;
 	unsigned                  sge_count = 0;
+	int                       size;
 	int                       ret;
 
-	for(i = wr; i; i = i->next) {
+	for (i = wr; i; i = i->next) {
 		wr_count++;
 		sge_count += i->num_sge;
 	}
 
-	n = malloc((wr_count * sizeof *n) + (sge_count * sizeof *s));
-	if(!n)
-		return errno;
+	size = sizeof *cmd + wr_count * sizeof *n + sge_count * sizeof *s;
+	cmd  = alloca(size);
 
-	IBV_INIT_CMD_RESP(&cmd, sizeof cmd, POST_SEND, &resp, sizeof resp);
-	cmd.qp_handle = ibqp->handle;
-	cmd.wr_count  = wr_count;
-	cmd.sge_count = sge_count;
-	cmd.wr        = (uintptr_t)n;
+	IBV_INIT_CMD_RESP(cmd, size, POST_SEND, &resp, sizeof resp);
+	cmd->qp_handle = ibqp->handle;
+	cmd->wr_count  = wr_count;
+	cmd->sge_count = sge_count;
+	cmd->wqe_size  = sizeof *n;
 
-	s = (struct ibv_sge *)(n + wr_count);
+	n = (struct ibv_kern_send_wr *) ((void *) cmd + sizeof *cmd);
+	s = (struct ibv_sge *) (n + wr_count);
 
 	tmp = n;
-	for(i = wr; i; i = i->next) {
-		tmp->wr_id = i->wr_id;
-		tmp->num_sge = i->num_sge;
-		tmp->opcode = i->opcode;
+	for (i = wr; i; i = i->next) {
+		tmp->wr_id 	= i->wr_id;
+		tmp->num_sge 	= i->num_sge;
+		tmp->opcode 	= i->opcode;
 		tmp->send_flags = i->send_flags;
-		tmp->imm_data = i->imm_data;
-		if(ibqp->qp_type == IBV_QPT_UD) {
-			tmp->wr.ud.ah = i->wr.ud.ah->handle;
-			tmp->wr.ud.remote_qpn = i->wr.ud.remote_qpn;
+		tmp->imm_data 	= i->imm_data;
+		if (ibqp->qp_type == IBV_QPT_UD) {
+			tmp->wr.ud.ah 	       = i->wr.ud.ah->handle;
+			tmp->wr.ud.remote_qpn  = i->wr.ud.remote_qpn;
 			tmp->wr.ud.remote_qkey = i->wr.ud.remote_qkey;
 		} else {
 			switch(i->opcode) {
@@ -642,7 +643,7 @@ int ibv_cmd_post_send(struct ibv_qp *ibq
 			}
 		}
 
-		if(tmp->num_sge) {
+		if (tmp->num_sge) {
 			memcpy(s, i->sg_list, tmp->num_sge * sizeof *s);
 			s += tmp->num_sge;
 		}
@@ -659,54 +660,53 @@ int ibv_cmd_post_send(struct ibv_qp *ibq
 
 out:
 	wr_count = resp.bad_wr;
-	if(wr_count) {
+	if (wr_count) {
 		i = wr;
-		while(--wr_count)
+		while (--wr_count)
 			i = i->next;
 		*bad_wr = i;
 	}
 
-	free(n);
 	return ret;
 }
 
 int ibv_cmd_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr,
 		      struct ibv_recv_wr **bad_wr)
 {
-	struct ibv_post_recv      cmd;
+	struct ibv_post_recv     *cmd;
 	struct ibv_post_recv_resp resp;
 	struct ibv_recv_wr       *i;
 	struct ibv_kern_recv_wr  *n, *tmp;
 	struct ibv_sge           *s;
 	unsigned                  wr_count = 0;
 	unsigned                  sge_count = 0;
+	int                       size;
 	int                       ret;
 
-	for(i = wr; i; i = i->next) {
+	for (i = wr; i; i = i->next) {
 		wr_count++;
 		sge_count += i->num_sge;
 	}
 
-	n = malloc((wr_count * sizeof *n) + (sge_count * sizeof *s));
-	if(!n) {
-		return errno;
-	}
+	size = sizeof *cmd + wr_count * sizeof *n + sge_count * sizeof *s;
+	cmd  = alloca(size);
 
 	resp.bad_wr = 0;
-	IBV_INIT_CMD_RESP(&cmd, sizeof cmd, POST_RECV, &resp, sizeof resp);
-	cmd.qp_handle = ibqp->handle;
-	cmd.wr_count  = wr_count;
-	cmd.sge_count = sge_count;
-	cmd.wr        = (uintptr_t)n;
+	IBV_INIT_CMD_RESP(cmd, size, POST_RECV, &resp, sizeof resp);
+	cmd->qp_handle = ibqp->handle;
+	cmd->wr_count  = wr_count;
+	cmd->sge_count = sge_count;
+	cmd->wqe_size  = sizeof *n;
 
-	s = (struct ibv_sge *)(n + wr_count);
+	n = (struct ibv_kern_recv_wr *) ((void *) cmd + sizeof *cmd);
+	s = (struct ibv_sge *) (n + wr_count);
 
 	tmp = n;
-	for(i = wr; i; i = i->next) {
-		tmp->wr_id = i->wr_id;
+	for (i = wr; i; i = i->next) {
+		tmp->wr_id   = i->wr_id;
 		tmp->num_sge = i->num_sge;
 
-		if(tmp->num_sge) {
+		if (tmp->num_sge) {
 			memcpy(s, i->sg_list, tmp->num_sge * sizeof *s);
 			s += tmp->num_sge;
 		}
@@ -723,27 +721,27 @@ int ibv_cmd_post_recv(struct ibv_qp *ibq
 
 out:
 	wr_count = resp.bad_wr;
-	if(wr_count) {
+	if (wr_count) {
 		i = wr;
-		while(--wr_count)
+		while (--wr_count)
 			i = i->next;
 		*bad_wr = i;
 	}
 
-	free(n);
 	return ret;
 }
 
 int ibv_cmd_post_srq_recv(struct ibv_srq *srq, struct ibv_recv_wr *wr,
 		      struct ibv_recv_wr **bad_wr)
 {
-	struct ibv_post_srq_recv cmd;
+	struct ibv_post_srq_recv *cmd;
 	struct ibv_post_srq_recv_resp resp;
 	struct ibv_recv_wr       *i;
 	struct ibv_kern_recv_wr  *n, *tmp;
 	struct ibv_sge           *s;
 	unsigned                  wr_count = 0;
 	unsigned                  sge_count = 0;
+	int                       size;
 	int                       ret;
 
 	for (i = wr; i; i = i->next) {
@@ -751,19 +749,18 @@ int ibv_cmd_post_srq_recv(struct ibv_srq
 		sge_count += i->num_sge;
 	}
 
-	n = malloc((wr_count * sizeof *n) + (sge_count * sizeof *s));
-	if (!n) {
-		return errno;
-	}
+	size = sizeof *cmd + wr_count * sizeof *n + sge_count * sizeof *s;
+	cmd  = alloca(size);
 
 	resp.bad_wr = 0;
-	IBV_INIT_CMD_RESP(&cmd, sizeof cmd, POST_SRQ_RECV, &resp, sizeof resp);
-	cmd.srq_handle = srq->handle;
-	cmd.wr_count  = wr_count;
-	cmd.sge_count = sge_count;
-	cmd.wr        = (uintptr_t)n;
+	IBV_INIT_CMD_RESP(cmd, size, POST_SRQ_RECV, &resp, sizeof resp);
+	cmd->srq_handle = srq->handle;
+	cmd->wr_count  = wr_count;
+	cmd->sge_count = sge_count;
+	cmd->wqe_size  = sizeof *n;
 
-	s = (struct ibv_sge *)(n + wr_count);
+	n = (struct ibv_kern_recv_wr *) ((void *) cmd + sizeof *cmd);
+	s = (struct ibv_sge *) (n + wr_count);
 
 	tmp = n;
 	for (i = wr; i; i = i->next) {
@@ -794,7 +789,6 @@ out:
 		*bad_wr = i;
 	}
 
-	free(n);
 	return ret;
 }
 
@@ -805,7 +799,7 @@ int ibv_cmd_create_ah(struct ibv_pd *pd,
 	struct ibv_create_ah_resp resp;
 
 	IBV_INIT_CMD_RESP(&cmd, sizeof cmd, CREATE_AH, &resp, sizeof resp);
-	cmd.user_handle            = (uintptr_t)ah;
+	cmd.user_handle            = (uintptr_t) ah;
 	cmd.pd_handle              = pd->handle;
 	cmd.attr.dlid              = attr->dlid;
 	cmd.attr.sl                = attr->sl;
@@ -817,8 +811,7 @@ int ibv_cmd_create_ah(struct ibv_pd *pd,
 	cmd.attr.grh.sgid_index    = attr->grh.sgid_index;
 	cmd.attr.grh.hop_limit     = attr->grh.hop_limit;
 	cmd.attr.grh.traffic_class = attr->grh.traffic_class;
-	cmd.attr.grh.dgid.global.subnet_prefix = attr->grh.dgid.global.subnet_prefix;
-	cmd.attr.grh.dgid.global.interface_id = attr->grh.dgid.global.interface_id;
+	memcpy(cmd.attr.grh.dgid, attr->grh.dgid.raw, 16);
 
 	if (write(pd->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
 		return errno;
_______________________________________________
openib-general mailing list
openib-general@openib.org
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Reply via email to