Here is an updated kernel patch and a matching libibverbs patch (both against your branch). Both are still compile-tested only. I'll get my Pathscale system set up tomorrow morning to test this code and see whether it actually works.
- R.
Index: infiniband/include/rdma/ib_user_verbs.h =================================================================== --- infiniband/include/rdma/ib_user_verbs.h (revision 3742) +++ infiniband/include/rdma/ib_user_verbs.h (working copy) @@ -89,8 +89,11 @@ enum { * Make sure that all structs defined in this file remain laid out so * that they pack the same way on 32-bit and 64-bit architectures (to * avoid incompatibility between 32-bit userspace and 64-bit kernels). - * In particular do not use pointer types -- pass pointers in __u64 - * instead. + * Specifically: + * - Do not use pointer types -- pass pointers in __u64 instead. + * - Make sure that any structure larger than 4 bytes is padded to a + * multiple of 8 bytes. Otherwise the structure size will be + * different between 32-bit and 64-bit architectures. */ struct ib_uverbs_async_event_desc { @@ -284,12 +287,12 @@ struct ib_uverbs_wc { __u8 sl; __u8 dlid_path_bits; __u8 port_num; - __u8 reserved; /* Align struct to 8 bytes */ + __u8 reserved; }; struct ib_uverbs_poll_cq_resp { __u32 count; - __u32 reserved; /* Align struct to 8 bytes */ + __u32 reserved; struct ib_uverbs_wc wc[]; }; @@ -417,20 +420,20 @@ struct ib_uverbs_send_wr { struct { __u64 remote_addr; __u32 rkey; - __u32 reserved; /* Align struct to 8 bytes */ + __u32 reserved; } rdma; struct { __u64 remote_addr; __u64 compare_add; __u64 swap; __u32 rkey; - __u32 reserved; /* Align struct to 8 bytes */ + __u32 reserved; } atomic; struct { __u32 ah; __u32 remote_qpn; __u32 remote_qkey; - __u32 reserved; /* Align struct to 8 bytes */ + __u32 reserved; } ud; } wr; }; @@ -440,8 +443,7 @@ struct ib_uverbs_post_send { __u32 qp_handle; __u32 wr_count; __u32 sge_count; - __u32 reserved; /* Align struct to 8 bytes */ - __u64 wr; + __u32 wqe_size; }; struct ib_uverbs_post_send_resp { @@ -451,7 +453,7 @@ struct ib_uverbs_post_send_resp { struct ib_uverbs_recv_wr { __u64 wr_id; __u32 num_sge; - __u32 reserved; /* Align struct to 8 bytes */ + __u32 reserved; }; 
struct ib_uverbs_post_recv { @@ -459,8 +461,7 @@ struct ib_uverbs_post_recv { __u32 qp_handle; __u32 wr_count; __u32 sge_count; - __u32 reserved; /* Align struct to 8 bytes */ - __u64 wr; + __u32 wqe_size; }; struct ib_uverbs_post_recv_resp { @@ -472,47 +473,38 @@ struct ib_uverbs_post_srq_recv { __u32 srq_handle; __u32 wr_count; __u32 sge_count; - __u32 reserved; /* Align struct to 8 bytes */ - __u64 wr; + __u32 wqe_size; }; struct ib_uverbs_post_srq_recv_resp { __u32 bad_wr; }; -union ib_uverbs_gid { - __u8 raw[16]; - struct { - __u64 subnet_prefix; - __u64 interface_id; - } global; -}; - -struct ibv_m_global_route { - union ib_uverbs_gid dgid; +struct ib_uverbs_global_route { + __u8 dgid[16]; __u32 flow_label; __u8 sgid_index; __u8 hop_limit; __u8 traffic_class; - __u8 reserved; /* Align struct to 8 bytes */ + __u8 reserved; }; struct ib_uverbs_ah_attr { - struct ibv_m_global_route grh; + struct ib_uverbs_global_route grh; __u16 dlid; __u8 sl; __u8 src_path_bits; __u8 static_rate; __u8 is_global; __u8 port_num; - __u8 reserved; /* Align struct to 8 bytes */ + __u8 reserved; }; struct ib_uverbs_create_ah { __u64 response; __u64 user_handle; __u32 pd_handle; - __u32 reserved; /* Align struct to 8 bytes */ + __u32 reserved; struct ib_uverbs_ah_attr attr; }; Index: infiniband/core/uverbs_cmd.c =================================================================== --- infiniband/core/uverbs_cmd.c (revision 3742) +++ infiniband/core/uverbs_cmd.c (working copy) @@ -699,24 +699,25 @@ ssize_t ib_uverbs_poll_cq(struct ib_uver } resp->count = ib_poll_cq(cq, cmd.ne, wc); - for(i = 0; i < cmd.ne; i++) { - resp->wc[i].wr_id = wc[i].wr_id; - resp->wc[i].status = wc[i].status; - resp->wc[i].opcode = wc[i].opcode; - resp->wc[i].vendor_err = wc[i].vendor_err; - resp->wc[i].byte_len = wc[i].byte_len; - resp->wc[i].imm_data = wc[i].imm_data; - resp->wc[i].qp_num = wc[i].qp_num; - resp->wc[i].src_qp = wc[i].src_qp; - resp->wc[i].wc_flags = wc[i].wc_flags; - resp->wc[i].pkey_index = 
wc[i].pkey_index; - resp->wc[i].slid = wc[i].slid; - resp->wc[i].sl = wc[i].sl; + for (i = 0; i < resp->count; i++) { + resp->wc[i].wr_id = wc[i].wr_id; + resp->wc[i].status = wc[i].status; + resp->wc[i].opcode = wc[i].opcode; + resp->wc[i].vendor_err = wc[i].vendor_err; + resp->wc[i].byte_len = wc[i].byte_len; + resp->wc[i].imm_data = wc[i].imm_data; + resp->wc[i].qp_num = wc[i].qp_num; + resp->wc[i].src_qp = wc[i].src_qp; + resp->wc[i].wc_flags = wc[i].wc_flags; + resp->wc[i].pkey_index = wc[i].pkey_index; + resp->wc[i].slid = wc[i].slid; + resp->wc[i].sl = wc[i].sl; resp->wc[i].dlid_path_bits = wc[i].dlid_path_bits; - resp->wc[i].port_num = wc[i].port_num; + resp->wc[i].port_num = wc[i].port_num; } - if (copy_to_user((void __user *)cmd.response, resp, rsize)) + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) ret = -EFAULT; out: @@ -741,15 +742,12 @@ ssize_t ib_uverbs_req_notify_cq(struct i down(&ib_uverbs_idr_mutex); cq = idr_find(&ib_uverbs_cq_idr, cmd.cq_handle); - if (!cq || cq->uobject->context != file->ucontext) - goto out; - - ib_req_notify_cq(cq, cmd.solicited ? IB_CQ_SOLICITED : IB_CQ_NEXT_COMP); - - ret = in_len; - -out: + if (cq && cq->uobject->context == file->ucontext) { + ib_req_notify_cq(cq, cmd.solicited ? 
IB_CQ_SOLICITED : IB_CQ_NEXT_COMP); + ret = in_len; + } up(&ib_uverbs_idr_mutex); + return ret; } @@ -1097,195 +1095,275 @@ ssize_t ib_uverbs_post_send(struct ib_uv { struct ib_uverbs_post_send cmd; struct ib_uverbs_post_send_resp resp; - struct ib_uverbs_send_wr *m_wr, *j; - struct ib_send_wr *wr, *i, *bad_wr; - struct ib_sge *s; + struct ib_uverbs_send_wr *user_wr; + struct ib_send_wr *wr = NULL, *last, *next, *bad_wr; struct ib_qp *qp; - int size; - int count; + int i, sg_ind; ssize_t ret = -EINVAL; - resp.bad_wr = 0; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; + if (in_len < sizeof cmd + cmd.wqe_size * cmd.wr_count + + cmd.sge_count * sizeof (struct ib_uverbs_sge)) + return -EINVAL; + + if (cmd.wqe_size < sizeof (struct ib_uverbs_send_wr)) + return -EINVAL; + + user_wr = kmalloc(cmd.wqe_size, GFP_KERNEL); + if (!user_wr) + return -ENOMEM; + down(&ib_uverbs_idr_mutex); qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle); if (!qp || qp->uobject->context != file->ucontext) goto out; - size = (cmd.wr_count * sizeof *wr) + (cmd.sge_count * sizeof *s); - m_wr = kmalloc(size, GFP_KERNEL); - if (!m_wr) { - ret = -ENOMEM; - goto out; - } - - if (copy_from_user(m_wr, (void __user *)cmd.wr, size)) { - ret = -EFAULT; - goto wrout; - } - - wr = kmalloc(cmd.wr_count * sizeof *wr, GFP_KERNEL); - if (!wr) { - ret = -ENOMEM; - goto wrout; - } + sg_ind = 0; + last = NULL; + for (i = 0; i < cmd.wr_count; ++i) { + if (copy_from_user(user_wr, + buf + sizeof cmd + i * cmd.wqe_size, + cmd.wqe_size)) { + ret = -EFAULT; + goto out; + } - s = (struct ib_sge *)(m_wr + cmd.wr_count); + if (user_wr->num_sge + sg_ind > cmd.sge_count) { + ret = -EINVAL; + goto out; + } - i = wr; - j = m_wr; - count = 0; - while (count++ < cmd.wr_count) { - struct ib_send_wr *t = i++; - struct ib_uverbs_send_wr *u = j++; + next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) + + user_wr->num_sge * sizeof (struct ib_sge), + GFP_KERNEL); + if (!next) { + ret = -ENOMEM; + goto out; + } - if 
(count < cmd.wr_count) - t->next = i; + if (!last) + wr = next; else - t->next = NULL; + last->next = next; + last = next; - t->wr_id = u->wr_id; - t->num_sge = u->num_sge; - t->opcode = u->opcode; - t->send_flags = u->send_flags; - t->imm_data = u->imm_data; + next->next = NULL; + next->wr_id = user_wr->wr_id; + next->num_sge = user_wr->num_sge; + next->opcode = user_wr->opcode; + next->send_flags = user_wr->send_flags; + next->imm_data = user_wr->imm_data; if (qp->qp_type == IB_QPT_UD) { - t->wr.ud.ah = idr_find(&ib_uverbs_ah_idr, u->wr.ud.ah); - if (!t->wr.ud.ah) - goto kwrout; - t->wr.ud.remote_qpn = u->wr.ud.remote_qpn; - t->wr.ud.remote_qkey = u->wr.ud.remote_qkey; + next->wr.ud.ah = idr_find(&ib_uverbs_ah_idr, + user_wr->wr.ud.ah); + if (!next->wr.ud.ah) { + ret = -EINVAL; + goto out; + } + next->wr.ud.remote_qpn = user_wr->wr.ud.remote_qpn; + next->wr.ud.remote_qkey = user_wr->wr.ud.remote_qkey; } else { - switch (t->opcode) { + switch (next->opcode) { case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: case IB_WR_RDMA_READ: - t->wr.rdma.remote_addr = u->wr.rdma.remote_addr; - t->wr.rdma.rkey = u->wr.rdma.rkey; + next->wr.rdma.remote_addr = + user_wr->wr.rdma.remote_addr; + next->wr.rdma.rkey = + user_wr->wr.rdma.rkey; break; case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: - t->wr.atomic.remote_addr = - u->wr.atomic.remote_addr; - t->wr.atomic.compare_add = - u->wr.atomic.compare_add; - t->wr.atomic.swap = u->wr.atomic.swap; - t->wr.atomic.rkey = u->wr.atomic.rkey; + next->wr.atomic.remote_addr = + user_wr->wr.atomic.remote_addr; + next->wr.atomic.compare_add = + user_wr->wr.atomic.compare_add; + next->wr.atomic.swap = user_wr->wr.atomic.swap; + next->wr.atomic.rkey = user_wr->wr.atomic.rkey; break; default: break; } } - if (t->num_sge) { - t->sg_list = s; - s += t->num_sge; + if (next->num_sge) { + next->sg_list = (void *) next + + ALIGN(sizeof *next, sizeof (struct ib_sge)); + if (copy_from_user(next->sg_list, + buf + sizeof cmd + + 
cmd.wr_count * cmd.wqe_size + + sg_ind * sizeof (struct ib_sge), + next->num_sge * sizeof (struct ib_sge))) { + ret = -EFAULT; + goto out; + } + sg_ind += next->num_sge; } else - t->sg_list = NULL; + next->sg_list = NULL; } + resp.bad_wr = 0; ret = qp->device->post_send(qp, wr, &bad_wr); - resp.bad_wr = ret ? (bad_wr - wr) + 1 : 0; - -kwrout: - kfree(wr); + if (ret) + for (next = wr; next; next = next->next) { + ++resp.bad_wr; + if (next == bad_wr) + break; + } -wrout: - kfree(m_wr); + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) + ret = -EFAULT; out: up(&ib_uverbs_idr_mutex); - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) - ret = -EFAULT; + while (wr) { + next = wr->next; + kfree(wr); + wr = next; + } + + kfree(user_wr); return ret ? ret : in_len; } +static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf, + int in_len, + u32 wr_count, + u32 sge_count, + u32 wqe_size) +{ + struct ib_uverbs_recv_wr *user_wr; + struct ib_recv_wr *wr = NULL, *last, *next; + int sg_ind; + int i; + int ret; + + if (in_len < wqe_size * wr_count + + sge_count * sizeof (struct ib_uverbs_sge)) + return ERR_PTR(-EINVAL); + + if (wqe_size < sizeof (struct ib_uverbs_recv_wr)) + return ERR_PTR(-EINVAL); + + user_wr = kmalloc(wqe_size, GFP_KERNEL); + if (!user_wr) + return ERR_PTR(-ENOMEM); + + sg_ind = 0; + last = NULL; + for (i = 0; i < wr_count; ++i) { + if (copy_from_user(user_wr, buf + i * wqe_size, + wqe_size)) { + ret = -EFAULT; + goto err; + } + + if (user_wr->num_sge + sg_ind > sge_count) { + ret = -EINVAL; + goto err; + } + + next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) + + user_wr->num_sge * sizeof (struct ib_sge), + GFP_KERNEL); + if (!next) { + ret = -ENOMEM; + goto err; + } + + if (!last) + wr = next; + else + last->next = next; + last = next; + + next->next = NULL; + next->wr_id = user_wr->wr_id; + next->num_sge = user_wr->num_sge; + + if (next->num_sge) { + 
next->sg_list = (void *) next + + ALIGN(sizeof *next, sizeof (struct ib_sge)); + if (copy_from_user(next->sg_list, + buf + wr_count * wqe_size + + sg_ind * sizeof (struct ib_sge), + next->num_sge * sizeof (struct ib_sge))) { + ret = -EFAULT; + goto err; + } + sg_ind += next->num_sge; + } else + next->sg_list = NULL; + } + + kfree(user_wr); + return wr; + +err: + kfree(user_wr); + + while (wr) { + next = wr->next; + kfree(wr); + wr = next; + } + + return ERR_PTR(ret); +} + ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_post_recv cmd; struct ib_uverbs_post_recv_resp resp; - struct ib_uverbs_recv_wr *m_wr, *j; - struct ib_recv_wr *wr, *i, *bad_wr; - struct ib_sge *s; + struct ib_recv_wr *wr, *next, *bad_wr; struct ib_qp *qp; - int size; - int count; ssize_t ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; + wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd, + in_len - sizeof cmd, cmd.wr_count, + cmd.sge_count, cmd.wqe_size); + if (IS_ERR(wr)) + return PTR_ERR(wr); + down(&ib_uverbs_idr_mutex); qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle); if (!qp || qp->uobject->context != file->ucontext) goto out; - size = (cmd.wr_count * sizeof *m_wr) + (cmd.sge_count * sizeof *s); - m_wr = kmalloc(size, GFP_KERNEL); - if (!m_wr) { - ret = -ENOMEM; - goto out; - } - - if (copy_from_user(m_wr, (void __user *)cmd.wr, size)) { - ret = -EFAULT; - goto wrout; - } - - wr = kmalloc(cmd.wr_count * sizeof *wr, GFP_KERNEL); - if (!wr) { - ret = -ENOMEM; - goto wrout; - } - - s = (struct ib_sge *)(m_wr + cmd.wr_count); - - i = wr; - j = m_wr; - count = 0; - while (count++ < cmd.wr_count) { - struct ib_recv_wr *t = i++; - struct ib_uverbs_recv_wr *u = j++; - - if (count < cmd.wr_count) - t->next = i; - else - t->next = NULL; - - t->wr_id = u->wr_id; - t->num_sge = u->num_sge; - - if (t->num_sge) { - t->sg_list = s; - s += t->num_sge; - } else - t->sg_list = NULL; - } - + resp.bad_wr = 0; ret 
= qp->device->post_recv(qp, wr, &bad_wr); - resp.bad_wr = ret ? (bad_wr - wr) + 1 : 0; + if (ret) + for (next = wr; next; next = next->next) { + ++resp.bad_wr; + if (next == bad_wr) + break; + } + + up(&ib_uverbs_idr_mutex); if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) ret = -EFAULT; - kfree(wr); - -wrout: - kfree(m_wr); - out: - up(&ib_uverbs_idr_mutex); + while (wr) { + next = wr->next; + kfree(wr); + wr = next; + } return ret ? ret : in_len; } @@ -1294,80 +1372,48 @@ ssize_t ib_uverbs_post_srq_recv(struct i const char __user *buf, int in_len, int out_len) { - struct ib_uverbs_post_srq_recv cmd; + struct ib_uverbs_post_srq_recv cmd; struct ib_uverbs_post_srq_recv_resp resp; - struct ib_uverbs_recv_wr *m_wr, *j; - struct ib_recv_wr *wr, *i, *bad_wr; - struct ib_sge *s; - struct ib_srq *srq; - int size; - int count; - ssize_t ret = -EFAULT; + struct ib_recv_wr *wr, *next, *bad_wr; + struct ib_srq *srq; + ssize_t ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; + wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd, + in_len - sizeof cmd, cmd.wr_count, + cmd.sge_count, cmd.wqe_size); + if (IS_ERR(wr)) + return PTR_ERR(wr); + down(&ib_uverbs_idr_mutex); srq = idr_find(&ib_uverbs_srq_idr, cmd.srq_handle); if (!srq || srq->uobject->context != file->ucontext) goto out; - size = (cmd.wr_count * sizeof *m_wr) + (cmd.sge_count * sizeof *s); - m_wr = kmalloc(size, GFP_KERNEL); - if (!m_wr) { - ret = -ENOMEM; - goto out; - } - - if (copy_from_user(m_wr, (void __user *)cmd.wr, size)) { - goto wrout; - } - - wr = kmalloc(cmd.wr_count * sizeof *wr, GFP_KERNEL); - if (!wr) { - ret = -ENOMEM; - goto wrout; - } - - s = (struct ib_sge *)(m_wr + cmd.wr_count); - - i = wr; - j = m_wr; - count = 0; - while (count++ < cmd.wr_count) { - struct ib_recv_wr *t = i++; - struct ib_uverbs_recv_wr *u = j++; - - if (count < cmd.wr_count) - t->next = i; - else - t->next = NULL; - - t->wr_id = u->wr_id; - t->num_sge = u->num_sge; - - if 
(t->num_sge) { - t->sg_list = s; - s += t->num_sge; - } else - t->sg_list = NULL; - } - + resp.bad_wr = 0; ret = srq->device->post_srq_recv(srq, wr, &bad_wr); - resp.bad_wr = ret ? (bad_wr - wr) + 1 : 0; + if (ret) + for (next = wr; next; next = next->next) { + ++resp.bad_wr; + if (next == bad_wr) + break; + } + + up(&ib_uverbs_idr_mutex); if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) ret = -EFAULT; - kfree(wr); - -wrout: - kfree(m_wr); - out: - up(&ib_uverbs_idr_mutex); + while (wr) { + next = wr->next; + kfree(wr); + wr = next; + } return ret ? ret : in_len; } @@ -1405,19 +1451,16 @@ ssize_t ib_uverbs_create_ah(struct ib_uv uobj->user_handle = cmd.user_handle; uobj->context = file->ucontext; - attr.dlid = cmd.attr.dlid; - attr.sl = cmd.attr.sl; - attr.src_path_bits = cmd.attr.src_path_bits; - attr.static_rate = cmd.attr.static_rate; - attr.port_num = cmd.attr.port_num; - attr.grh.flow_label = cmd.attr.grh.flow_label; - attr.grh.sgid_index = cmd.attr.grh.sgid_index; - attr.grh.hop_limit = cmd.attr.grh.hop_limit; + attr.dlid = cmd.attr.dlid; + attr.sl = cmd.attr.sl; + attr.src_path_bits = cmd.attr.src_path_bits; + attr.static_rate = cmd.attr.static_rate; + attr.port_num = cmd.attr.port_num; + attr.grh.flow_label = cmd.attr.grh.flow_label; + attr.grh.sgid_index = cmd.attr.grh.sgid_index; + attr.grh.hop_limit = cmd.attr.grh.hop_limit; attr.grh.traffic_class = cmd.attr.grh.traffic_class; - attr.grh.dgid.global.subnet_prefix = - cmd.attr.grh.dgid.global.subnet_prefix; - attr.grh.dgid.global.interface_id = - cmd.attr.grh.dgid.global.interface_id; + memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16); ah = ib_create_ah(pd, &attr); if (IS_ERR(ah)) {
Index: libibverbs/include/infiniband/kern-abi.h =================================================================== --- libibverbs/include/infiniband/kern-abi.h (revision 3739) +++ libibverbs/include/infiniband/kern-abi.h (working copy) @@ -94,8 +94,11 @@ enum { * Make sure that all structs defined in this file remain laid out so * that they pack the same way on 32-bit and 64-bit architectures (to * avoid incompatibility between 32-bit userspace and 64-bit kernels). - * In particular do not use pointer types -- pass pointers in __u64 - * instead. + * Specifically: + * - Do not use pointer types -- pass pointers in __u64 instead. + * - Make sure that any structure larger than 4 bytes is padded to a + * multiple of 8 bytes. Otherwise the structure size will be + * different between 32-bit and 64-bit architectures. */ struct ibv_kern_async_event { @@ -314,7 +317,7 @@ struct ibv_kern_wc { __u8 sl; __u8 dlid_path_bits; __u8 port_num; - __u8 reserved; /* Align struct to 8 bytes */ + __u8 reserved; }; struct ibv_poll_cq { @@ -328,7 +331,7 @@ struct ibv_poll_cq { struct ibv_poll_cq_resp { __u32 count; - __u32 reserved; /* Align struct to 8 bytes */ + __u32 reserved; struct ibv_kern_wc wc[]; }; @@ -452,20 +455,20 @@ struct ibv_kern_send_wr { struct { __u64 remote_addr; __u32 rkey; - __u32 reserved; /* Align struct to 8 bytes */ + __u32 reserved; } rdma; struct { __u64 remote_addr; __u64 compare_add; __u64 swap; __u32 rkey; - __u32 reserved; /* Align struct to 8 bytes */ + __u32 reserved; } atomic; struct { __u32 ah; __u32 remote_qpn; __u32 remote_qkey; - __u32 reserved; /* Align struct to 8 bytes */ + __u32 reserved; } ud; } wr; }; @@ -478,8 +481,7 @@ struct ibv_post_send { __u32 qp_handle; __u32 wr_count; __u32 sge_count; - __u32 reserved; /* Align struct to 8 bytes */ - __u64 wr; + __u32 wqe_size; }; struct ibv_post_send_resp { @@ -489,7 +491,7 @@ struct ibv_post_send_resp { struct ibv_kern_recv_wr { __u64 wr_id; __u32 num_sge; - __u32 reserved; /* Align struct to 8 bytes 
*/ + __u32 reserved; }; struct ibv_post_recv { @@ -500,8 +502,7 @@ struct ibv_post_recv { __u32 qp_handle; __u32 wr_count; __u32 sge_count; - __u32 reserved; /* Align struct to 8 bytes */ - __u64 wr; + __u32 wqe_size; }; struct ibv_post_recv_resp { @@ -516,29 +517,20 @@ struct ibv_post_srq_recv { __u32 srq_handle; __u32 wr_count; __u32 sge_count; - __u32 reserved; /* Align struct to 8 bytes */ - __u64 wr; + __u32 wqe_size; }; struct ibv_post_srq_recv_resp { __u32 bad_wr; }; -union ibv_kern_gid { - __u8 raw[16]; - struct { - __u64 subnet_prefix; - __u64 interface_id; - } global; -}; - struct ibv_kern_global_route { - union ibv_kern_gid dgid; + __u8 dgid[16]; __u32 flow_label; __u8 sgid_index; __u8 hop_limit; __u8 traffic_class; - __u8 reserved; /* Align struct to 8 bytes */ + __u8 reserved; }; struct ibv_kern_ah_attr { @@ -549,7 +541,7 @@ struct ibv_kern_ah_attr { __u8 static_rate; __u8 is_global; __u8 port_num; - __u8 reserved; /* Align struct to 8 bytes */ + __u8 reserved; }; struct ibv_create_ah { @@ -559,7 +551,7 @@ struct ibv_create_ah { __u64 response; __u64 user_handle; __u32 pd_handle; - __u32 reserved; /* Align struct to 8 bytes */ + __u32 reserved; struct ibv_kern_ah_attr attr; }; Index: libibverbs/src/cmd.c =================================================================== --- libibverbs/src/cmd.c (revision 3739) +++ libibverbs/src/cmd.c (working copy) @@ -327,19 +327,19 @@ int ibv_cmd_poll_cq(struct ibv_cq *ibcq, goto out; } - for(i = 0; i < ne; i++) { - wc[i].wr_id = resp->wc[i].wr_id; - wc[i].status = resp->wc[i].status; - wc[i].opcode = resp->wc[i].opcode; - wc[i].vendor_err = resp->wc[i].vendor_err; - wc[i].byte_len = resp->wc[i].byte_len; - wc[i].imm_data = resp->wc[i].imm_data; - wc[i].qp_num = resp->wc[i].qp_num; - wc[i].src_qp = resp->wc[i].src_qp; - wc[i].wc_flags = resp->wc[i].wc_flags; - wc[i].pkey_index = resp->wc[i].pkey_index; - wc[i].slid = resp->wc[i].slid; - wc[i].sl = resp->wc[i].sl; + for (i = 0; i < ne; i++) { + wc[i].wr_id = 
resp->wc[i].wr_id; + wc[i].status = resp->wc[i].status; + wc[i].opcode = resp->wc[i].opcode; + wc[i].vendor_err = resp->wc[i].vendor_err; + wc[i].byte_len = resp->wc[i].byte_len; + wc[i].imm_data = resp->wc[i].imm_data; + wc[i].qp_num = resp->wc[i].qp_num; + wc[i].src_qp = resp->wc[i].src_qp; + wc[i].wc_flags = resp->wc[i].wc_flags; + wc[i].pkey_index = resp->wc[i].pkey_index; + wc[i].slid = resp->wc[i].slid; + wc[i].sl = resp->wc[i].sl; wc[i].dlid_path_bits = resp->wc[i].dlid_path_bits; } @@ -582,42 +582,43 @@ static int ibv_cmd_destroy_qp_v1(struct int ibv_cmd_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr, struct ibv_send_wr **bad_wr) { - struct ibv_post_send cmd; + struct ibv_post_send *cmd; struct ibv_post_send_resp resp; struct ibv_send_wr *i; struct ibv_kern_send_wr *n, *tmp; struct ibv_sge *s; unsigned wr_count = 0; unsigned sge_count = 0; + int size; int ret; - for(i = wr; i; i = i->next) { + for (i = wr; i; i = i->next) { wr_count++; sge_count += i->num_sge; } - n = malloc((wr_count * sizeof *n) + (sge_count * sizeof *s)); - if(!n) - return errno; + size = sizeof *cmd + wr_count * sizeof *n + sge_count * sizeof *s; + cmd = alloca(size); - IBV_INIT_CMD_RESP(&cmd, sizeof cmd, POST_SEND, &resp, sizeof resp); - cmd.qp_handle = ibqp->handle; - cmd.wr_count = wr_count; - cmd.sge_count = sge_count; - cmd.wr = (uintptr_t)n; + IBV_INIT_CMD_RESP(cmd, size, POST_SEND, &resp, sizeof resp); + cmd->qp_handle = ibqp->handle; + cmd->wr_count = wr_count; + cmd->sge_count = sge_count; + cmd->wqe_size = sizeof *n; - s = (struct ibv_sge *)(n + wr_count); + n = (struct ibv_kern_send_wr *) ((void *) cmd + sizeof *cmd); + s = (struct ibv_sge *) (n + wr_count); tmp = n; - for(i = wr; i; i = i->next) { - tmp->wr_id = i->wr_id; - tmp->num_sge = i->num_sge; - tmp->opcode = i->opcode; + for (i = wr; i; i = i->next) { + tmp->wr_id = i->wr_id; + tmp->num_sge = i->num_sge; + tmp->opcode = i->opcode; tmp->send_flags = i->send_flags; - tmp->imm_data = i->imm_data; - 
if(ibqp->qp_type == IBV_QPT_UD) { - tmp->wr.ud.ah = i->wr.ud.ah->handle; - tmp->wr.ud.remote_qpn = i->wr.ud.remote_qpn; + tmp->imm_data = i->imm_data; + if (ibqp->qp_type == IBV_QPT_UD) { + tmp->wr.ud.ah = i->wr.ud.ah->handle; + tmp->wr.ud.remote_qpn = i->wr.ud.remote_qpn; tmp->wr.ud.remote_qkey = i->wr.ud.remote_qkey; } else { switch(i->opcode) { @@ -642,7 +643,7 @@ int ibv_cmd_post_send(struct ibv_qp *ibq } } - if(tmp->num_sge) { + if (tmp->num_sge) { memcpy(s, i->sg_list, tmp->num_sge * sizeof *s); s += tmp->num_sge; } @@ -659,54 +660,51 @@ int ibv_cmd_post_send(struct ibv_qp *ibq out: wr_count = resp.bad_wr; - if(wr_count) { + if (wr_count) { i = wr; - while(--wr_count) + while (--wr_count) i = i->next; *bad_wr = i; } - free(n); return ret; } int ibv_cmd_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr, struct ibv_recv_wr **bad_wr) { - struct ibv_post_recv cmd; + struct ibv_post_recv *cmd; struct ibv_post_recv_resp resp; struct ibv_recv_wr *i; struct ibv_kern_recv_wr *n, *tmp; struct ibv_sge *s; unsigned wr_count = 0; unsigned sge_count = 0; + int size; int ret; - for(i = wr; i; i = i->next) { + for (i = wr; i; i = i->next) { wr_count++; sge_count += i->num_sge; } - n = malloc((wr_count * sizeof *n) + (sge_count * sizeof *s)); - if(!n) { - return errno; - } + size = sizeof *cmd + wr_count * sizeof *n + sge_count * sizeof *s; + cmd = alloca(size); - resp.bad_wr = 0; - IBV_INIT_CMD_RESP(&cmd, sizeof cmd, POST_RECV, &resp, sizeof resp); - cmd.qp_handle = ibqp->handle; - cmd.wr_count = wr_count; - cmd.sge_count = sge_count; - cmd.wr = (uintptr_t)n; + IBV_INIT_CMD_RESP(cmd, size, POST_RECV, &resp, sizeof resp); + cmd->qp_handle = ibqp->handle; + cmd->wr_count = wr_count; + cmd->sge_count = sge_count; - s = (struct ibv_sge *)(n + wr_count); + n = (struct ibv_kern_recv_wr *) ((void *) cmd + sizeof *cmd); + s = (struct ibv_sge *) (n + wr_count); tmp = n; - for(i = wr; i; i = i->next) { - tmp->wr_id = i->wr_id; + for (i = wr; i; i = i->next) { + tmp->wr_id = 
i->wr_id; tmp->num_sge = i->num_sge; - if(tmp->num_sge) { + if (tmp->num_sge) { memcpy(s, i->sg_list, tmp->num_sge * sizeof *s); s += tmp->num_sge; } @@ -723,27 +721,27 @@ int ibv_cmd_post_recv(struct ibv_qp *ibq out: wr_count = resp.bad_wr; - if(wr_count) { + if (wr_count) { i = wr; - while(--wr_count) + while (--wr_count) i = i->next; *bad_wr = i; } - free(n); return ret; } int ibv_cmd_post_srq_recv(struct ibv_srq *srq, struct ibv_recv_wr *wr, struct ibv_recv_wr **bad_wr) { - struct ibv_post_srq_recv cmd; + struct ibv_post_srq_recv *cmd; struct ibv_post_srq_recv_resp resp; struct ibv_recv_wr *i; struct ibv_kern_recv_wr *n, *tmp; struct ibv_sge *s; unsigned wr_count = 0; unsigned sge_count = 0; + int size; int ret; for (i = wr; i; i = i->next) { @@ -751,19 +749,16 @@ int ibv_cmd_post_srq_recv(struct ibv_srq sge_count += i->num_sge; } - n = malloc((wr_count * sizeof *n) + (sge_count * sizeof *s)); - if (!n) { - return errno; - } + size = sizeof *cmd + wr_count * sizeof *n + sge_count * sizeof *s; + cmd = alloca(size); - resp.bad_wr = 0; - IBV_INIT_CMD_RESP(&cmd, sizeof cmd, POST_SRQ_RECV, &resp, sizeof resp); - cmd.srq_handle = srq->handle; - cmd.wr_count = wr_count; - cmd.sge_count = sge_count; - cmd.wr = (uintptr_t)n; + IBV_INIT_CMD_RESP(cmd, size, POST_SRQ_RECV, &resp, sizeof resp); + cmd->srq_handle = srq->handle; + cmd->wr_count = wr_count; + cmd->sge_count = sge_count; - s = (struct ibv_sge *)(n + wr_count); + n = (struct ibv_kern_recv_wr *) ((void *) cmd + sizeof *cmd); + s = (struct ibv_sge *) (n + wr_count); tmp = n; for (i = wr; i; i = i->next) { @@ -794,7 +789,6 @@ out: *bad_wr = i; } - free(n); return ret; } @@ -805,7 +799,7 @@ int ibv_cmd_create_ah(struct ibv_pd *pd, struct ibv_create_ah_resp resp; IBV_INIT_CMD_RESP(&cmd, sizeof cmd, CREATE_AH, &resp, sizeof resp); - cmd.user_handle = (uintptr_t)ah; + cmd.user_handle = (uintptr_t) ah; cmd.pd_handle = pd->handle; cmd.attr.dlid = attr->dlid; cmd.attr.sl = attr->sl; @@ -817,8 +811,7 @@ int 
ibv_cmd_create_ah(struct ibv_pd *pd, cmd.attr.grh.sgid_index = attr->grh.sgid_index; cmd.attr.grh.hop_limit = attr->grh.hop_limit; cmd.attr.grh.traffic_class = attr->grh.traffic_class; - cmd.attr.grh.dgid.global.subnet_prefix = attr->grh.dgid.global.subnet_prefix; - cmd.attr.grh.dgid.global.interface_id = attr->grh.dgid.global.interface_id; + memcpy(cmd.attr.grh.dgid, attr->grh.dgid.raw, 16); if (write(pd->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd) return errno;
_______________________________________________ openib-general mailing list openib-general@openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general