user_mad: Support RMPP on send side Note that this change will need a coordinated change to OpenSM and some userspace/management libraries which will be done as soon as possible once this patch is accepted.
Receive side support for RMPP will be added separately. Signed-off-by: Hal Rosenstock <[EMAIL PROTECTED]> Index: infiniband/include/ib_user_mad.h =================================================================== --- infiniband/include/ib_user_mad.h (revision 2413) +++ infiniband/include/ib_user_mad.h (working copy) @@ -1,5 +1,6 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Voltaire, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -42,7 +43,7 @@ * Increment this value if any changes that break userspace ABI * compatibility are made. */ -#define IB_USER_MAD_ABI_VERSION 2 +#define IB_USER_MAD_ABI_VERSION 3 /* * Make sure that all structs defined in this file remain laid out so @@ -51,8 +52,7 @@ */ /** - * ib_user_mad - MAD packet - * @data - Contents of MAD + * ib_user_mad_hdr - MAD packet header * @id - ID of agent MAD received with/to be sent with * @status - 0 on successful receive, ETIMEDOUT if no response * received (transaction ID in data[] will be set to TID of original @@ -72,8 +72,7 @@ * * All multi-byte quantities are stored in network (big endian) byte order. */ -struct ib_user_mad { - __u8 data[256]; +struct ib_user_mad_hdr { __u32 id; __u32 status; __u32 timeout_ms; @@ -91,6 +90,17 @@ }; /** + * ib_user_mad - MAD packet + * @hdr - MAD packet header + * @data - Contents of MAD + * + */ +struct ib_user_mad { + struct ib_user_mad_hdr hdr; + __u8 data[0]; +}; + +/** * ib_user_mad_reg_req - MAD registration request * @id - Set by the kernel; used to identify agent in future requests. * @qpn - Queue pair number; must be 0 or 1. @@ -103,6 +113,8 @@ * management class to receive. * @oui: Indicates IEEE OUI when mgmt_class is a vendor class * in the range from 0x30 to 0x4f. Otherwise not used. + * @rmpp_version: If set, indicates the RMPP version used. + * */ struct ib_user_mad_reg_req { __u32 id; @@ -111,6 +123,7 @@ __u8 mgmt_class; __u8 mgmt_class_version; __u8 oui[3]; + __u8 rmpp_version; }; #define IB_IOCTL_MAGIC 0x1b Index: infiniband/core/user_mad.c =================================================================== --- infiniband/core/user_mad.c (revision 2413) +++ infiniband/core/user_mad.c (working copy) @@ -1,5 +1,6 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Voltaire, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -94,10 +95,12 @@ }; struct ib_umad_packet { - struct ib_user_mad mad; struct ib_ah *ah; + struct ib_mad_send_buf *msg; struct list_head list; + int length; DECLARE_PCI_UNMAP_ADDR(mapping) + struct ib_user_mad mad; }; static const dev_t base_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE); @@ -114,10 +117,10 @@ int ret = 1; down_read(&file->agent_mutex); - for (packet->mad.id = 0; - packet->mad.id < IB_UMAD_MAX_AGENTS; - packet->mad.id++) - if (agent == file->agent[packet->mad.id]) { + for (packet->mad.hdr.id = 0; + packet->mad.hdr.id < IB_UMAD_MAX_AGENTS; + packet->mad.hdr.id++) + if (agent == file->agent[packet->mad.hdr.id]) { spin_lock_irq(&file->recv_lock); list_add_tail(&packet->list, &file->recv_list); spin_unlock_irq(&file->recv_lock); @@ -138,14 +141,11 @@ struct ib_umad_packet *packet = (void *) (unsigned long) send_wc->wr_id; - dma_unmap_single(agent->device->dma_device, - pci_unmap_addr(packet, mapping), - sizeof packet->mad.data, - DMA_TO_DEVICE); - ib_destroy_ah(packet->ah); + ib_free_send_mad(packet->msg); + ib_destroy_ah(packet->msg->send_wr.wr.ud.ah); if (send_wc->status == IB_WC_RESP_TIMEOUT_ERR) { - packet->mad.status = ETIMEDOUT; + packet->mad.hdr.status = ETIMEDOUT; if (!queue_packet(file, agent, packet)) return; @@ -159,30 +159,34 @@ { struct ib_umad_file *file = agent->context; struct ib_umad_packet *packet; + int length; + if (mad_recv_wc->wc->status != IB_WC_SUCCESS) goto out; - packet = kmalloc(sizeof *packet, GFP_KERNEL); + length = 256; /* until RMPP is supported */ + packet = kmalloc(sizeof *packet + length, GFP_KERNEL); if (!packet) goto out; - memset(packet, 0, sizeof *packet); + memset(packet, 0, sizeof *packet + length); + packet->length = length; - memcpy(packet->mad.data, mad_recv_wc->recv_buf.mad, sizeof packet->mad.data); - packet->mad.status = 0; - packet->mad.qpn = cpu_to_be32(mad_recv_wc->wc->src_qp); - packet->mad.lid = cpu_to_be16(mad_recv_wc->wc->slid); - packet->mad.sl = mad_recv_wc->wc->sl; - packet->mad.path_bits = mad_recv_wc->wc->dlid_path_bits; - packet->mad.grh_present = !!(mad_recv_wc->wc->wc_flags & IB_WC_GRH); - if (packet->mad.grh_present) { + memcpy(packet->mad.data, mad_recv_wc->recv_buf.mad, length); + packet->mad.hdr.status = 0; + packet->mad.hdr.qpn = cpu_to_be32(mad_recv_wc->wc->src_qp); + packet->mad.hdr.lid = cpu_to_be16(mad_recv_wc->wc->slid); + packet->mad.hdr.sl = mad_recv_wc->wc->sl; + packet->mad.hdr.path_bits = mad_recv_wc->wc->dlid_path_bits; + packet->mad.hdr.grh_present = !!(mad_recv_wc->wc->wc_flags & IB_WC_GRH); + if (packet->mad.hdr.grh_present) { /* XXX parse GRH */ - packet->mad.gid_index = 0; - packet->mad.hop_limit = 0; - packet->mad.traffic_class = 0; - memset(packet->mad.gid, 0, 16); - packet->mad.flow_label = 0; + packet->mad.hdr.gid_index = 0; + packet->mad.hdr.hop_limit = 0; + packet->mad.hdr.traffic_class = 0; + memset(packet->mad.hdr.gid, 0, 16); + packet->mad.hdr.flow_label = 0; } if (queue_packet(file, agent, packet)) @@ -199,7 +203,7 @@ struct ib_umad_packet *packet; ssize_t ret; - if (count < sizeof (struct ib_user_mad)) + if (count < sizeof (struct ib_user_mad) + 256) /* until RMPP supported */ return -EINVAL; spin_lock_irq(&file->recv_lock); @@ -222,12 +226,20 @@ spin_unlock_irq(&file->recv_lock); - if (copy_to_user(buf, &packet->mad, sizeof packet->mad)) + if (count < packet->length + sizeof (struct ib_user_mad)) + ret = -EINVAL; + else if (copy_to_user(buf, &packet->mad, + packet->length + sizeof (struct ib_user_mad))) ret = -EFAULT; else - ret = sizeof packet->mad; - - kfree(packet); + ret = packet->length + sizeof (struct ib_user_mad); + if (ret < 0) { + /* Requeue packet */ + spin_lock_irq(&file->recv_lock); + list_add(&packet->list, &file->recv_list); + spin_unlock_irq(&file->recv_lock); + } else + kfree(packet); return ret; } @@ -238,107 +250,155 @@ struct ib_umad_packet *packet; struct ib_mad_agent *agent; struct ib_ah_attr ah_attr; - struct ib_sge gather_list; - struct ib_send_wr *bad_wr, wr = { - .opcode = IB_WR_SEND, - .sg_list = &gather_list, - .num_sge = 1, - .send_flags = IB_SEND_SIGNALED, - }; + struct ib_send_wr *bad_wr; + struct ib_rmpp_mad *rmpp_mad; u8 method; u64 *tid; - int ret; + int ret, length, hdr_len, data_len, rmpp_hdr_size; + int rmpp_active = 0; if (count < sizeof (struct ib_user_mad)) return -EINVAL; - packet = kmalloc(sizeof *packet, GFP_KERNEL); + length = count - sizeof (struct ib_user_mad); + packet = kmalloc(sizeof *packet + sizeof(struct ib_mad_hdr) + + sizeof(struct ib_rmpp_hdr), GFP_KERNEL); if (!packet) return -ENOMEM; - if (copy_from_user(&packet->mad, buf, sizeof packet->mad)) { - kfree(packet); - return -EFAULT; + if (copy_from_user(&packet->mad, buf, + sizeof (struct ib_user_mad) + + sizeof(struct ib_mad_hdr) + + sizeof(struct ib_rmpp_hdr))) { + ret = -EFAULT; + goto err; } - if (packet->mad.id < 0 || packet->mad.id >= IB_UMAD_MAX_AGENTS) { + if (packet->mad.hdr.id < 0 || + packet->mad.hdr.id >= IB_UMAD_MAX_AGENTS) { ret = -EINVAL; goto err; } + packet->length = length; + down_read(&file->agent_mutex); - agent = file->agent[packet->mad.id]; + agent = file->agent[packet->mad.hdr.id]; if (!agent) { ret = -EINVAL; goto err_up; } + memset(&ah_attr, 0, sizeof ah_attr); + ah_attr.dlid = be16_to_cpu(packet->mad.hdr.lid); + ah_attr.sl = packet->mad.hdr.sl; + ah_attr.src_path_bits = packet->mad.hdr.path_bits; + ah_attr.port_num = file->port->port_num; + if (packet->mad.hdr.grh_present) { + ah_attr.ah_flags = IB_AH_GRH; + memcpy(ah_attr.grh.dgid.raw, packet->mad.hdr.gid, 16); + ah_attr.grh.flow_label = packet->mad.hdr.flow_label; + ah_attr.grh.hop_limit = packet->mad.hdr.hop_limit; + ah_attr.grh.traffic_class = packet->mad.hdr.traffic_class; + } + + packet->ah = ib_create_ah(agent->qp->pd, &ah_attr); + if (IS_ERR(packet->ah)) { + ret = PTR_ERR(packet->ah); + goto err_up; + } + + rmpp_mad = (struct ib_rmpp_mad *) packet->mad.data; + if (ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE) { + /* RMPP active */ + if (!agent->rmpp_version) { + ret = -EINVAL; + goto err_ah; + } + /* Validate that management class can support RMPP */ + if (rmpp_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_ADM) { + hdr_len = offsetof(struct ib_sa_mad, data); + data_len = length; + } else if ((rmpp_mad->mad_hdr.mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) && + (rmpp_mad->mad_hdr.mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END)) { + hdr_len = offsetof(struct ib_vendor_mad, data); + data_len = length - hdr_len; + } else { + ret = -EINVAL; + goto err_ah; + } + rmpp_active = 1; + } else { + if (length > sizeof(struct ib_mad)) { + ret = -EINVAL; + goto err_ah; + } + hdr_len = offsetof(struct ib_mad, data); + data_len = length - hdr_len; + } + + packet->msg = ib_create_send_mad(agent, + be32_to_cpu(packet->mad.hdr.qpn), + 0, packet->ah, rmpp_active, + hdr_len, data_len, + GFP_KERNEL); + if (IS_ERR(packet->msg)) { + ret = PTR_ERR(packet->msg); + goto err_ah; + } + + packet->msg->send_wr.wr.ud.retries = 0; + packet->msg->send_wr.wr.ud.timeout_ms = packet->mad.hdr.timeout_ms; + /* Override send WR WRID created by ib_create_send_mad */ + packet->msg->send_wr.wr_id = (unsigned long) packet; + + if (!rmpp_active) { + /* Copy message from user into send buffer */ + copy_from_user(packet->msg->mad, + buf + sizeof(struct ib_user_mad), length); + } else { + rmpp_hdr_size = sizeof(struct ib_mad_hdr) + + sizeof(struct ib_rmpp_hdr); + /* Only copy MAD headers (RMPP header in place) */ + memcpy(packet->msg->mad, packet->mad.data, + sizeof(struct ib_mad_hdr)); + /* Now, copy rest of message from user into send buffer */ + copy_from_user(((struct ib_rmpp_mad *)packet->msg->mad)->data, + buf + sizeof(struct ib_user_mad) + rmpp_hdr_size, + length - rmpp_hdr_size); + } + /* * If userspace is generating a request that will generate a * response, we need to make sure the high-order part of the * transaction ID matches the agent being used to send the * MAD. */ - method = ((struct ib_mad_hdr *) packet->mad.data)->method; + method = packet->msg->mad->mad_hdr.method; if (!(method & IB_MGMT_METHOD_RESP) && method != IB_MGMT_METHOD_TRAP_REPRESS && method != IB_MGMT_METHOD_SEND) { - tid = &((struct ib_mad_hdr *) packet->mad.data)->tid; + tid = &packet->msg->mad->mad_hdr.tid; *tid = cpu_to_be64(((u64) agent->hi_tid) << 32 | (be64_to_cpup(tid) & 0xffffffff)); } - memset(&ah_attr, 0, sizeof ah_attr); - ah_attr.dlid = be16_to_cpu(packet->mad.lid); - ah_attr.sl = packet->mad.sl; - ah_attr.src_path_bits = packet->mad.path_bits; - ah_attr.port_num = file->port->port_num; - if (packet->mad.grh_present) { - ah_attr.ah_flags = IB_AH_GRH; - memcpy(ah_attr.grh.dgid.raw, packet->mad.gid, 16); - ah_attr.grh.flow_label = packet->mad.flow_label; - ah_attr.grh.hop_limit = packet->mad.hop_limit; - ah_attr.grh.traffic_class = packet->mad.traffic_class; - } + ret = ib_post_send_mad(agent, &packet->msg->send_wr, &bad_wr); + if (ret) + goto err_msg; - packet->ah = ib_create_ah(agent->qp->pd, &ah_attr); - if (IS_ERR(packet->ah)) { - ret = PTR_ERR(packet->ah); - goto err_up; - } + up_read(&file->agent_mutex); - gather_list.addr = dma_map_single(agent->device->dma_device, - packet->mad.data, - sizeof packet->mad.data, - DMA_TO_DEVICE); - gather_list.length = sizeof packet->mad.data; - gather_list.lkey = file->mr[packet->mad.id]->lkey; - pci_unmap_addr_set(packet, mapping, gather_list.addr); + return sizeof (struct ib_user_mad_hdr) + packet->length; - wr.wr.ud.mad_hdr = (struct ib_mad_hdr *) packet->mad.data; - wr.wr.ud.ah = packet->ah; - wr.wr.ud.remote_qpn = be32_to_cpu(packet->mad.qpn); - wr.wr.ud.remote_qkey = be32_to_cpu(packet->mad.qkey); - wr.wr.ud.timeout_ms = packet->mad.timeout_ms; - wr.wr.ud.retries = 0; +err_msg: + ib_free_send_mad(packet->msg); - wr.wr_id = (unsigned long) packet; +err_ah: + ib_destroy_ah(packet->ah); - ret = ib_post_send_mad(agent, &wr, &bad_wr); - if (ret) { - dma_unmap_single(agent->device->dma_device, - pci_unmap_addr(packet, mapping), - sizeof packet->mad.data, - DMA_TO_DEVICE); - goto err_up; - } - - up_read(&file->agent_mutex); - - return sizeof packet->mad; - err_up: up_read(&file->agent_mutex); @@ -400,7 +460,8 @@ agent = ib_register_mad_agent(file->port->ib_dev, file->port->port_num, ureq.qpn ? IB_QPT_GSI : IB_QPT_SMI, ureq.mgmt_class ? &req : NULL, - 0, send_handler, recv_handler, file); + ureq.rmpp_version, + send_handler, recv_handler, file); if (IS_ERR(agent)) { ret = PTR_ERR(agent); goto out; Index: docs/user_mad.txt =================================================================== --- docs/user_mad.txt (revision 2413) +++ docs/user_mad.txt (working copy) @@ -28,12 +28,14 @@ Receiving MADs - MADs are received using read(). The buffer passed to read() must be - large enough to hold at least one struct ib_user_mad. For example: + MADs are received using read(). The receive side does not currently + support RMPP so the buffer passed to read() must be at least one + struct ib_user_mad + 256 bytes. For example: - struct ib_user_mad mad; - ret = read(fd, &mad, sizeof mad); - if (ret != sizeof mad) + struct ib_user_mad *mad; + mad = malloc(sizeof *mad + 256); + ret = read(fd, mad, sizeof *mad + 256); + if (ret != sizeof mad + 256) perror("read"); In addition to the actual MAD contents, the other struct ib_user_mad @@ -50,18 +52,21 @@ MADs are sent using write(). The agent ID for sending should be filled into the id field of the MAD, the destination LID should be - filled into the lid field, and so on. For example: + filled into the lid field, and so on. The send side does support + RMPP so arbitrary length MAD can be sent. For example: - struct ib_user_mad mad; + struct ib_user_mad *mad; - /* fill in mad.data */ + mad = malloc(sizeof *mad + mad_length); - mad.id = my_agent; /* req.id from agent registration */ - mad.lid = my_dest; /* in network byte order... */ + /* fill in mad->data */ + + mad->hdr.id = my_agent; /* req.id from agent registration */ + mad->hdr.lid = my_dest; /* in network byte order... */ /* etc. */ - ret = write(fd, &mad, sizeof mad); - if (ret != sizeof mad) + ret = write(fd, &mad, sizeof *mad + mad_length); + if (ret != sizeof *mad + mad_length) perror("write"); Setting IsSM Capability Bit _______________________________________________ openib-general mailing list openib-general@openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general