This patch implements, at the mlx4 level, the changes required to support RCA. It mainly covers the creation of a range of QPs, the configuration of the special RCA QP, and the required changes to the inbox parameters.
Signed-off-by: Eli Cohen <[EMAIL PROTECTED]> --- drivers/infiniband/hw/mlx4/main.c | 4 +- drivers/infiniband/hw/mlx4/mlx4_ib.h | 4 + drivers/infiniband/hw/mlx4/qp.c | 228 +++++++++++++++++++++------------- include/linux/mlx4/qp.h | 48 +++++++- 4 files changed, 193 insertions(+), 91 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 0c453d0..d3c8878 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -91,7 +91,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN | - IB_DEVICE_BLOCK_MULTICAST_LOOPBACK; + IB_DEVICE_BLOCK_MULTICAST_LOOPBACK | + IB_DEVICE_IPOIB_RCA; if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR) props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR; if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR) @@ -618,6 +619,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.query_srq = mlx4_ib_query_srq; ibdev->ib_dev.destroy_srq = mlx4_ib_destroy_srq; ibdev->ib_dev.post_srq_recv = mlx4_ib_post_srq_recv; + ibdev->ib_dev.create_qp_range = mlx4_ib_create_qp_range; ibdev->ib_dev.create_qp = mlx4_ib_create_qp; ibdev->ib_dev.modify_qp = mlx4_ib_modify_qp; ibdev->ib_dev.query_qp = mlx4_ib_query_qp; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index ec9bf28..e26c3d6 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -103,6 +103,7 @@ struct mlx4_ib_wq { enum mlx4_ib_qp_flags { MLX4_IB_QP_LSO = 1 << 0, MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = 1 << 1, + MLX4_IB_QP_RCA = 1 << 2, }; struct mlx4_ib_qp { @@ -268,6 +269,9 @@ void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index); int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr); +int mlx4_ib_create_qp_range(struct ib_pd *pd, struct ib_qp_init_attr *qp_init_attr, + struct 
ib_udata *udata, int nqps, + int align, struct ib_qp *list[]); struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index fc61556..72a2d5d 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -502,9 +502,10 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) qp->flags |= MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK; - if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) qp->flags |= MLX4_IB_QP_LSO; + if (init_attr->create_flags & IB_QP_CREATE_IPOIB_RCA) + qp->flags |= MLX4_IB_QP_RCA; err = set_kernel_sq_size(dev, &init_attr->cap, init_attr->qp_type, qp); if (err) @@ -541,11 +542,6 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, } } - if (!sqpn) - err = mlx4_qp_reserve_range(dev->dev, 1, 1, &sqpn); - if (err) - goto err_wrid; - err = mlx4_qp_alloc(dev->dev, sqpn, &qp->mqp); if (err) { mlx4_qp_release_range(dev->dev, sqpn, 1); @@ -659,9 +655,6 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, mlx4_qp_free(dev->dev, &qp->mqp); - if (!is_sqp(dev, qp)) - mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1); - mlx4_mtt_cleanup(dev->dev, &qp->mtt); if (is_user) { @@ -678,91 +671,138 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, } } -struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *init_attr, - struct ib_udata *udata) +int mlx4_ib_create_qp_range(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, + struct ib_udata *udata, int nqps, + int align, struct ib_qp *list[]) { struct mlx4_ib_dev *dev = to_mdev(pd->device); struct mlx4_ib_sqp *sqp; struct mlx4_ib_qp *qp; int err; + int base_qpn, qpn; + int i; - /* - * We only support LSO and multicast loopback blocking, and - * only for kernel UD QPs. 
- */ - if (init_attr->create_flags & ~(IB_QP_CREATE_IPOIB_UD_LSO | - IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)) - return ERR_PTR(-EINVAL); + for (i = 0; i < nqps; ++i) { + /* + * We only support LSO, multicast loopback blocking and RCA, and + * only for kernel UD QPs. + */ + if (init_attr[i].create_flags & ~(IB_QP_CREATE_IPOIB_UD_LSO | + IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK | + IB_QP_CREATE_IPOIB_RCA)) + return -EINVAL; + if (init_attr[i].create_flags & (IB_QP_CREATE_IPOIB_UD_LSO | + IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK | + IB_QP_CREATE_IPOIB_RCA) && + (pd->uobject || init_attr[i].qp_type != IB_QPT_UD)) + return -EINVAL; - if (init_attr->create_flags && - (pd->uobject || init_attr->qp_type != IB_QPT_UD)) - return ERR_PTR(-EINVAL); + /* Userspace is not allowed to create special QPs: */ + if (pd->uobject && (init_attr[i].qp_type == IB_QPT_SMI || + init_attr[i].qp_type == IB_QPT_GSI)) + return -EINVAL; - switch (init_attr->qp_type) { - case IB_QPT_RC: - case IB_QPT_UC: - case IB_QPT_UD: - { - qp = kzalloc(sizeof *qp, GFP_KERNEL); - if (!qp) - return ERR_PTR(-ENOMEM); - - err = create_qp_common(dev, pd, init_attr, udata, 0, qp); - if (err) { - kfree(qp); - return ERR_PTR(err); - } + if (nqps > 1 && (init_attr[i].qp_type == IB_QPT_SMI || + init_attr[i].qp_type == IB_QPT_GSI)) + return -EINVAL; + } - qp->ibqp.qp_num = qp->mqp.qpn; + err = mlx4_qp_reserve_range(dev->dev, nqps, align, &base_qpn); + if (err) + return err; - break; - } - case IB_QPT_SMI: - case IB_QPT_GSI: - { - /* Userspace is not allowed to create special QPs: */ - if (pd->uobject) - return ERR_PTR(-EINVAL); - - sqp = kzalloc(sizeof *sqp, GFP_KERNEL); - if (!sqp) - return ERR_PTR(-ENOMEM); - - qp = &sqp->qp; - - err = create_qp_common(dev, pd, init_attr, udata, - dev->dev->caps.sqp_start + - (init_attr->qp_type == IB_QPT_SMI ? 
0 : 2) + - init_attr->port_num - 1, - qp); - if (err) { - kfree(sqp); - return ERR_PTR(err); + for (i = 0, qpn = base_qpn; i < nqps; ++i, ++qpn) { + switch (init_attr[i].qp_type) { + case IB_QPT_RC: + case IB_QPT_UC: + case IB_QPT_UD: + { + qp = kzalloc(sizeof *qp, GFP_KERNEL); + if (!qp) { + err = -ENOMEM; + goto exit_fail; + } + + err = create_qp_common(dev, pd, init_attr + i, udata, qpn, qp); + if (err) { + kfree(qp); + goto exit_fail; + } + + qp->ibqp.qp_num = qp->mqp.qpn; + + break; } + case IB_QPT_SMI: + case IB_QPT_GSI: + { + sqp = kzalloc(sizeof *sqp, GFP_KERNEL); + if (!sqp) { + err = -ENOMEM; + goto exit_fail; + } - qp->port = init_attr->port_num; - qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1; + qp = &sqp->qp; - break; - } - default: - /* Don't support raw QPs */ - return ERR_PTR(-EINVAL); + err = create_qp_common(dev, pd, init_attr + i, udata, + dev->dev->caps.sqp_start + + (init_attr[i].qp_type == IB_QPT_SMI ? 0 : 2) + + init_attr[i].port_num - 1, + qp); + if (err) { + kfree(sqp); + goto exit_fail; + } + + qp->port = init_attr[i].port_num; + qp->ibqp.qp_num = init_attr[i].qp_type == IB_QPT_SMI ? 
0 : 1; + + break; + } + default: + /* Don't support raw QPs */ + err = -EINVAL; + goto exit_fail; + } + list[i] = &qp->ibqp; } + return 0; + +exit_fail: + for (--i; i >= 0; --i) { + destroy_qp_common(dev, to_mqp(list[i]), !!pd->uobject); + kfree(to_mqp(list[i])); + } - return &qp->ibqp; + mlx4_qp_release_range(dev->dev, base_qpn, nqps); + return err; +} + +struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata) +{ + struct ib_qp *qp; + int err; + + err = mlx4_ib_create_qp_range(pd, init_attr, udata, 1, 1, &qp); + if (err) + return ERR_PTR(err); + + return qp; } int mlx4_ib_destroy_qp(struct ib_qp *qp) { struct mlx4_ib_dev *dev = to_mdev(qp->device); struct mlx4_ib_qp *mqp = to_mqp(qp); + int qpn = qp->qp_num; if (is_qp0(dev, mqp)) mlx4_CLOSE_PORT(dev->dev, mqp->port); destroy_qp_common(dev, mqp, !!qp->pd->uobject); + if (qpn >= dev->dev->caps.sqp_start + 8) + mlx4_qp_release_range(dev->dev, qpn, 1); if (is_sqp(dev, mqp)) kfree(to_msqp(mqp)); @@ -884,6 +924,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) | (to_mlx4_st(ibqp->qp_type) << 16)); context->flags |= cpu_to_be32(1 << 8); /* DE? */ + context->flags |= cpu_to_be32(qp->flags & MLX4_IB_QP_RCA ? 
1 << 13 : 0); if (!(attr_mask & IB_QP_PATH_MIG_STATE)) context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11); @@ -942,18 +983,18 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, if (attr_mask & IB_QP_PORT) { if (cur_state == IB_QPS_SQD && new_state == IB_QPS_SQD && !(attr_mask & IB_QP_AV)) { - mlx4_set_sched(&context->pri_path, attr->port_num); + mlx4_set_sched(&context->path.pri_path, attr->port_num); optpar |= MLX4_QP_OPTPAR_SCHED_QUEUE; } } if (attr_mask & IB_QP_PKEY_INDEX) { - context->pri_path.pkey_index = attr->pkey_index; + context->path.pri_path.pkey_index = attr->pkey_index; optpar |= MLX4_QP_OPTPAR_PKEY_INDEX; } if (attr_mask & IB_QP_AV) { - if (mlx4_set_path(dev, &attr->ah_attr, &context->pri_path, + if (mlx4_set_path(dev, &attr->ah_attr, &context->path.pri_path, attr_mask & IB_QP_PORT ? attr->port_num : qp->port)) goto out; @@ -962,7 +1003,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, } if (attr_mask & IB_QP_TIMEOUT) { - context->pri_path.ackto = attr->timeout << 3; + context->path.pri_path.ackto = attr->timeout << 3; optpar |= MLX4_QP_OPTPAR_ACK_TIMEOUT; } @@ -975,12 +1016,12 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, dev->dev->caps.pkey_table_len[attr->alt_port_num]) goto out; - if (mlx4_set_path(dev, &attr->alt_ah_attr, &context->alt_path, + if (mlx4_set_path(dev, &attr->alt_ah_attr, &context->path.alt_path, attr->alt_port_num)) goto out; - context->alt_path.pkey_index = attr->alt_pkey_index; - context->alt_path.ackto = attr->alt_timeout << 3; + context->path.alt_path.pkey_index = attr->alt_pkey_index; + context->path.alt_path.ackto = attr->alt_timeout << 3; optpar |= MLX4_QP_OPTPAR_ALT_ADDR_PATH; } @@ -1048,11 +1089,11 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, new_state == IB_QPS_RTR && (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI || ibqp->qp_type == IB_QPT_UD)) { - context->pri_path.sched_queue = (qp->port - 1) << 6; + context->path.pri_path.sched_queue = (qp->port - 1) << 6; if (is_qp0(dev, 
qp)) - context->pri_path.sched_queue |= MLX4_IB_DEFAULT_QP0_SCHED_QUEUE; + context->path.pri_path.sched_queue |= MLX4_IB_DEFAULT_QP0_SCHED_QUEUE; else - context->pri_path.sched_queue |= MLX4_IB_DEFAULT_SCHED_QUEUE; + context->path.pri_path.sched_queue |= MLX4_IB_DEFAULT_SCHED_QUEUE; } if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD && @@ -1061,6 +1102,17 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, else sqd_event = 0; + if (attr_mask & IB_QP_RCA) { + context->path.pri_path.rss.tbl_sz_base_qpn = + cpu_to_be32(attr->rca.base_qpn | ilog2(attr->rca.num_qpn) << 24); + context->path.pri_path.rss.default_qpn = cpu_to_be32(attr->rca.default_qpn); + context->rca.key.flags_hash_fn = cpu_to_be32(MLX4_RCA_TCP_IPV6 | + MLX4_RCA_IPV6 | + MLX4_RCA_TCP_IPV4 | + MLX4_RCA_IPV4); + memset(context->rca.key.rca_key, 0, sizeof context->rca.key.rca_key); + } + /* * Before passing a kernel QP to the HW, make sure that the * ownership bits of the send queue are set and the SQ @@ -1182,6 +1234,12 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, goto out; } + if (attr_mask & IB_QP_RCA){ + if ((cur_state != IB_QPS_RESET || new_state != IB_QPS_INIT) && + (cur_state != IB_QPS_RTS || new_state != IB_QPS_RTS)) + goto out; + } + err = __mlx4_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state); out: @@ -1805,17 +1863,17 @@ int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr to_ib_qp_access_flags(be32_to_cpu(context.params2)); if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) { - to_ib_ah_attr(dev->dev, &qp_attr->ah_attr, &context.pri_path); - to_ib_ah_attr(dev->dev, &qp_attr->alt_ah_attr, &context.alt_path); - qp_attr->alt_pkey_index = context.alt_path.pkey_index & 0x7f; + to_ib_ah_attr(dev->dev, &qp_attr->ah_attr, &context.path.pri_path); + to_ib_ah_attr(dev->dev, &qp_attr->alt_ah_attr, &context.path.alt_path); + qp_attr->alt_pkey_index = context.path.alt_path.pkey_index & 0x7f; qp_attr->alt_port_num = 
qp_attr->alt_ah_attr.port_num; } - qp_attr->pkey_index = context.pri_path.pkey_index & 0x7f; + qp_attr->pkey_index = context.path.pri_path.pkey_index & 0x7f; if (qp_attr->qp_state == IB_QPS_INIT) qp_attr->port_num = qp->port; else - qp_attr->port_num = context.pri_path.sched_queue & 0x40 ? 2 : 1; + qp_attr->port_num = context.path.pri_path.sched_queue & 0x40 ? 2 : 1; /* qp_attr->en_sqd_async_notify is only applicable in modify qp */ qp_attr->sq_draining = mlx4_state == MLX4_QP_STATE_SQ_DRAINING; @@ -1826,10 +1884,10 @@ int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr 1 << ((be32_to_cpu(context.params2) >> 21) & 0x7); qp_attr->min_rnr_timer = (be32_to_cpu(context.rnr_nextrecvpsn) >> 24) & 0x1f; - qp_attr->timeout = context.pri_path.ackto >> 3; + qp_attr->timeout = context.path.pri_path.ackto >> 3; qp_attr->retry_cnt = (be32_to_cpu(context.params1) >> 16) & 0x7; qp_attr->rnr_retry = (be32_to_cpu(context.params1) >> 13) & 0x7; - qp_attr->alt_timeout = context.alt_path.ackto >> 3; + qp_attr->alt_timeout = context.path.alt_path.ackto >> 3; done: qp_attr->cur_qp_state = qp_attr->qp_state; diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index 1bb2ba4..333afce 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -95,11 +95,22 @@ enum { MLX4_QP_BIT_RIC = 1 << 4, }; +struct mlx4_net_path { + u16 reserved; + __be16 dmac_hi; + __be32 dmac_lo; +}; + +struct mlx4_rss_path { + __be32 tbl_sz_base_qpn; + __be32 default_qpn; +}; + struct mlx4_qp_path { u8 fl; u8 reserved1[2]; u8 pkey_index; - u8 reserved2; + u8 counter_index; u8 grh_mylmc; __be16 rlid; u8 ackto; @@ -111,8 +122,33 @@ struct mlx4_qp_path { u8 sched_queue; u8 snooper_flags; u8 reserved3[2]; - u8 counter_index; - u8 reserved4[7]; + union { + struct mlx4_net_path net; + struct mlx4_rss_path rss; + }; +}; + +struct mlx4_addr_path { + struct mlx4_qp_path pri_path; + struct mlx4_qp_path alt_path; +}; + +enum { + MLX4_RCA_TCP_IPV6 = 1 << 2, + MLX4_RCA_IPV6 = 1 
<< 3, + MLX4_RCA_TCP_IPV4 = 1 << 4, + MLX4_RCA_IPV4 = 1 << 5, + MLX4_HASH_FN_OFF = 8 +}; + +struct mlx4_rca_key { + __be32 flags_hash_fn; + __be32 rca_key[10]; +}; + +struct mlx4_rca { + struct mlx4_qp_path pri_path; + struct mlx4_rca_key key; }; struct mlx4_qp_context { @@ -125,8 +161,10 @@ struct mlx4_qp_context { __be32 usr_page; __be32 local_qpn; __be32 remote_qpn; - struct mlx4_qp_path pri_path; - struct mlx4_qp_path alt_path; + union { + struct mlx4_addr_path path; + struct mlx4_rca rca; + }; __be32 params1; u32 reserved1; __be32 next_send_psn; -- 1.5.6 _______________________________________________ ewg mailing list ewg@lists.openfabrics.org http://lists.openfabrics.org/cgi-bin/mailman/listinfo/ewg