RE: [PATCH 1/3] IB/core: Add support of checksum capability reporting in ib verbs

2015-09-16 Thread Bodong Wang
For RX: if checksum offload is not supported for the corresponding QP type, the 
csum will not be validated, but packets are still received normally. 
For TX: if checksum calculation is not supported for the corresponding QP type 
and the user application sets the IBV_SEND_IP_CSUM flag, an error is returned.

-Original Message-
From: Christoph Lameter [mailto:c...@linux.com] 
Sent: Wednesday, September 16, 2015 12:07 PM
To: Bodong Wang
Cc: dledf...@redhat.com; linux-rdma@vger.kernel.org; Bodong Wang; Or Gerlitz; 
jguntho...@obsidianresearch.com; Moshe Lazer; Haggai Eran; Matan Barak
Subject: Re: [PATCH 1/3] IB/core: Add support of checksum capability reporting 
in ib verbs

On Wed, 16 Sep 2015, Bodong Wang wrote:

> A new field csum_cap is added to both ib_query_device. It contains two 
> members:
> eth_csum_cap and ib_csum_cap, indicating the checksum capability of the 
> Ethernet and Infiniband link layers respectively for different QP types.
>
> Current checksum caps use the following enum members:
> - IB_CSUM_SUPPORT_UD: device supports validation/calculation of csum for UD 
> QP.
> - IB_CSUM_SUPPORT_RAW: device supports validation/calculation of csum for raw 
> QP.

A combination? Is it possible then to also support calculation without 
validation? Maybe we want to receive packets that do have invalid checksums.

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/2] Update ibv_create_flow/ibv_destroy_flow according to change of libibverbs

2015-09-16 Thread Bodong Wang
Signed-off-by: Bodong Wang 
---
 src/mlx4.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/mlx4.c b/src/mlx4.c
index 2999150..9fe8c6a 100644
--- a/src/mlx4.c
+++ b/src/mlx4.c
@@ -203,8 +203,8 @@ static int mlx4_init_context(struct verbs_device *v_device,
verbs_set_ctx_op(verbs_ctx, get_srq_num, verbs_get_srq_num);
verbs_set_ctx_op(verbs_ctx, create_qp_ex, mlx4_create_qp_ex);
verbs_set_ctx_op(verbs_ctx, open_qp, mlx4_open_qp);
-   verbs_set_ctx_op(verbs_ctx, drv_ibv_create_flow, ibv_cmd_create_flow);
-   verbs_set_ctx_op(verbs_ctx, drv_ibv_destroy_flow, ibv_cmd_destroy_flow);
+   verbs_set_ctx_op(verbs_ctx, ibv_create_flow, ibv_cmd_create_flow);
+   verbs_set_ctx_op(verbs_ctx, ibv_destroy_flow, ibv_cmd_destroy_flow);
 
return 0;
 
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/2] Add support for TX/RX checksum offload

2015-09-16 Thread Bodong Wang
RX checksum verification status is reported through wc_flag when polling CQ if
device supports checksum offload. When IBV_WC_IP_CSUM_OK is set, that means
both IPv4 header checksum and TCP/UDP checksum are OK.

TX checksum offload will be enabled for TCP/UDP over IPv4 if user sets
send_flag IBV_SEND_IP_CSUM and device supports checksum offload.

A new field, qp_cap_cache, is added to mlx4_qp in order to 'cache' the device
capabilities to minimize the performance hit on the poll_one and post_send functions.
The capabilities are set inside mlx4_modify_qp. Post_send will return error
if device doesn't support checksum but user sets flag IBV_SEND_IP_CSUM.

Signed-off-by: Bodong Wang 
---
 src/cq.c|  6 ++
 src/mlx4.c  |  1 +
 src/mlx4.h  | 23 ++-
 src/qp.c| 19 +++
 src/verbs.c | 54 ++
 src/wqe.h   |  8 +---
 6 files changed, 107 insertions(+), 4 deletions(-)

diff --git a/src/cq.c b/src/cq.c
index 8b27795..32c9070 100644
--- a/src/cq.c
+++ b/src/cq.c
@@ -329,6 +329,12 @@ static int mlx4_poll_one(struct mlx4_cq *cq,
wc->sl = ntohs(cqe->sl_vid) >> 13;
else
wc->sl = ntohs(cqe->sl_vid) >> 12;
+
+   if ((*cur_qp) && ((*cur_qp)->qp_cap_cache & 
MLX4_RX_CSUM_VALID)) {
+   wc->wc_flags |= ((cqe->status & 
htonl(MLX4_CQE_STATUS_IPV4_CSUM_OK)) ==
+htonl(MLX4_CQE_STATUS_IPV4_CSUM_OK)) <<
+   IBV_WC_IP_CSUM_OK_SHIFT;
+   }
}
 
return CQ_OK;
diff --git a/src/mlx4.c b/src/mlx4.c
index 9fe8c6a..427a3a8 100644
--- a/src/mlx4.c
+++ b/src/mlx4.c
@@ -205,6 +205,7 @@ static int mlx4_init_context(struct verbs_device *v_device,
verbs_set_ctx_op(verbs_ctx, open_qp, mlx4_open_qp);
verbs_set_ctx_op(verbs_ctx, ibv_create_flow, ibv_cmd_create_flow);
verbs_set_ctx_op(verbs_ctx, ibv_destroy_flow, ibv_cmd_destroy_flow);
+   verbs_set_ctx_op(verbs_ctx, query_device_ex, mlx4_query_device_ex);
 
return 0;
 
diff --git a/src/mlx4.h b/src/mlx4.h
index d71450f..7e229d7 100644
--- a/src/mlx4.h
+++ b/src/mlx4.h
@@ -257,6 +257,7 @@ struct mlx4_qp {
struct mlx4_wq  rq;
 
uint8_t link_layer;
+   uint32_tqp_cap_cache;
 };
 
 struct mlx4_av {
@@ -279,6 +280,22 @@ struct mlx4_ah {
uint8_t mac[6];
 };
 
+enum {
+   MLX4_CSUM_SUPPORT_UD_OVER_IB= (1 <<  0),
+   MLX4_CSUM_SUPPORT_RAW_OVER_ETH  = (1 <<  1),
+   /* Only report rx checksum when the validation is valid */
+   MLX4_RX_CSUM_VALID  = (1 <<  16),
+};
+
+enum mlx4_cqe_status {
+   MLX4_CQE_STATUS_TCP_UDP_CSUM_OK = (1 <<  2),
+   MLX4_CQE_STATUS_IPV4_PKT= (1 << 22),
+   MLX4_CQE_STATUS_IP_HDR_CSUM_OK  = (1 << 28),
+   MLX4_CQE_STATUS_IPV4_CSUM_OK= MLX4_CQE_STATUS_IPV4_PKT |
+   MLX4_CQE_STATUS_IP_HDR_CSUM_OK |
+   MLX4_CQE_STATUS_TCP_UDP_CSUM_OK
+};
+
 struct mlx4_cqe {
uint32_tvlan_my_qpn;
uint32_timmed_rss_invalid;
@@ -286,7 +303,7 @@ struct mlx4_cqe {
uint8_t sl_vid;
uint8_t reserved1;
uint16_trlid;
-   uint32_treserved2;
+   uint32_tstatus;
uint32_tbyte_cnt;
uint16_twqe_index;
uint16_tchecksum;
@@ -352,6 +369,10 @@ void mlx4_free_db(struct mlx4_context *context, enum 
mlx4_db_type type, uint32_t
 
 int mlx4_query_device(struct ibv_context *context,
   struct ibv_device_attr *attr);
+int mlx4_query_device_ex(struct ibv_context *context,
+const struct ibv_query_device_ex_input *input,
+struct ibv_device_attr_ex *attr,
+size_t attr_size);
 int mlx4_query_port(struct ibv_context *context, uint8_t port,
 struct ibv_port_attr *attr);
 
diff --git a/src/qp.c b/src/qp.c
index 721bed4..057490b 100644
--- a/src/qp.c
+++ b/src/qp.c
@@ -289,12 +289,31 @@ int mlx4_post_send(struct ibv_qp *ibqp, struct 
ibv_send_wr *wr,
set_datagram_seg(wqe, wr);
wqe  += sizeof (struct mlx4_wqe_datagram_seg);
size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
+
+   if (wr->send_flags & IBV_SEND_IP_CSUM) {
+   if (!(qp->qp_cap_cache & 
MLX4_CSUM_SUPPORT_UD_OVER_IB)) {
+   ret = EINVAL;
+   *bad_wr = wr;
+   goto out;

[PATCH 1/1] Add support for TX/RX checksum offload

2015-09-16 Thread Bodong Wang
Add a device capability field csum_cap to denote IPv4 checksum offload
support. Devices should configure this field if they support
insertion/verification of IPv4, TCP and UDP checksums on outgoing/incoming
IPv4 packets according to link layer and QP types.

Flags IBV_SEND_IP_CSUM and IBV_WC_IP_CSUM_OK are added for utilizing this
capability for send and receive separately.

Signed-off-by: Bodong Wang 
---
 examples/devinfo.c| 33 +
 include/infiniband/kern-abi.h |  7 +++
 include/infiniband/verbs.h| 22 --
 man/ibv_poll_cq.3 |  5 +
 man/ibv_post_send.3   |  4 
 src/cmd.c | 13 +
 6 files changed, 82 insertions(+), 2 deletions(-)

diff --git a/examples/devinfo.c b/examples/devinfo.c
index a8de982..46d4614 100644
--- a/examples/devinfo.c
+++ b/examples/devinfo.c
@@ -253,6 +253,38 @@ void print_odp_caps(const struct ibv_odp_caps *caps)
print_odp_trans_caps(caps->per_transport_caps.ud_odp_caps);
 }
 
+void print_csum_caps(const struct ibv_csum_cap_per_link *caps)
+{
+   uint32_t unknown_csum_caps = ~(IBV_CSUM_SUPPORT_RAW |
+  IBV_CSUM_SUPPORT_UD);
+
+   printf("\teth_csum_cap:\n");
+   if (!caps->eth_csum_cap) {
+   printf("\t\t\t\t\tNO_SUPPORT\n");
+   } else {
+   if (caps->eth_csum_cap & IBV_CSUM_SUPPORT_RAW)
+   printf("\t\t\t\t\tRAW_QP_SUPPORT\n");
+   if (caps->eth_csum_cap & IBV_CSUM_SUPPORT_UD)
+   printf("\t\t\t\t\tUD_QP_SUPPORT\n");
+   if (caps->eth_csum_cap & unknown_csum_caps)
+   printf("\t\t\t\t\tUnknown flags: 0x%" PRIX32 "\n",
+  caps->eth_csum_cap & unknown_csum_caps);
+   }
+
+   printf("\tib_csum_cap:\n");
+   if (!caps->ib_csum_cap) {
+   printf("\t\t\t\t\tNO_SUPPORT\n");
+   } else {
+   if (caps->ib_csum_cap & IBV_CSUM_SUPPORT_RAW)
+   printf("\t\t\t\t\tRAW_QP_SUPPORT\n");
+   if (caps->ib_csum_cap & IBV_CSUM_SUPPORT_UD)
+   printf("\t\t\t\t\tUD_QP_SUPPORT\n");
+   if (caps->ib_csum_cap & unknown_csum_caps)
+   printf("\t\t\t\t\tUnknown flags: 0x%" PRIX32 "\n",
+  caps->ib_csum_cap & unknown_csum_caps);
+   }
+}
+
 static int print_hca_cap(struct ibv_device *ib_dev, uint8_t ib_port)
 {
struct ibv_context *ctx;
@@ -339,6 +371,7 @@ static int print_hca_cap(struct ibv_device *ib_dev, uint8_t 
ib_port)
printf("\tlocal_ca_ack_delay:\t\t%d\n", 
device_attr.orig_attr.local_ca_ack_delay);
 
print_odp_caps(&device_attr.odp_caps);
+   print_csum_caps(&device_attr.csum_cap);
}
 
for (port = 1; port <= device_attr.orig_attr.phys_port_cnt; ++port) {
diff --git a/include/infiniband/kern-abi.h b/include/infiniband/kern-abi.h
index 800c5ab..51d4fb0 100644
--- a/include/infiniband/kern-abi.h
+++ b/include/infiniband/kern-abi.h
@@ -262,11 +262,18 @@ struct ibv_odp_caps_resp {
__u32 reserved;
 };
 
+struct ibv_csum_cap_per_link_resp {
+   __u32 eth_csum_cap;
+   __u32 ib_csum_cap;
+};
+
 struct ibv_query_device_resp_ex {
struct ibv_query_device_resp base;
__u32 comp_mask;
__u32 response_length;
struct ibv_odp_caps_resp odp_caps;
+   __u64 reserved0[2];
+   struct ibv_csum_cap_per_link_resp csum_cap;
 };
 
 struct ibv_query_port {
diff --git a/include/infiniband/verbs.h b/include/infiniband/verbs.h
index 1ff5265..134359f 100644
--- a/include/infiniband/verbs.h
+++ b/include/infiniband/verbs.h
@@ -196,10 +196,16 @@ enum ibv_odp_general_caps {
IBV_ODP_SUPPORT = 1 << 0,
 };
 
+struct ibv_csum_cap_per_link {
+   uint32_t eth_csum_cap;
+   uint32_t ib_csum_cap;
+};
+
 struct ibv_device_attr_ex {
struct ibv_device_attr  orig_attr;
uint32_tcomp_mask;
struct ibv_odp_caps odp_caps;
+   struct ibv_csum_cap_per_link csum_cap;
 };
 
 enum ibv_mtu {
@@ -348,9 +354,14 @@ enum ibv_wc_opcode {
IBV_WC_RECV_RDMA_WITH_IMM
 };
 
+enum {
+   IBV_WC_IP_CSUM_OK_SHIFT = 2
+};
+
 enum ibv_wc_flags {
IBV_WC_GRH  = 1 << 0,
-   IBV_WC_WITH_IMM = 1 << 1
+   IBV_WC_WITH_IMM = 1 << 1,
+   IBV_WC_IP_CSUM_OK   = 1 << IBV_WC_IP_CSUM_OK_SHIFT
 };
 
 struct ibv_wc {
@@ -646,6 +657,11 @@ enum ibv_mig_state {
IBV_MIG_ARMED
 };
 
+enum ibv_csum_cap_flags {
+   IBV_CSUM_SUPPORT_UD = 1 << IBV_QPT_UD,
+   IBV_CSUM_SUPPORT_RAW= 1 << IBV_QPT_RAW_

[PATCH 1/3] IB/core: Add support of checksum capability reporting in ib verbs

2015-09-16 Thread Bodong Wang
A new field csum_cap is added to both ib_query_device. It contains two members:
eth_csum_cap and ib_csum_cap, indicating the checksum capability of the Ethernet and
Infiniband link layers respectively for different QP types.

Current checksum caps use the following enum members:
- IB_CSUM_SUPPORT_UD: device supports validation/calculation of csum for UD QP.
- IB_CSUM_SUPPORT_RAW: device supports validation/calculation of csum for raw 
QP.

Signed-off-by: Bodong Wang 
---
 include/rdma/ib_verbs.h | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index b0f898e..94dbaee 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -183,6 +183,11 @@ struct ib_cq_init_attr {
u32 flags;
 };
 
+struct ib_csum_cap_per_link {
+   uint32_t  eth_csum_cap;
+   uint32_t  ib_csum_cap;
+};
+
 struct ib_device_attr {
u64 fw_ver;
__be64  sys_image_guid;
@@ -229,6 +234,7 @@ struct ib_device_attr {
struct ib_odp_caps  odp_caps;
uint64_ttimestamp_mask;
uint64_thca_core_clock; /* in KHZ */
+   struct ib_csum_cap_per_link csum_cap;
 };
 
 enum ib_mtu {
@@ -868,6 +874,10 @@ enum ib_qp_create_flags {
IB_QP_CREATE_RESERVED_END   = 1 << 31,
 };
 
+enum ib_csum_cap_flags {
+   IB_CSUM_SUPPORT_UD  = (1 << IB_QPT_UD),
+   IB_CSUM_SUPPORT_RAW = (1 << IB_QPT_RAW_PACKET),
+};
 
 /*
  * Note: users may not call ib_close_qp or ib_destroy_qp from the event_handler
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/3] IB/uverbs: Add support for checksum capability reporting in user verbs

2015-09-16 Thread Bodong Wang
New field csum_cap is added to respective uverbs counterpart according
to ib_verbs.

Signed-off-by: Bodong Wang 
---
 drivers/infiniband/core/uverbs_cmd.c | 7 +++
 include/uapi/rdma/ib_user_verbs.h| 6 ++
 2 files changed, 13 insertions(+)

diff --git a/drivers/infiniband/core/uverbs_cmd.c 
b/drivers/infiniband/core/uverbs_cmd.c
index bbb02ff..9d5deec 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -3464,6 +3464,13 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file 
*file,
resp.hca_core_clock = attr.hca_core_clock;
resp.response_length += sizeof(resp.hca_core_clock);
 
+   if (ucore->outlen < resp.response_length + sizeof(resp.csum_cap))
+   goto end;
+
+   resp.csum_cap.eth_csum_cap = attr.csum_cap.eth_csum_cap;
+   resp.csum_cap.ib_csum_cap = attr.csum_cap.ib_csum_cap;
+   resp.response_length += sizeof(resp.csum_cap);
+
 end:
err = ib_copy_to_udata(ucore, &resp, resp.response_length);
if (err)
diff --git a/include/uapi/rdma/ib_user_verbs.h 
b/include/uapi/rdma/ib_user_verbs.h
index 978841e..9d69546 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -218,6 +218,11 @@ struct ib_uverbs_odp_caps {
__u32 reserved;
 };
 
+struct ib_uverbs_csum_cap_per_link {
+   __u32 eth_csum_cap;
+   __u32 ib_csum_cap;
+};
+
 struct ib_uverbs_ex_query_device_resp {
struct ib_uverbs_query_device_resp base;
__u32 comp_mask;
@@ -225,6 +230,7 @@ struct ib_uverbs_ex_query_device_resp {
struct ib_uverbs_odp_caps odp_caps;
__u64 timestamp_mask;
__u64 hca_core_clock; /* in KHZ */
+   struct ib_uverbs_csum_cap_per_link csum_cap;
 };
 
 struct ib_uverbs_query_port {
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 0/3] Enable checksum offload capability reporting

2015-09-16 Thread Bodong Wang
The checksum offload capability reporting is enabled based on extended verbs.
The capability field has a sub-field for every link layer and, depending on device
capabilities, each link layer supports specific QP types. These are reported to
user space.

I'm new to uverbs extensions and looking forward to review comments on that
aspect of the patches.

Bodong Wang (3):
  IB/core: Add support of checksum capability reporting in ib verbs
  IB/uverbs: Add support for checksum capability reporting in user verbs
  IB/mlx4: Report checksum offload cap when query device

 drivers/infiniband/core/uverbs_cmd.c |  7 +++
 drivers/infiniband/hw/mlx4/main.c|  3 +++
 include/rdma/ib_verbs.h  | 10 ++
 include/uapi/rdma/ib_user_verbs.h|  6 ++
 4 files changed, 26 insertions(+)

-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/3] IB/mlx4: Report checksum offload cap when query device

2015-09-16 Thread Bodong Wang
Signed-off-by: Bodong Wang 
---
 drivers/infiniband/hw/mlx4/main.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/infiniband/hw/mlx4/main.c 
b/drivers/infiniband/hw/mlx4/main.c
index 8be6db8..a70ca6a 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -217,6 +217,9 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING;
}
 
+   props->csum_cap.eth_csum_cap |= IB_CSUM_SUPPORT_RAW;
+   props->csum_cap.ib_csum_cap |= IB_CSUM_SUPPORT_UD;
+
props->vendor_id   = be32_to_cpup((__be32 *) (out_mad->data + 
36)) &
0xff;
props->vendor_part_id  = dev->dev->persist->pdev->device;
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH libmlx4] Add support for TX/RX checksum offload

2015-08-17 Thread Bodong Wang
RX checksum verification status is reported through wc_flag when polling
CQ. When IBV_WC_IP_CSUM_OK is set, that means both IPv4 header checksum and
TCP/UDP checksum are OK.

TX checksum offload will be enabled for TCP/UDP over IPv4 if user sets
send_flag IBV_SEND_IP_CSUM.

A new field, qp_cap_cache, is added to mlx4_qp in order to 'cache'
the device capabilities to minimize performance hit on poll_one
function. The capabilities are set during mlx4_modify_qp for RAW ETH
and UD QPs.

Signed-off-by: Bodong Wang 
---
 src/cq.c|  8 
 src/mlx4.h  | 16 +++-
 src/qp.c| 11 ++-
 src/verbs.c | 12 
 src/wqe.h   |  5 +
 5 files changed, 50 insertions(+), 2 deletions(-)

diff --git a/src/cq.c b/src/cq.c
index 8b27795..186b960 100644
--- a/src/cq.c
+++ b/src/cq.c
@@ -329,6 +329,14 @@ static int mlx4_poll_one(struct mlx4_cq *cq,
wc->sl = ntohs(cqe->sl_vid) >> 13;
else
wc->sl = ntohs(cqe->sl_vid) >> 12;
+
+   if ((*cur_qp) && ((*cur_qp)->qp_cap_cache & MLX4_RX_CSUM_IPV4)) 
{
+   uint32_t status = ntohl(cqe->status);
+   wc->wc_flags |= ((status & 
MLX4_CQE_STATUS_IPV4_CSUM_OK) ==
+MLX4_CQE_STATUS_IPV4_CSUM_OK) <<
+   IBV_WC_IP_CSUM_OK_SHIFT;
+   }
+
}
 
return CQ_OK;
diff --git a/src/mlx4.h b/src/mlx4.h
index d71450f..ec7c58d 100644
--- a/src/mlx4.h
+++ b/src/mlx4.h
@@ -257,6 +257,7 @@ struct mlx4_qp {
struct mlx4_wq  rq;
 
uint8_t link_layer;
+   uint8_t qp_cap_cache;
 };
 
 struct mlx4_av {
@@ -279,6 +280,19 @@ struct mlx4_ah {
uint8_t mac[6];
 };
 
+enum {
+   MLX4_RX_CSUM_IPV4 = (1 <<  0),
+};
+
+enum mlx4_cqe_status {
+   MLX4_CQE_STATUS_TCP_UDP_CSUM_OK = (1 <<  2),
+   MLX4_CQE_STATUS_IPV4_PKT= (1 << 22),
+   MLX4_CQE_STATUS_IP_HDR_CSUM_OK  = (1 << 28),
+   MLX4_CQE_STATUS_IPV4_CSUM_OK= MLX4_CQE_STATUS_IPV4_PKT |
+   MLX4_CQE_STATUS_IP_HDR_CSUM_OK |
+   MLX4_CQE_STATUS_TCP_UDP_CSUM_OK
+};
+
 struct mlx4_cqe {
uint32_tvlan_my_qpn;
uint32_timmed_rss_invalid;
@@ -286,7 +300,7 @@ struct mlx4_cqe {
uint8_t sl_vid;
uint8_t reserved1;
uint16_trlid;
-   uint32_treserved2;
+   uint32_tstatus;
uint32_tbyte_cnt;
uint16_twqe_index;
uint16_tchecksum;
diff --git a/src/qp.c b/src/qp.c
index 721bed4..eb4c488 100644
--- a/src/qp.c
+++ b/src/qp.c
@@ -200,6 +200,7 @@ int mlx4_post_send(struct ibv_qp *ibqp, struct ibv_send_wr 
*wr,
int ret = 0;
int size;
int i;
+   int is_csum;
 
pthread_spin_lock(&qp->sq.lock);
 
@@ -286,15 +287,23 @@ int mlx4_post_send(struct ibv_qp *ibqp, struct 
ibv_send_wr *wr,
break;
 
case IBV_QPT_UD:
+   is_csum = !!(wr->send_flags & IBV_SEND_IP_CSUM);
set_datagram_seg(wqe, wr);
wqe  += sizeof (struct mlx4_wqe_datagram_seg);
size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
+
+   ctrl->srcrb_flags |=
+   htonl((is_csum << 
MLX4_WQE_CTRL_IP_HDR_CSUM_SHIFT) |
+ (is_csum << 
MLX4_WQE_CTRL_TCP_UDP_CSUM_SHIFT));
break;
 
case IBV_QPT_RAW_PACKET:
+   is_csum = !!(wr->send_flags & IBV_SEND_IP_CSUM);
/* For raw eth, the MLX4_WQE_CTRL_SOLICIT flag is used
 * to indicate that no icrc should be calculated */
-   ctrl->srcrb_flags |= htonl(MLX4_WQE_CTRL_SOLICIT);
+   ctrl->srcrb_flags |= htonl(MLX4_WQE_CTRL_SOLICIT |
+   (is_csum << MLX4_WQE_CTRL_IP_HDR_CSUM_SHIFT) |
+   (is_csum << MLX4_WQE_CTRL_TCP_UDP_CSUM_SHIFT));
break;
 
default:
diff --git a/src/verbs.c b/src/verbs.c
index 623d576..3fbbbf9 100644
--- a/src/verbs.c
+++ b/src/verbs.c
@@ -606,14 +606,26 @@ int mlx4_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr 
*attr,
struct ibv_modify_qp cmd;
struct ibv_port_attr port_attr;
struct mlx4_qp *mqp = to_mqp(qp);
+   struct ibv_device_attr device_attr;
int ret;
 
+   memset(&device_attr, 0, sizeof(device_attr));
if (attr_mask & IBV_QP_PORT) {
ret = ibv_query_port(qp->c

[PATCH libibverbs] Add support for TX/RX checksum offload

2015-08-17 Thread Bodong Wang
Add a device capability flag IBV_DEVICE_IP_CSUM to denote IPv4 checksum
offload support. Devices should set this flag if they support
insertion/verification of IPv4, TCP and UDP checksums on
outgoing/incoming IPv4 packets sent over IB UD or ETH RAW PACKET QPs.

Flags IBV_SEND_IP_CSUM and IBV_WC_IP_CSUM_OK are added for utilizing this
capability for send and receive separately.

Change-Id: Ie02d708dcbef07ca0d2eac1b156f12aafdba6a97
Signed-off-by: Moshe Lazer 
Signed-off-by: Or Gerlitz 
Signed-off-by: Bodong Wang 
---
 include/infiniband/verbs.h | 11 +--
 man/ibv_poll_cq.3  |  3 +++
 man/ibv_post_send.3|  4 
 3 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/include/infiniband/verbs.h b/include/infiniband/verbs.h
index 28e1586..6ae7e6e 100644
--- a/include/infiniband/verbs.h
+++ b/include/infiniband/verbs.h
@@ -115,6 +115,7 @@ enum ibv_device_cap_flags {
IBV_DEVICE_RC_RNR_NAK_GEN   = 1 << 12,
IBV_DEVICE_SRQ_RESIZE   = 1 << 13,
IBV_DEVICE_N_NOTIFY_CQ  = 1 << 14,
+   IBV_DEVICE_IP_CSUM  = 1 << 18,
IBV_DEVICE_XRC  = 1 << 20,
IBV_DEVICE_MANAGED_FLOW_STEERING = 1 << 29
 };
@@ -314,9 +315,14 @@ enum ibv_wc_opcode {
IBV_WC_RECV_RDMA_WITH_IMM
 };
 
+enum {
+   IBV_WC_IP_CSUM_OK_SHIFT = 2
+};
+
 enum ibv_wc_flags {
IBV_WC_GRH  = 1 << 0,
-   IBV_WC_WITH_IMM = 1 << 1
+   IBV_WC_WITH_IMM = 1 << 1,
+   IBV_WC_IP_CSUM_OK   = 1 << IBV_WC_IP_CSUM_OK_SHIFT
 };
 
 struct ibv_wc {
@@ -653,7 +659,8 @@ enum ibv_send_flags {
IBV_SEND_FENCE  = 1 << 0,
IBV_SEND_SIGNALED   = 1 << 1,
IBV_SEND_SOLICITED  = 1 << 2,
-   IBV_SEND_INLINE = 1 << 3
+   IBV_SEND_INLINE = 1 << 3,
+   IBV_SEND_IP_CSUM= 1 << 4
 };
 
 struct ibv_sge {
diff --git a/man/ibv_poll_cq.3 b/man/ibv_poll_cq.3
index 57c6daa..539940d 100644
--- a/man/ibv_poll_cq.3
+++ b/man/ibv_poll_cq.3
@@ -50,6 +50,9 @@ It is either 0 or the bitwise OR of one or more of the 
following flags:
 .B IBV_WC_GRH \fR  GRH is present (valid only for UD QPs)
 .TP
 .B IBV_WC_WITH_IMM \fR Immediate data value is valid
+.TP
+.B IBV_WC_IP_CSUM_OK \fR TCP/UDP checksum over IPv4 and IPv4 header checksum 
are verified.
+This feature is supported only when \fBIBV_DEVICE_IP_CSUM\fR flag is set in 
the device capability flags.
 .PP
 Not all
 .I wc
diff --git a/man/ibv_post_send.3 b/man/ibv_post_send.3
index 33fbb50..3b07bcb 100644
--- a/man/ibv_post_send.3
+++ b/man/ibv_post_send.3
@@ -98,6 +98,10 @@ The attribute send_flags describes the properties of the 
\s-1WR\s0. It is either
 .TP
 .B IBV_SEND_INLINE \fR Send data in given gather list as inline data
 in a send WQE.  Valid only for Send and RDMA Write.  The L_Key will not be 
checked.
++.TP
++.B IBV_SEND_IP_CSUM \fR Offload the IPv4 and TCP/UDP checksum calculation.
++Valid only for QPs with Transport Service Type \fBIBV_QPT_UD\fR or 
\fBIBV_QPT_RAW_PACKET\fR.
++This feature is supported only when the \fBIBV_DEVICE_IP_CSUM\fR flag is set in 
the device capability flags.
 .SH "RETURN VALUE"
 .B ibv_post_send()
 returns 0 on success, or the value of errno on failure (which indicates the 
failure reason).
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html