RE: [[PATCH v1] 05/37] [CIFS] SMBD: Implement API for upper layer to create SMBD transport and establish RDMA connection

2017-08-29 Thread Long Li


> -Original Message-
> From: Tom Talpey
> Sent: Monday, August 14, 2017 12:55 PM
> To: Long Li ; Steve French ;
> linux-c...@vger.kernel.org; samba-techni...@lists.samba.org; linux-
> ker...@vger.kernel.org; linux-r...@vger.kernel.org
> Subject: RE: [[PATCH v1] 05/37] [CIFS] SMBD: Implement API for upper layer
> to create SMBD transport and establish RDMA connection
> 
> > -Original Message-
> > From: linux-cifs-ow...@vger.kernel.org [mailto:linux-cifs-
> > ow...@vger.kernel.org] On Behalf Of Long Li
> > Sent: Wednesday, August 2, 2017 4:10 PM
> > To: Steve French ; linux-c...@vger.kernel.org;
> > samba-techni...@lists.samba.org; linux-kernel@vger.kernel.org
> > Cc: Long Li 
> > Subject: [[PATCH v1] 05/37] [CIFS] SMBD: Implement API for upper layer
> > to create SMBD transport and establish RDMA connection
> >
> > From: Long Li 
> >
> > Implement the code for connecting to the SMBD server. The client and
> > server are connected using an RC Queue Pair over the RDMA API, which
> > supports InfiniBand, RoCE and iWARP. Upper layer code can call
> > cifs_create_rdma_session to establish an SMBD RDMA connection.
> >
> > +/* Upcall from RDMA CM */
> > +static int cifs_rdma_conn_upcall(
> > +   struct rdma_cm_id *id, struct rdma_cm_event *event) {
> > +   struct cifs_rdma_info *info = id->context;
> > +
> > +   log_rdma_event("event=%d status=%d\n", event->event, event->status);
> > +
> > +   switch (event->event) {
> > +   case RDMA_CM_EVENT_ADDR_RESOLVED:
> > +   case RDMA_CM_EVENT_ROUTE_RESOLVED:
> > +   info->ri_rc = 0;
> > +   complete(&info->ri_done);
> > +   break;
> > +
> > +   case RDMA_CM_EVENT_ADDR_ERROR:
> > +   info->ri_rc = -EHOSTUNREACH;
> > +   complete(&info->ri_done);
> > +   break;
> > +
> > +   case RDMA_CM_EVENT_ROUTE_ERROR:
> > +   info->ri_rc = -ENETUNREACH;
> > +   complete(&info->ri_done);
> > +   break;
> > +
> > +   case RDMA_CM_EVENT_ESTABLISHED:
> > +   case RDMA_CM_EVENT_CONNECT_ERROR:
> > +   case RDMA_CM_EVENT_UNREACHABLE:
> > +   case RDMA_CM_EVENT_REJECTED:
> > +   case RDMA_CM_EVENT_DEVICE_REMOVAL:
> > +   log_rdma_event("connected event=%d\n", event->event);
> > +   info->connect_state = event->event;
> > +   break;
> > +
> > +   case RDMA_CM_EVENT_DISCONNECTED:
> > +   break;
> > +
> > +   default:
> > +   break;
> > +   }
> > +
> > +   return 0;
> > +}
> 
> This code looks a lot like the connection stuff in the NFS/RDMA RPC transport.
> Does your code have the same needs? If so, you might consider moving this
> to a common RDMA handler.

> 
> > +/* Upcall from RDMA QP */
> > +static void
> > +cifs_rdma_qp_async_error_upcall(struct ib_event *event, void *context)
> > +{
> > +   struct cifs_rdma_info *info = context;
> > +   log_rdma_event("%s on device %s info %p\n",
> > +   ib_event_msg(event->event), event->device->name, info);
> > +
> > +   switch (event->event)
> > +   {
> > +   case IB_EVENT_CQ_ERR:
> > +   case IB_EVENT_QP_FATAL:
> > +   case IB_EVENT_QP_REQ_ERR:
> > +   case IB_EVENT_QP_ACCESS_ERR:
> > +
> > +   default:
> > +   break;
> > +   }
> > +}
> 
> Ditto. But, what's up with the empty switch(event->event) processing?

I have changed the code to disconnect the RDMA connection on QP errors.
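
For reference, a minimal sketch of that change (hypothetical, not the actual
V3 hunk; it assumes calling rdma_disconnect() on the connection's cm_id is
enough to kick off teardown, with the RDMA CM upcall driving the cleanup):

	/* Sketch only: disconnect on fatal asynchronous QP errors */
	static void
	cifs_rdma_qp_async_error_upcall(struct ib_event *event, void *context)
	{
		struct cifs_rdma_info *info = context;

		log_rdma_event("%s on device %s info %p\n",
			ib_event_msg(event->event), event->device->name, info);

		switch (event->event) {
		case IB_EVENT_CQ_ERR:
		case IB_EVENT_QP_FATAL:
		case IB_EVENT_QP_REQ_ERR:
		case IB_EVENT_QP_ACCESS_ERR:
			/* QP is no longer usable; initiate a disconnect so
			 * the CM upcall performs the rest of the cleanup */
			rdma_disconnect(info->id);
			break;
		default:
			break;
		}
	}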


> 
> > +static struct rdma_cm_id* cifs_rdma_create_id(
> > +   struct cifs_rdma_info *info, struct sockaddr *dstaddr)
> > +{
> ...
> > +   log_rdma_event("connecting to IP %pI4 port %d\n",
> > +   &addr_in->sin_addr, ntohs(addr_in->sin_port));
> >... and then...
> > +   if (dstaddr->sa_family == AF_INET6)
> > +   sport = &((struct sockaddr_in6 *)dstaddr)->sin6_port;
> > +   else
> > +   sport = &((struct sockaddr_in *)dstaddr)->sin_port;
> > +
> > +   *sport = htons(445);
> ...and
> > +out:
> > +   // try port number 5445 if port 445 doesn't work
> > +   if (*sport == htons(445)) {
> > +   *sport = htons(5445);
> > +   goto try_again;
> > +   }
> 
> Suggest rearranging the log_rdma_event() call to reflect reality.
> 
> The IANA-assigned port for SMB Direct is 5445, and port 445 will be listening
> on TCP. Should you really be probing that port before 5445?
> I suggest not doing so unconditionally.

This part is reworked in V3 to behave as you suggested.
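
In rough terms, the reworked probe order could look like the following
(a sketch only; smbd_set_port() and resolve_and_connect() are illustrative
names, not helpers from the patch):

	static void smbd_set_port(struct sockaddr *dstaddr, u16 port)
	{
		if (dstaddr->sa_family == AF_INET6)
			((struct sockaddr_in6 *)dstaddr)->sin6_port = htons(port);
		else
			((struct sockaddr_in *)dstaddr)->sin_port = htons(port);
	}

	/* try the IANA-assigned SMB Direct port first */
	smbd_set_port(dstaddr, 5445);
	rc = resolve_and_connect(info, dstaddr);

	/* 445 is normally a TCP listener; only probe it as an RDMA
	 * endpoint when explicitly configured to do so */
	if (rc && try_port_445) {
		smbd_set_port(dstaddr, 445);
		rc = resolve_and_connect(info, dstaddr);
	}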

> 
> > +struct cifs_rdma_info* cifs_create_rdma_session(
> > +   struct TCP_Server_Info *server, struct sockaddr *dstaddr) {
> > ...
> > +   int max_pending = receive_credit_max + send_credit_target;
> >...
> > +   if (max_pending > info->id->device->attrs.max_cqe ||
> > +   max_pending > info->id->device->attrs.max_qp_wr) {
> > +   log_rdma_event("consider lowering receive_credit_max and "
> > +  

RE: [[PATCH v1] 05/37] [CIFS] SMBD: Implement API for upper layer to create SMBD transport and establish RDMA connection

2017-08-14 Thread Tom Talpey
> -Original Message-
> From: linux-cifs-ow...@vger.kernel.org [mailto:linux-cifs-
> ow...@vger.kernel.org] On Behalf Of Long Li
> Sent: Wednesday, August 2, 2017 4:10 PM
> To: Steve French ; linux-c...@vger.kernel.org; samba-
> techni...@lists.samba.org; linux-kernel@vger.kernel.org
> Cc: Long Li 
> Subject: [[PATCH v1] 05/37] [CIFS] SMBD: Implement API for upper layer to
> create SMBD transport and establish RDMA connection
> 
> From: Long Li 
> 
> Implement the code for connecting to the SMBD server. The client and server
> are connected using an RC Queue Pair over the RDMA API, which supports
> InfiniBand, RoCE and iWARP. Upper layer code can call cifs_create_rdma_session
> to establish an SMBD RDMA connection.
> 
> +/* Upcall from RDMA CM */
> +static int cifs_rdma_conn_upcall(
> +   struct rdma_cm_id *id, struct rdma_cm_event *event)
> +{
> +   struct cifs_rdma_info *info = id->context;
> +
> +   log_rdma_event("event=%d status=%d\n", event->event, event->status);
> +
> +   switch (event->event) {
> +   case RDMA_CM_EVENT_ADDR_RESOLVED:
> +   case RDMA_CM_EVENT_ROUTE_RESOLVED:
> +   info->ri_rc = 0;
> +   complete(&info->ri_done);
> +   break;
> +
> +   case RDMA_CM_EVENT_ADDR_ERROR:
> +   info->ri_rc = -EHOSTUNREACH;
> +   complete(&info->ri_done);
> +   break;
> +
> +   case RDMA_CM_EVENT_ROUTE_ERROR:
> +   info->ri_rc = -ENETUNREACH;
> +   complete(&info->ri_done);
> +   break;
> +
> +   case RDMA_CM_EVENT_ESTABLISHED:
> +   case RDMA_CM_EVENT_CONNECT_ERROR:
> +   case RDMA_CM_EVENT_UNREACHABLE:
> +   case RDMA_CM_EVENT_REJECTED:
> +   case RDMA_CM_EVENT_DEVICE_REMOVAL:
> +   log_rdma_event("connected event=%d\n", event->event);
> +   info->connect_state = event->event;
> +   break;
> +
> +   case RDMA_CM_EVENT_DISCONNECTED:
> +   break;
> +
> +   default:
> +   break;
> +   }
> +
> +   return 0;
> +}

This code looks a lot like the connection stuff in the NFS/RDMA RPC transport.
Does your code have the same needs? If so, you might consider moving this to
a common RDMA handler.
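
If the two transports did share this, one possible shape is a helper that
consumes just the resolution events both upcalls treat identically
(hypothetical code, not an existing kernel API):

	/* Returns nonzero if the event was an address/route resolution
	 * event and has been consumed; the caller handles everything else. */
	static int rdma_resolution_event(struct rdma_cm_event *event,
					 int *rc, struct completion *done)
	{
		switch (event->event) {
		case RDMA_CM_EVENT_ADDR_RESOLVED:
		case RDMA_CM_EVENT_ROUTE_RESOLVED:
			*rc = 0;
			break;
		case RDMA_CM_EVENT_ADDR_ERROR:
			*rc = -EHOSTUNREACH;
			break;
		case RDMA_CM_EVENT_ROUTE_ERROR:
			*rc = -ENETUNREACH;
			break;
		default:
			return 0;
		}
		complete(done);
		return 1;
	}

Each transport's cm upcall would call this first, and fall through to its
own connection-state handling when it returns 0.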

> +/* Upcall from RDMA QP */
> +static void
> +cifs_rdma_qp_async_error_upcall(struct ib_event *event, void *context)
> +{
> +   struct cifs_rdma_info *info = context;
> +   log_rdma_event("%s on device %s info %p\n",
> +   ib_event_msg(event->event), event->device->name, info);
> +
> +   switch (event->event)
> +   {
> +   case IB_EVENT_CQ_ERR:
> +   case IB_EVENT_QP_FATAL:
> +   case IB_EVENT_QP_REQ_ERR:
> +   case IB_EVENT_QP_ACCESS_ERR:
> +
> +   default:
> +   break;
> +   }
> +}

Ditto. But, what's up with the empty switch(event->event) processing?

> +static struct rdma_cm_id* cifs_rdma_create_id(
> +   struct cifs_rdma_info *info, struct sockaddr *dstaddr)
> +{
...
> +   log_rdma_event("connecting to IP %pI4 port %d\n",
> +   &addr_in->sin_addr, ntohs(addr_in->sin_port));
>... and then...
> +   if (dstaddr->sa_family == AF_INET6)
> +   sport = &((struct sockaddr_in6 *)dstaddr)->sin6_port;
> +   else
> +   sport = &((struct sockaddr_in *)dstaddr)->sin_port;
> +
> +   *sport = htons(445);
...and
> +out:
> +   // try port number 5445 if port 445 doesn't work
> +   if (*sport == htons(445)) {
> +   *sport = htons(5445);
> +   goto try_again;
> +   }

Suggest rearranging the log_rdma_event() call to reflect reality.

The IANA-assigned port for SMB Direct is 5445, and port 445 will be
listening on TCP. Should you really be probing that port before 5445?
I suggest not doing so unconditionally.

> +struct cifs_rdma_info* cifs_create_rdma_session(
> +   struct TCP_Server_Info *server, struct sockaddr *dstaddr)
> +{
> ...
> +   int max_pending = receive_credit_max + send_credit_target;
>...
> +   if (max_pending > info->id->device->attrs.max_cqe ||
> +   max_pending > info->id->device->attrs.max_qp_wr) {
> +   log_rdma_event("consider lowering receive_credit_max and "
> +   "send_credit_target. Possible CQE overrun, device "
> +   "reporting max_cpe %d max_qp_wr %d\n",
> +   info->id->device->attrs.max_cqe,
> +   info->id->device->attrs.max_qp_wr);
> +   goto out2;
> +   }

I don't understand this. Why are you directing both Receive and Send
completions to the same CQ? Won't that make it very hard to manage
completions and their interrupts? Also, what device(s) have you seen
trigger this log? CQs are generally allowed to be quite large.
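
For comparison, splitting the completions might look roughly like this
(sketch only: the send_cq/recv_cq fields and the out2 label are assumed,
not in this patch; ib_alloc_cq() sizes and polls each queue independently):

	info->send_cq = ib_alloc_cq(info->id->device, info,
				    send_credit_target, 0, IB_POLL_SOFTIRQ);
	if (IS_ERR(info->send_cq))
		goto out2;

	info->recv_cq = ib_alloc_cq(info->id->device, info,
				    receive_credit_max, 0, IB_POLL_SOFTIRQ);
	if (IS_ERR(info->recv_cq))
		goto out2;

	/* struct ib_qp_init_attr then takes one CQ per direction */
	qp_attr.send_cq = info->send_cq;
	qp_attr.recv_cq = info->recv_cq;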

> +   conn_param.responder_resources = 32;
> +   if (info->id->device->attrs.max_qp_rd_atom < 32)
> +   conn_param.responder_resources =
> +   
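
The usual pattern at this point is to clamp to whatever the device
advertises, along these lines (an illustrative guess, not the patch text):

	conn_param.responder_resources = 32;
	if (info->id->device->attrs.max_qp_rd_atom < 32)
		conn_param.responder_resources =
			info->id->device->attrs.max_qp_rd_atom;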

[[PATCH v1] 05/37] [CIFS] SMBD: Implement API for upper layer to create SMBD transport and establish RDMA connection

2017-08-02 Thread Long Li
From: Long Li 

Implement the code for connecting to the SMBD server. The client and server are
connected using an RC Queue Pair over the RDMA API, which supports InfiniBand,
RoCE and iWARP. Upper layer code can call cifs_create_rdma_session to establish
an SMBD RDMA connection.

Signed-off-by: Long Li 
---
 fs/cifs/cifsrdma.c | 257 +
 fs/cifs/cifsrdma.h |  14 +++
 2 files changed, 271 insertions(+)

diff --git a/fs/cifs/cifsrdma.c b/fs/cifs/cifsrdma.c
index 7c4c178..b18fb79 100644
--- a/fs/cifs/cifsrdma.c
+++ b/fs/cifs/cifsrdma.c
@@ -120,3 +120,260 @@ do {  \
atomic_read(&info->send_credits),   \
info->send_credit_target);  \
 } while (0)
+
+/* Upcall from RDMA CM */
+static int cifs_rdma_conn_upcall(
+   struct rdma_cm_id *id, struct rdma_cm_event *event)
+{
+   struct cifs_rdma_info *info = id->context;
+
+   log_rdma_event("event=%d status=%d\n", event->event, event->status);
+
+   switch (event->event) {
+   case RDMA_CM_EVENT_ADDR_RESOLVED:
+   case RDMA_CM_EVENT_ROUTE_RESOLVED:
+   info->ri_rc = 0;
+   complete(&info->ri_done);
+   break;
+
+   case RDMA_CM_EVENT_ADDR_ERROR:
+   info->ri_rc = -EHOSTUNREACH;
+   complete(&info->ri_done);
+   break;
+
+   case RDMA_CM_EVENT_ROUTE_ERROR:
+   info->ri_rc = -ENETUNREACH;
+   complete(&info->ri_done);
+   break;
+
+   case RDMA_CM_EVENT_ESTABLISHED:
+   case RDMA_CM_EVENT_CONNECT_ERROR:
+   case RDMA_CM_EVENT_UNREACHABLE:
+   case RDMA_CM_EVENT_REJECTED:
+   case RDMA_CM_EVENT_DEVICE_REMOVAL:
+   log_rdma_event("connected event=%d\n", event->event);
+   info->connect_state = event->event;
+   break;
+
+   case RDMA_CM_EVENT_DISCONNECTED:
+   break;
+
+   default:
+   break;
+   }
+
+   return 0;
+}
+
+/* Upcall from RDMA QP */
+static void
+cifs_rdma_qp_async_error_upcall(struct ib_event *event, void *context)
+{
+   struct cifs_rdma_info *info = context;
+   log_rdma_event("%s on device %s info %p\n",
+   ib_event_msg(event->event), event->device->name, info);
+
+   switch (event->event)
+   {
+   case IB_EVENT_CQ_ERR:
+   case IB_EVENT_QP_FATAL:
+   case IB_EVENT_QP_REQ_ERR:
+   case IB_EVENT_QP_ACCESS_ERR:
+
+   default:
+   break;
+   }
+}
+
+static struct rdma_cm_id* cifs_rdma_create_id(
+   struct cifs_rdma_info *info, struct sockaddr *dstaddr)
+{
+   struct rdma_cm_id *id;
+   int rc;
+   struct sockaddr_in *addr_in = (struct sockaddr_in*) dstaddr;
+   __be16 *sport;
+
+   log_rdma_event("connecting to IP %pI4 port %d\n",
+   &addr_in->sin_addr, ntohs(addr_in->sin_port));
+
+   id = rdma_create_id(&init_net, cifs_rdma_conn_upcall, info,
+   RDMA_PS_TCP, IB_QPT_RC);
+   if (IS_ERR(id)) {
+   rc = PTR_ERR(id);
+   log_rdma_event("rdma_create_id() failed %i\n", rc);
+   return id;
+   }
+
+   if (dstaddr->sa_family == AF_INET6)
+   sport = &((struct sockaddr_in6 *)dstaddr)->sin6_port;
+   else
+   sport = &((struct sockaddr_in *)dstaddr)->sin_port;
+
+   *sport = htons(445);
+try_again:
+   init_completion(&info->ri_done);
+   info->ri_rc = -ETIMEDOUT;
+   rc = rdma_resolve_addr(id, NULL, (struct sockaddr*)dstaddr, 5000);
+   if (rc) {
+   log_rdma_event("rdma_resolve_addr() failed %i\n", rc);
+   goto out;
+   }
+   wait_for_completion_interruptible_timeout(
+   &info->ri_done, msecs_to_jiffies(8000));
+   rc = info->ri_rc;
+   if (rc) {
+   log_rdma_event("rdma_resolve_addr() completed %i\n", rc);
+   goto out;
+   }
+
+   info->ri_rc = -ETIMEDOUT;
+   rc = rdma_resolve_route(id, 5000);
+   if (rc) {
+   log_rdma_event("rdma_resolve_route() failed %i\n", rc);
+   goto out;
+   }
+   wait_for_completion_interruptible_timeout(
+   &info->ri_done, msecs_to_jiffies(8000));
+   rc = info->ri_rc;
+   if (rc) {
+   log_rdma_event("rdma_resolve_route() completed %i\n", rc);
+   goto out;
+   }
+
+   return id;
+
+out:
+   // try port number 5445 if port 445 doesn't work
+   if (*sport == htons(445)) {
+   *sport = htons(5445);
+   goto try_again;
+   }
+   rdma_destroy_id(id);
+   return ERR_PTR(rc);
+}
+
+static int cifs_rdma_ia_open(
+   struct cifs_rdma_info *info, struct sockaddr *dstaddr)
+{
+   int rc;
+
+   info->id = cifs_rdma_create_id(info, dstaddr);
+   if (IS_ERR(info->id)) {
+   rc =
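
For context, a sketch of how an upper layer might consume this API during
session setup (assuming a NULL return on failure and a server->rdma field to
cache the transport; both are illustrative, not part of this patch):

	struct cifs_rdma_info *rdma;

	rdma = cifs_create_rdma_session(server,
			(struct sockaddr *)&server->dstaddr);
	if (!rdma)
		return -ENETUNREACH;	/* fall back to TCP or fail the mount */
	server->rdma = rdma;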