[Patch v2 03/19] CIFS: SMBD: Implement SMBDirect

2017-08-20 Thread Long Li
From: Long Li 

Add code to implement SMBDirect transport and protocol.

1. Add APIs in header file. Upper layer code uses transport through the APIs.
2. Define SMBDirect connection in the header file. A connection is based on a 
RC QP in RDMA.
3. The implementation doesn't maintain send buffers or send queue for 
transferring payload via RDMA send. There is no data copy in the transport on 
send.
4. On the receive path, the implementation maintains receive buffers and a 
reassembly queue for transferring payload via RDMA recv. There is data copy in 
the transport on recv.
5. The implementation recognizes the RFC1002 header length used in the SMB upper 
layer payloads in CIFS. Because this length is mainly used for TCP and is not 
applicable to RDMA, it is handled as out-of-band information never sent over 
the wire, and the transport behaves like TCP to the upper layer by processing and 
exposing the length correctly on data payloads.
6. SMBDirect protocol enforces credits on RDMA send or recv, credits are 
exchanged and mutually managed by SMB server and client.
7. Each connection defines a user-configurable rdma_readwrite_threshold. Upper 
layer payloads larger than rdma_readwrite_threshold are sent through RDMA read, 
and received via RDMA write. There is a fixed number of registered memory 
regions per connection for doing RDMA read/write. There is no data copy in the 
transport on RDMA read/write.
8. There is a choice between workqueue and softirq for RDMA notification calls 
on CQ completions. Benchmarks show no visible difference between the two. 
This implementation chooses workqueue (IB_POLL_WORKQUEUE), which also avoids using 
spin_lock_irqsave (spin_lock is used instead) throughout the code.

Signed-off-by: Long Li 
---
 fs/cifs/smbdirect.c | 2250 +++
 fs/cifs/smbdirect.h |  280 +++
 2 files changed, 2530 insertions(+)

diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c
index d785bc1..01bf418 100644
--- a/fs/cifs/smbdirect.c
+++ b/fs/cifs/smbdirect.c
@@ -17,6 +17,35 @@
 #include "smbdirect.h"
 #include "cifs_debug.h"
 
+static struct smbd_response *get_receive_buffer(
+   struct smbd_connection *info);
+static void put_receive_buffer(
+   struct smbd_connection *info,
+   struct smbd_response *response);
+static int allocate_receive_buffers(struct smbd_connection *info, int num_buf);
+static void destroy_receive_buffers(struct smbd_connection *info);
+
+static void enqueue_reassembly(
+   struct smbd_connection *info,
+   struct smbd_response *response, int data_length);
+static struct smbd_response *_get_first_reassembly(
+   struct smbd_connection *info);
+
+static int smbd_post_recv(
+   struct smbd_connection *info,
+   struct smbd_response *response);
+
+static int smbd_post_send_empty(struct smbd_connection *info);
+static int smbd_post_send_data(
+   struct smbd_connection *info,
+   struct kvec *iov, int n_vec, int remaining_data_length);
+static int smbd_post_send_page(struct smbd_connection *info,
+   struct page *page, unsigned long offset,
+   size_t size, int remaining_data_length);
+
+static void destroy_mr_list(struct smbd_connection *info);
+static int allocate_mr_list(struct smbd_connection *info);
+
 /* SMBD version number */
 #define SMBD_V1 0x0100
 
@@ -76,3 +105,2224 @@ static int max_frmr_depth = 2048;
 
 /* If payload is less than this byte, use RDMA send/recv not read/write */
 static int rdma_readwrite_threshold = 4096;
+
+/* Transport logging functions
+ * Logging is organized into classes. Classes can be OR'ed together to select
+ * what gets logged via the module parameter smbd_logging_class
+ * e.g. cifs.smbd_logging_class=0x500 will log all log_rdma_recv() and
+ * log_rdma_event()
+ */
+#define LOG_CREDIT 0x1
+#define LOG_OUTGOING   0x2
+#define LOG_INCOMING   0x4
+#define LOG_RECEIVE_QUEUE  0x8
+#define LOG_REASSEMBLY_QUEUE   0x10
+#define LOG_READ   0x20
+#define LOG_WRITE  0x40
+#define LOG_RDMA_SEND  0x80
+#define LOG_RDMA_RECV  0x100
+#define LOG_KEEP_ALIVE 0x200
+#define LOG_RDMA_EVENT 0x400
+#define LOG_RDMA_MR    0X800
+
+static unsigned int smbd_logging_class = LOG_RDMA_MR;
+module_param(smbd_logging_class, uint, 0644);
+MODULE_PARM_DESC(smbd_logging_class,
+   "Logging class for SMBD transport 0x0 to 0xfff");
+
+#define log_rdma(class, fmt, args...)  \
+do {   \
+   if (class & smbd_logging_class) \
+   cifs_dbg(VFS, "%s:%d " fmt, __func__, __LINE__, ##args);\
+} while (0)
+
+#define 

[Patch v2 03/19] CIFS: SMBD: Implement SMBDirect

2017-08-20 Thread Long Li
From: Long Li 

Add code to implement SMBDirect transport and protocol.

1. Add APIs in header file. Upper layer code uses transport through the APIs.
2. Define SMBDirect connection in the header file. A connection is based on a 
RC QP in RDMA.
3. The implementation doesn't maintain send buffers or send queue for 
transferring payload via RDMA send. There is no data copy in the transport on 
send.
4. On the receive path, the implementation maintains receive buffers and a 
reassembly queue for transferring payload via RDMA recv. There is data copy in 
the transport on recv.
5. The implementation recognizes the RFC1002 header length used in the SMB upper 
layer payloads in CIFS. Because this length is mainly used for TCP and is not 
applicable to RDMA, it is handled as out-of-band information never sent over 
the wire, and the transport behaves like TCP to the upper layer by processing and 
exposing the length correctly on data payloads.
6. SMBDirect protocol enforces credits on RDMA send or recv, credits are 
exchanged and mutually managed by SMB server and client.
7. Each connection defines a user-configurable rdma_readwrite_threshold. Upper 
layer payloads larger than rdma_readwrite_threshold are sent through RDMA read, 
and received via RDMA write. There is a fixed number of registered memory 
regions per connection for doing RDMA read/write. There is no data copy in the 
transport on RDMA read/write.
8. There is a choice between workqueue and softirq for RDMA notification calls 
on CQ completions. Benchmarks show no visible difference between the two. 
This implementation chooses workqueue (IB_POLL_WORKQUEUE), which also avoids using 
spin_lock_irqsave (spin_lock is used instead) throughout the code.

Signed-off-by: Long Li 
---
 fs/cifs/smbdirect.c | 2250 +++
 fs/cifs/smbdirect.h |  280 +++
 2 files changed, 2530 insertions(+)

diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c
index d785bc1..01bf418 100644
--- a/fs/cifs/smbdirect.c
+++ b/fs/cifs/smbdirect.c
@@ -17,6 +17,35 @@
 #include "smbdirect.h"
 #include "cifs_debug.h"
 
+static struct smbd_response *get_receive_buffer(
+   struct smbd_connection *info);
+static void put_receive_buffer(
+   struct smbd_connection *info,
+   struct smbd_response *response);
+static int allocate_receive_buffers(struct smbd_connection *info, int num_buf);
+static void destroy_receive_buffers(struct smbd_connection *info);
+
+static void enqueue_reassembly(
+   struct smbd_connection *info,
+   struct smbd_response *response, int data_length);
+static struct smbd_response *_get_first_reassembly(
+   struct smbd_connection *info);
+
+static int smbd_post_recv(
+   struct smbd_connection *info,
+   struct smbd_response *response);
+
+static int smbd_post_send_empty(struct smbd_connection *info);
+static int smbd_post_send_data(
+   struct smbd_connection *info,
+   struct kvec *iov, int n_vec, int remaining_data_length);
+static int smbd_post_send_page(struct smbd_connection *info,
+   struct page *page, unsigned long offset,
+   size_t size, int remaining_data_length);
+
+static void destroy_mr_list(struct smbd_connection *info);
+static int allocate_mr_list(struct smbd_connection *info);
+
 /* SMBD version number */
 #define SMBD_V1 0x0100
 
@@ -76,3 +105,2224 @@ static int max_frmr_depth = 2048;
 
 /* If payload is less than this byte, use RDMA send/recv not read/write */
 static int rdma_readwrite_threshold = 4096;
+
+/* Transport logging functions
+ * Logging is organized into classes. Classes can be OR'ed together to select
+ * what gets logged via the module parameter smbd_logging_class
+ * e.g. cifs.smbd_logging_class=0x500 will log all log_rdma_recv() and
+ * log_rdma_event()
+ */
+#define LOG_CREDIT 0x1
+#define LOG_OUTGOING   0x2
+#define LOG_INCOMING   0x4
+#define LOG_RECEIVE_QUEUE  0x8
+#define LOG_REASSEMBLY_QUEUE   0x10
+#define LOG_READ   0x20
+#define LOG_WRITE  0x40
+#define LOG_RDMA_SEND  0x80
+#define LOG_RDMA_RECV  0x100
+#define LOG_KEEP_ALIVE 0x200
+#define LOG_RDMA_EVENT 0x400
+#define LOG_RDMA_MR    0X800
+
+static unsigned int smbd_logging_class = LOG_RDMA_MR;
+module_param(smbd_logging_class, uint, 0644);
+MODULE_PARM_DESC(smbd_logging_class,
+   "Logging class for SMBD transport 0x0 to 0xfff");
+
+#define log_rdma(class, fmt, args...)  \
+do {   \
+   if (class & smbd_logging_class) \
+   cifs_dbg(VFS, "%s:%d " fmt, __func__, __LINE__, ##args);\
+} while (0)
+
+#define log_rdma_credit(fmt, args...)  log_rdma(LOG_CREDIT, fmt,