From: Jack Wang <jinpu.w...@profitbricks.com>

Signed-off-by: Jack Wang <jinpu.w...@profitbricks.com>
Signed-off-by: Kleber Souza <kleber.so...@profitbricks.com>
Signed-off-by: Danil Kipnis <danil.kip...@profitbricks.com>
Signed-off-by: Roman Pen <roman.peny...@profitbricks.com>
---
 include/rdma/ibtrs.h | 514 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 514 insertions(+)
 create mode 100644 include/rdma/ibtrs.h

diff --git a/include/rdma/ibtrs.h b/include/rdma/ibtrs.h
new file mode 100644
index 0000000..4fc572b
--- /dev/null
+++ b/include/rdma/ibtrs.h
@@ -0,0 +1,514 @@
+/*
+ * InfiniBand Transport Layer
+ *
+ * Copyright (c) 2014 - 2017 ProfitBricks GmbH. All rights reserved.
+ * Authors: Fabian Holler < m...@fholler.de>
+ *          Jack Wang <jinpu.w...@profitbricks.com>
+ *         Kleber Souza <kleber.so...@profitbricks.com>
+ *         Danil Kipnis <danil.kip...@profitbricks.com>
+ *         Roman Pen <roman.peny...@profitbricks.com>
+ *          Milind Dumbare <milind.dumb...@gmail.com>
+ *
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions, and the following disclaimer,
+ *    without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ *    substantially similar to the "NO WARRANTY" disclaimer below
+ *    ("Disclaimer") and any redistribution must be conditioned upon
+ *    including a substantially similar Disclaimer requirement for further
+ *    binary redistribution.
+ * 3. Neither the names of the above-listed copyright holders nor the names
+ *    of any contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ *
+ */
+
+#ifndef __IBTRS_H
+#define __IBTRS_H
+
+#include <linux/uio.h>
+#include <linux/types.h>
+#include <linux/uuid.h>
+#include <rdma/rdma_cm.h>
+#include <rdma/ib_cm.h>
+#include <linux/list.h>
+#include <linux/dma-direction.h>
+#include <rdma/ib_verbs.h>
+#include <linux/time.h>
+#include <linux/ktime.h>
+#include <linux/timekeeping.h>
+
+#define IBTRS_SERVER_PORT 1234
+#define WC_ARRAY_SIZE 16
+#define IB_APM_TIMEOUT 16 /* 4.096 usec * 2 ^ 16 ~= 268 msec */
+
+#define USR_MSG_CNT 64
+#define USR_CON_BUF_SIZE (USR_MSG_CNT * 2) /* double bufs for ACK's */
+
+#define DEFAULT_HEARTBEAT_TIMEOUT_MS 20000
+#define MIN_HEARTBEAT_TIMEOUT_MS 5000
+#define HEARTBEAT_INTV_MS 500
+#define HEARTBEAT_INTV_JIFFIES msecs_to_jiffies(HEARTBEAT_INTV_MS)
+
+#define MIN_RTR_CNT 1
+#define MAX_RTR_CNT 7
+
+/*
+ * With the current size of the tag allocated on the client, 4K is the maximum
+ * number of tags we can allocate. (see IBNBD-2321)
+ * This number is also used on the client to allocate the IU for the user
+ * connection to receive the RDMA addresses from the server.
+ */
+#define MAX_SESS_QUEUE_DEPTH 4096
+
+#define XX(a) case (a): return #a
+
+#define IBTRS_ADDRLEN sizeof("ipv6:[xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx]")
+
+static inline const char *ib_wc_opcode_str(enum ib_wc_opcode opcode)
+{
+       switch (opcode) {
+       XX(IB_WC_SEND);
+       XX(IB_WC_RDMA_WRITE);
+       XX(IB_WC_RDMA_READ);
+       XX(IB_WC_COMP_SWAP);
+       XX(IB_WC_FETCH_ADD);
+       /* recv-side: inbound completion */
+       XX(IB_WC_RECV);
+       XX(IB_WC_RECV_RDMA_WITH_IMM);
+       default: return "IB_WC_OPCODE_UNKNOWN";
+       }
+}
+
+
+struct ib_session {
+       struct ib_pd            *pd;
+       struct ib_mr            *mr;
+       struct ib_event_handler event_handler;
+};
+
+struct ibtrs_ib_path {
+       union ib_gid    p_sgid;
+       union ib_gid    p_dgid;
+};
+
+struct ib_con {
+       struct ib_qp            *qp ____cacheline_aligned;
+       struct ib_cq            *cq ____cacheline_aligned;
+       struct ib_send_wr       beacon;
+       struct rdma_cm_id       *cm_id;
+       struct ibtrs_ib_path    pri_path;
+       struct ibtrs_ib_path   cur_path;
+       char                    *addr;
+       char                    *hostname;
+};
+
+struct ibtrs_iu {
+       struct list_head        list;
+       dma_addr_t              dma_addr;
+       void                    *buf;
+       size_t                  size;
+       enum dma_data_direction direction;
+       bool                    is_msg;
+       u32                     tag;
+};
+
+struct ibtrs_heartbeat {
+       atomic64_t      send_ts_ms;
+       atomic64_t      recv_ts_ms;
+       u32             timeout_ms;
+       u32             warn_timeout_ms;
+       char            *addr;
+       char            *hostname;
+};
+
+#define IBTRS_VERSION 2
+#define IBTRS_UUID_SIZE 16
+#define IO_MSG_SIZE 24
+#define IB_IMM_SIZE_BITS 32
+
+#define GCC_DIAGNOSTIC_AWARE ((__GNUC__ > 6))
+#if GCC_DIAGNOSTIC_AWARE
+#pragma GCC diagnostic push
+#pragma GCC diagnostic warning "-Wpadded"
+#endif
+
+/**
+ * enum ibtrs_msg_types - IBTRS message types. DO NOT REMOVE OR REORDER!!!
+ * @IBTRS_MSG_SESS_OPEN:       Client requests new session on Server
+ * @IBTRS_MSG_SESS_OPEN_RESP:  Server informs Client about session parameters
+ * @IBTRS_MSG_CON_OPEN:                Client requests new connection to server
+ * @IBTRS_MSG_RDMA_WRITE:      Client writes data per RDMA to Server
+ * @IBTRS_MSG_REQ_RDMA_WRITE:  Client requests data transfer per RDMA
+ * @IBTRS_MSG_USER:            Data transfer per Infiniband message
+ * @IBTRS_MSG_ERROR:           Fatal Error happened
+ * @IBTRS_MSG_SESS_INFO:       Client requests about session info
+ */
+enum ibtrs_msg_types {
+       IBTRS_MSG_SESS_OPEN,
+       IBTRS_MSG_SESS_OPEN_RESP,
+       IBTRS_MSG_CON_OPEN,
+       IBTRS_MSG_RDMA_WRITE,
+       IBTRS_MSG_REQ_RDMA_WRITE,
+       IBTRS_MSG_USER,
+       IBTRS_MSG_ERROR,
+       IBTRS_MSG_SESS_INFO,
+};
+
+/**
+ * struct ibtrs_msg_hdr - Common header of all IBTRS messages
+ * @type:      Message type, valid values see: enum ibtrs_msg_types
+ * @tsize:     Total size of transferred data
+ *
+ * Don't move the first 8 padding bits! It's a workaround for a kernel bug.
+ * See IBNBD-610 for details
+ *
+ * DO NOT CHANGE!
+ */
+struct ibtrs_msg_hdr {
+       u8                      __padding1;
+       u8                      type;
+       u16                     __padding2;
+       u32                     tsize;
+};
+
+#define IBTRS_HDR_LEN sizeof(struct ibtrs_msg_hdr)
+
+/**
+ * struct ibtrs_msg_sess_open - Opens a new session between client and server
+ * @hdr:       message header
+ * @uuid:      client host identifier, unique until module reload
+ * @ver:       IBTRS protocol version
+ * @con_cnt:    number of connections in this session
+ * @reserved:   reserved fields for future usage, 28 bytes is maximum for
+ *             all IPv6/IPv4 session
+ *
+ * DO NOT CHANGE members before ver.
+ */
+struct ibtrs_msg_sess_open {
+       struct ibtrs_msg_hdr    hdr;
+       u8                      uuid[IBTRS_UUID_SIZE];
+       u8                      ver;
+       u8                      con_cnt;
+       u8                      reserved[30];
+};
+
+/**
+ * struct ibtrs_msg_sess_info - Session info message carrying the client host name
+ * @hdr:               message header
+ * @hostname:          client host name
+ */
+struct ibtrs_msg_sess_info {
+       struct ibtrs_msg_hdr    hdr;
+       u8                      hostname[MAXHOSTNAMELEN];
+};
+
+#define MSG_SESS_INFO_SIZE sizeof(struct ibtrs_msg_sess_info)
+
+/*
+ *  Data Layout in RDMA-Bufs:
+ *
+ * +---------RDMA-BUF--------+
+ * |         Slice N        |
+ * | +---------------------+ |
+ * | |      I/O data       | |
+ * | |---------------------| |
+ * | |      IBNBD MSG     | |
+ * | |---------------------| |
+ * | |     IBTRS MSG      | |
+ * | +---------------------+ |
+ * +-------------------------+
+ * |        Slice N+1       |
+ * | +---------------------+ |
+ * | |       I/O data     | |
+ * | |---------------------| |
+ * | |      IBNBD MSG     | |
+ * | |---------------------| |
+ * | |       IBTRS MSG     | |
+ * | +---------------------+ |
+ * +-------------------------+
+ */
+
+#define IBTRS_MSG_RESV_LEN 128
+/**
+ * struct ibtrs_msg_sess_open_resp - Servers response to %IBTRS_MSG_SESS_OPEN
+ * @hdr:       message header
+ * @ver:       IBTRS protocol version
+ * @cnt:       Number of rdma addresses in this message
+ * @rkey:      remote key to allow client to access buffers
+ * @hostname:   hostname of local host
+ * @reserved:    reserved fields for future usage
+ * @max_inflight_msg:  max inflight messages (queue-depth) in this session
+ * @max_io_size:   max io size server supports
+ * @max_req_size:   max infiniband message size server supports
+ * @addr:      rdma addresses of buffers
+ *
+ * DO NOT CHANGE members before ver.
+ */
+struct ibtrs_msg_sess_open_resp {
+       struct ibtrs_msg_hdr    hdr;
+       u8                      ver;
+       u8                      __padding1;
+       u16                     cnt;
+       u32                     rkey;
+       u8                      hostname[MAXHOSTNAMELEN];
+       u8                      reserved[IBTRS_MSG_RESV_LEN];
+       u16                     max_inflight_msg;
+       u32                     max_io_size;
+       u32                     max_req_size;
+       u64                     addr[];
+};
+
+#define IBTRS_MSG_SESS_OPEN_RESP_LEN(cnt) /* resp + cnt addr[] entries */ \
+       (sizeof(struct ibtrs_msg_sess_open_resp) + sizeof(u64) * (cnt))
+/**
+ * struct ibtrs_msg_con_open - Opens a new connection between client and server
+ * @hdr:               message header
+ * @uuid:              client host identifier, unique until module reload
+ */
+struct ibtrs_msg_con_open {
+       struct ibtrs_msg_hdr    hdr;
+       u8                      uuid[IBTRS_UUID_SIZE];
+};
+
+/**
+ * struct ibtrs_msg_user - Data exchanged as an Infiniband message
+ * @hdr:               message header
+ * @payl:              Payload from the user module
+ */
+struct ibtrs_msg_user {
+       struct ibtrs_msg_hdr    hdr;
+       u8                      payl[];
+};
+
+/**
+ * struct ibtrs_sg_desc - RDMA-Buffer entry description
+ * @addr:      Address of RDMA destination buffer
+ * @key:       Authorization rkey to write to the buffer
+ * @len:       Size of the buffer
+ */
+struct ibtrs_sg_desc {
+       u64                     addr;
+       u32                     key;
+       u32                     len;
+};
+
+#define IBTRS_SG_DESC_LEN sizeof(struct ibtrs_sg_desc)
+
+/**
+ * struct ibtrs_msg_req_rdma_write - RDMA data transfer request from client
+ * @hdr:               message header
+ * @sg_cnt:            number of @desc entries
+ * @desc:              RDMA buffers where the server can write the result to
+ */
+struct ibtrs_msg_req_rdma_write {
+       struct ibtrs_msg_hdr    hdr;
+       u32                     __padding;
+       u32                     sg_cnt;
+       struct ibtrs_sg_desc    desc[];
+};
+
+/**
+ * struct ibtrs_msg_rdma_write - Message transferred to server with RDMA-Write
+ * @hdr:               message header
+ */
+struct ibtrs_msg_rdma_write {
+       struct ibtrs_msg_hdr    hdr;
+};
+
+/**
+ * struct ibtrs_msg_error - Error message
+ * @hdr:               message header
+ * @errno:             Errno number describing the error
+ */
+struct ibtrs_msg_error {
+       struct ibtrs_msg_hdr    hdr;
+       s32                     errno;
+       u32                     __padding;
+};
+
+#if GCC_DIAGNOSTIC_AWARE
+#pragma GCC diagnostic pop
+#endif
+
+int ibtrs_validate_message(u16 queue_depth, const void *hdr);
+
+void fill_ibtrs_msg_sess_open(struct ibtrs_msg_sess_open *msg, u8 con_cnt,
+                             const uuid_le *uuid);
+
+void fill_ibtrs_msg_con_open(struct ibtrs_msg_con_open *msg,
+                            const uuid_le *uuid);
+
+void fill_ibtrs_msg_sess_info(struct ibtrs_msg_sess_info *msg,
+                             const char *hostname);
+
+void ibtrs_heartbeat_set_send_ts(struct ibtrs_heartbeat *h);
+void ibtrs_set_last_heartbeat(struct ibtrs_heartbeat *h);
+u64 ibtrs_last_heartbeat_diff_ms(const struct ibtrs_heartbeat *h);
+u64 ibtrs_heartbeat_send_ts_diff_ms(const struct ibtrs_heartbeat *h);
+
+void ibtrs_set_heartbeat_timeout(struct ibtrs_heartbeat *h, u32 timeout_ms);
+
+void ibtrs_heartbeat_warn(const struct ibtrs_heartbeat *h);
+
+bool ibtrs_heartbeat_timeout_is_expired(const struct ibtrs_heartbeat *h);
+
+u32 ibtrs_heartbeat_get_send_delay(const struct ibtrs_heartbeat *h);
+u32 ibtrs_heartbeat_get_check_delay(const struct ibtrs_heartbeat *h);
+void ibtrs_iu_put(struct list_head *iu_list, struct ibtrs_iu *iu);
+struct ibtrs_iu *ibtrs_iu_get(struct list_head *iu_list);
+
+struct ibtrs_iu *ibtrs_iu_alloc(u32 tag, size_t size, gfp_t t,
+                               struct ib_device *dev,
+                               enum dma_data_direction dir, bool is_msg);
+
+void ibtrs_iu_free(struct ibtrs_iu *iu, enum dma_data_direction dir,
+                  struct ib_device *dev);
+
+int ibtrs_write_empty_imm(struct ib_qp *qp, u32 imm_data,
+                         enum ib_send_flags flags);
+
+int ibtrs_post_send(struct ib_qp *qp, struct ib_mr *mr, struct ibtrs_iu *iu,
+                   u32 size);
+
+int ib_post_rdma_write_imm(struct ib_qp *qp, struct ib_sge *sge,
+                          unsigned int num_sge, u32 rkey, u64 rdma_addr,
+                          u64 wr_id, u32 imm_data, enum ib_send_flags flags);
+
+int ib_post_rdma_write(struct ib_qp *qp, struct ib_sge *sge,
+                      unsigned int num_sge, u32 rkey, u64 rdma_addr,
+                      u64 wr_id);
+int post_beacon(struct ib_con *con);
+/**
+ * ib_session_init() - Create a new IB session
+ */
+int ib_session_init(struct ib_device *dev, struct ib_session *session);
+
+/**
+ * ib_con_init() - initialize and add a ib_con to the session
+ * @con:       &ib_con to initialize
+ * @session:   session the &ib_con is added to
+ * @ctx:       CQ context, returned to the user via completion handler
+ *
+ * Returns 0 on success otherwise a negative errno code
+ */
+int ib_con_init(struct ib_con *con, struct rdma_cm_id *cm_id,
+               u32 max_send_sge,
+               ib_comp_handler comp_handler, void *ctx, int cq_vector,
+               u16 cq_size, u16 wr_queue_size, struct ib_session *session);
+
+int ibtrs_request_cq_notifications(struct ib_con *con);
+
+void ib_con_destroy(struct ib_con *con);
+
+/**
+ * ib_session_destroy() - Free a session
+ * The corresponding &ib_con must have been freed before.
+ */
+void ib_session_destroy(struct ib_session *session);
+
+int ib_get_max_wr_queue_size(struct ib_device *dev);
+
+int ibtrs_addr_to_str(const struct sockaddr_storage *addr, char *buf,
+                     size_t len);
+
+int ibtrs_heartbeat_timeout_validate(int timeout);
+
+/**
+ * kvec_length() - Sum of the iov_len fields of the first @nr kvec entries.
+ */
+static inline size_t kvec_length(const struct kvec *vec, size_t nr)
+{
+       size_t total = 0;
+
+       for (; nr; nr--, vec++)
+               total += vec->iov_len;
+       return total;
+}
+
+/**
+ * copy_from_kvec() - Gather @copy bytes from the kvec array into @data.
+ */
+static inline void copy_from_kvec(void *data, const struct kvec *vec,
+                                 size_t copy)
+{
+       size_t chunk;
+
+       for (; copy; vec++) {
+               chunk = min(vec->iov_len, copy);
+               memcpy(data, vec->iov_base, chunk);
+               data += chunk;
+               copy -= chunk;
+       }
+}
+
+static inline u64 timespec_to_ms(const struct timespec *ts)
+{
+       return timespec_to_ns(ts) / NSEC_PER_MSEC;
+}
+
+u64 timediff_cur_ms(u64 cur_ms);
+
+void *ibtrs_malloc(size_t size);
+void *ibtrs_zalloc(size_t size);
+
+#define STAT_STORE_FUNC(store, reset) \
+static ssize_t store##_store(struct kobject *kobj, \
+                           struct kobj_attribute *attr, \
+                           const char *buf, size_t count) \
+{ \
+       int ret = -EINVAL; \
+       struct ibtrs_session *sess = container_of(kobj, struct ibtrs_session, \
+                                                 kobj_stats); \
+\
+       if (sysfs_streq(buf, "1")) \
+               ret = reset(sess, true); \
+       else if (sysfs_streq(buf, "0"))\
+               ret = reset(sess, false); \
+       if (ret) \
+               return ret; \
+\
+       return count; \
+}
+
+#define STAT_SHOW_FUNC(show, print) \
+static ssize_t show##_show(struct kobject *kobj, \
+                          struct kobj_attribute *attr, \
+                          char *page) \
+{ \
+       struct ibtrs_session *sess = container_of(kobj, struct ibtrs_session, \
+                                                 kobj_stats); \
+\
+       return print(sess, page, PAGE_SIZE); \
+}
+
+#define STAT_ATTR(stat, print, reset) \
+STAT_STORE_FUNC(stat, reset) \
+STAT_SHOW_FUNC(stat, print) \
+static struct kobj_attribute stat##_attr = \
+               __ATTR(stat, 0644, \
+                      stat##_show, \
+                      stat##_store)
+
+#endif /*__IBTRS_H*/
-- 
2.7.4

Reply via email to