From: Jack Wang <jinpu.w...@profitbricks.com>

User module eg ibnbd_client will use this interface to transfer
data later.

Signed-off-by: Jack Wang <jinpu.w...@profitbricks.com>
Signed-off-by: Kleber Souza <kleber.so...@profitbricks.com>
Signed-off-by: Danil Kipnis <danil.kip...@profitbricks.com>
Signed-off-by: Roman Pen <roman.peny...@profitbricks.com>
---
 include/rdma/ibtrs_clt.h | 316 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 316 insertions(+)
 create mode 100644 include/rdma/ibtrs_clt.h

diff --git a/include/rdma/ibtrs_clt.h b/include/rdma/ibtrs_clt.h
new file mode 100644
index 0000000..4fc9b12
--- /dev/null
+++ b/include/rdma/ibtrs_clt.h
@@ -0,0 +1,316 @@
+/*
+ * InfiniBand Transport Layer
+ *
+ * Copyright (c) 2014 - 2017 ProfitBricks GmbH. All rights reserved.
+ * Authors: Fabian Holler < m...@fholler.de>
+ *          Jack Wang <jinpu.w...@profitbricks.com>
+ *         Kleber Souza <kleber.so...@profitbricks.com>
+ *         Danil Kipnis <danil.kip...@profitbricks.com>
+ *         Roman Pen <roman.peny...@profitbricks.com>
+ *          Milind Dumbare <milind.dumb...@gmail.com>
+ *
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions, and the following disclaimer,
+ *    without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ *    substantially similar to the "NO WARRANTY" disclaimer below
+ *    ("Disclaimer") and any redistribution must be conditioned upon
+ *    including a substantially similar Disclaimer requirement for further
+ *    binary redistribution.
+ * 3. Neither the names of the above-listed copyright holders nor the names
+ *    of any contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ *
+ */
+
+#if !defined(IBTRS_CLIENT_H)
+#define IBTRS_CLIENT_H
+
+#include <linux/scatterlist.h>
+
+struct ibtrs_session;
+
+/**
+ * ibtrs_clt_open() - Open a session to a ibtrs_server
+ * @addr: The IPv4, IPv6 or GID address of the peer
+ * @pdu_sz: Size of extra payload which can be accessed after tag allocation.
+ * @priv: Pointer passed back on &ibtrs_clt_ops->sess_ev() invocation
+ * @max_inflight_msg: Max. number of parallel inflight messages for the session
+ * @max_segments: Max. number of segments per IO request
+ * @reconnect_delay_sec: time between reconnect tries
+ * @max_reconnect_attempts: Number of times to reconnect on error before giving
+ *                         up, 0 for * disabled, -1 for forever
+ *
+ * Starts session establishment with the ibtrs_server. The function can block
+ * up to ~2000ms until it returns.
+ *
+ * Return a valid pointer on success otherwise PTR_ERR.
+ * -EINVAL:    The provided addr could not be resolved to an Infiniband
+ *             address, the route to the host could not be resolved or
+ *             ibtrs_clt_register() was not called before.
+ */
+struct ibtrs_session *ibtrs_clt_open(const struct sockaddr_storage *addr,
+                                    size_t pdu_sz, void *priv,
+                                    u8 reconnect_delay_sec, u16 max_segments,
+                                    s16 max_reconnect_attempts);
+
+/**
+ * ibtrs_clt_close() - Close a session
+ * @sess: Session handler, is freed on return
+ */
+int ibtrs_clt_close(struct ibtrs_session *sess);
+
+/**
+ * enum ibtrs_clt_rdma_ev - Events related to RDMA transfer operations
+ */
+enum ibtrs_clt_rdma_ev {
+       IBTRS_CLT_RDMA_EV_RDMA_REQUEST_WRITE_COMPL,
+       IBTRS_CLT_RDMA_EV_RDMA_WRITE_COMPL,
+};
+
+/**
+ * enum ibtrs_sess_ev - Events about connectivity state of a session
+ * @IBTRS_CLT_SESS_EV_RECONNECT                The session was reconnected.
+ * @IBTRS_CLT_SESS_EV_DISCONNECTED     The session was disconnected.
+ * @IBTRS_CLT_SESS_EV_MAX_RECONN_EXCEEDED Reconect attempts stopped because
+ *                                       max. number of reconnect attempts
+ *                                       are reached.
+ */
+enum ibtrs_clt_sess_ev {
+       IBTRS_CLT_SESS_EV_RECONNECT,
+       IBTRS_CLT_SESS_EV_DISCONNECTED,
+       IBTRS_CLT_SESS_EV_MAX_RECONN_EXCEEDED,
+};
+
+/**
+ * struct ibtrs_clt_ops - Callback functions of the user module
+ * @owner:             module that uses ibtrs_server
+ * @rdma_ev:           Event notifications for RDMA operations,
+ *      Context: in interrupt(soft irq). The function should be relatively 
fast.
+ *     @priv:                  user supplied data that was passed to
+ *                             ibtrs_clt_request_rdma_write() or
+ *                             ibtrs_clt_rdma_write() before
+ *     @ev:                    Occurred event
+ *     @errno:                 Result of corresponding operation,
+ *                             0 on success or negative ERRNO code on error
+ * @sess_ev:           Event notification for connection state changes
+ *     @priv:                  user supplied data that was passed to
+ *                             ibtrs_clt_open()
+ *     @ev:                    Occurred event
+ *     @errno:                 Result of corresponding operation,
+ *                             0 on success or negative ERRNO code on error
+ * @recv:              Event notification for infiniband message receival
+ *     @priv:                  user supplied data that was passed to
+ *                             ibtrs_clt_open()
+ *     @msg                    Received data
+ *     @len.                   Size of the @msg buffer
+ *
+ * The @recv and @rdma_ev are running on the same CPU that requested the RDMA
+ * operation before.
+ */
+
+typedef void (rdma_clt_ev_fn)(void *priv, enum ibtrs_clt_rdma_ev ev, int 
errno);
+typedef void (sess_clt_ev_fn)(void *priv, enum ibtrs_clt_sess_ev ev, int 
errno);
+typedef void (recv_clt_fn)(void *priv, const void *msg, size_t len);
+
+struct ibtrs_clt_ops {
+       struct module           *owner;
+       rdma_clt_ev_fn          *rdma_ev;
+       sess_clt_ev_fn          *sess_ev;
+       recv_clt_fn             *recv;
+};
+
+/**
+ * ibtrs_clt_register() - register a user module with ibtrs_client
+ * @ops:       callback functions to register
+ *
+ * Return:
+ * 0:          Success
+ * -ENOTSUPP:  Registration failed, max. number of supported user modules
+               reached
+ */
+int ibtrs_clt_register(const struct ibtrs_clt_ops *ops);
+
+/**
+ * ibtrs_clt_unregister() - unregister a module at ibtrs_client
+ * @ops:       struct that was passed before to ibtrs_clt_register()
+ *
+ * ibtrs_clt_unregister() must only be called after all session that were
+ * created by the user module were closed.
+ */
+void ibtrs_clt_unregister(const struct ibtrs_clt_ops *ops);
+
+enum {
+       IBTRS_TAG_NOWAIT = 0,
+       IBTRS_TAG_WAIT   = 1,
+};
+
+/**
+ * ibtrs_tag - tags the memory allocation for future RDMA operation
+ */
+struct ibtrs_tag {
+       unsigned int cpu_id;
+       unsigned int mem_id;
+       unsigned int mem_id_mask;
+};
+
+static inline struct ibtrs_tag *ibtrs_tag_from_pdu(void *pdu)
+{
+       return pdu - sizeof(struct ibtrs_tag);
+}
+
+static inline void *ibtrs_tag_to_pdu(struct ibtrs_tag *tag)
+{
+       return tag + 1;
+}
+
+/**
+ * ibtrs_get_tag() - allocates tag for future RDMA operation
+ * @sess:      Current session
+ * @cpu_id:    cpu_id to run
+ * @nr_bytes:  Number of bytes to consume per tag
+ * @wait:      Wait type
+ *
+ * Description:
+ *    Allocates tag for the following RDMA operation.  Tag is used
+ *    to preallocate all resources and to propagate memory pressure
+ *    up earlier.
+ *
+ * Context:
+ *    Can sleep if @wait == IBTRS_TAG_WAIT
+ */
+struct ibtrs_tag *ibtrs_get_tag(struct ibtrs_session *sess, int cpu_id,
+                               size_t nr_bytes, int wait);
+
+/**
+ * ibtrs_put_tag() - puts allocated tag
+ * @sess:      Current session
+ * @tag:       Tag to be freed
+ *
+ * Context:
+ *    Does not matter
+ */
+void ibtrs_put_tag(struct ibtrs_session *sess, struct ibtrs_tag *tag);
+
+/**
+ * ibtrs_clt_rdma_write() - Transfer data to the server via RDMA.
+ * @sess:      Session
+ * @tag:       Preallocated tag
+ * @priv:      User provided data, passed back on corresponding
+ *             @ibtrs_clt_ops->rdma_ev() event
+ * @vec:       User module message to transfer together with @sg.
+ *             Sum of len of all @vec elements limited to <= IO_MSG_SIZE
+ * @nr:                Number of elements in @vec.
+ * @data_len:  Size of data in @sg
+ * @sg:                data to transferred, 512B aligned in the receivers 
memory
+ * @sg_len:    number of elements in @sg array
+ *
+ * Return:
+ * 0:          Success
+ * <0:         Error
+ *
+ * On completion of the operation a %IBTRS_CLT_RDMA_EV_RDMA_WRITE_COMPL is
+ * generated. If an error happened on IBTRS layer for this operation a
+ * %IBTRS_CLT_RDMA_EV_ERROR is generated.
+ */
+int ibtrs_clt_rdma_write(struct ibtrs_session *sess, struct ibtrs_tag *tag,
+                        void *priv, const struct kvec *vec, size_t nr,
+                        size_t data_len, struct scatterlist *sg,
+                        unsigned int sg_len);
+
+/**
+ * ibtrs_clt_request_rdma_write() - Request data transfer from server via RDMA.
+ *
+ * @sess:      Session
+ * @tag:       Preallocated tag
+ * @priv:      User provided data, passed back on corresponding
+ *             @ibtrs_clt_ops->rdma_ev() event
+ * @vec:       Message that is send to server together with the request.
+ *             Sum of len of all @vec elements limited to <= IO_MSG_SIZE.
+ * @nr:                Number of elements in @vec.
+ * @result_len: Max. length of data that ibtrs_server will send back
+ * @recv_sg:   Pages in which the response of the server will be stored.
+ * @recv_sg_len: Number of elements in the @recv_sg
+ *
+ * Return:
+ * 0:          Success
+ * <0:         Error
+ *
+ * IBTRS Client will request a data transfer from Server to Client via RDMA.
+ * The data that the server will respond with will be stored in @recv_sg when
+ * the user receives an %IBTRS_CLT_RDMA_EV_RDMA_REQUEST_WRITE_COMPL event.
+ * If an error occurred on the IBTRS layer a %IBTRS_CLT_RDMA_EV_ERROR is
+ * generated instead
+ */
+int ibtrs_clt_request_rdma_write(struct ibtrs_session *sess,
+                                struct ibtrs_tag *tag, void *priv,
+                                const struct kvec *vec, size_t nr,
+                                size_t result_len,
+                                struct scatterlist *recv_sg,
+                                unsigned int recv_sg_len);
+
+/**
+ * ibtrs_clt_send() - Send data to server via an infiniband message.
+ * @sess:      Session
+ * @vec:       Data to transfer
+ * @nr:                Number of elements in @vec
+ *
+ * Return:
+ * 0:          Success
+ * <0:         Error:
+ *             -ECOMM          no connection to the server
+ *             -EINVAL         message size too big (500 bytes max)
+ *             -EAGAIN         run out of tx buffers - try again later
+ *             -<IB ERROR>     see mlx doc
+ *
+ * The operation is not confirmed. It is the responsibility of the user on the
+ * other side to send an acknowledgment if required.
+ */
+int ibtrs_clt_send(struct ibtrs_session *sess, const struct kvec *vec,
+                  size_t nr);
+
+/**
+ * ibtrs_attrs - IBTRS session attributes
+ */
+struct ibtrs_attrs {
+       u32     queue_depth;
+       u64     mr_page_mask;
+       u32     mr_page_size;
+       u32     mr_max_size;
+       u32     max_pages_per_mr;
+       u32     max_sge;
+       u32     max_io_size;
+       u8      hostname[MAXHOSTNAMELEN];
+};
+
+/**
+ * ibtrs_clt_query() - queries IBTRS session attributes
+ *
+ * Returns:
+ *    0 on success
+ *    -ECOMM           no connection to the server
+ */
+int ibtrs_clt_query(struct ibtrs_session *sess, struct ibtrs_attrs *attr);
+
+#endif
-- 
2.7.4

Reply via email to