---
 drivers/infiniband/hw/siw/siw_qp.c | 1007 ++++++++++++++++++++++++++++++++++++
 1 files changed, 1007 insertions(+), 0 deletions(-)
 create mode 100644 drivers/infiniband/hw/siw/siw_qp.c

diff --git a/drivers/infiniband/hw/siw/siw_qp.c b/drivers/infiniband/hw/siw/siw_qp.c
new file mode 100644
index 0000000..ef124eb
--- /dev/null
+++ b/drivers/infiniband/hw/siw/siw_qp.c
@@ -0,0 +1,1007 @@
+/*
+ * Software iWARP device driver for Linux
+ *
+ * Authors: Bernard Metzler <b...@zurich.ibm.com>
+ *          Fredy Neeser <n...@zurich.ibm.com>
+ *
+ * Copyright (c) 2008-2011, IBM Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *   Redistribution and use in source and binary forms, with or
+ *   without modification, are permitted provided that the following
+ *   conditions are met:
+ *
+ *   - Redistributions of source code must retain the above copyright notice,
+ *     this list of conditions and the following disclaimer.
+ *
+ *   - Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *
+ *   - Neither the name of IBM nor the names of its contributors may be
+ *     used to endorse or promote products derived from this software without
+ *     specific prior written permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/net.h>
+#include <linux/file.h>
+#include <linux/scatterlist.h>
+#include <linux/highmem.h>
+#include <net/sock.h>
+#include <net/tcp_states.h>
+#include <net/tcp.h>
+
+#include <rdma/iw_cm.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_smi.h>
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_umem.h>
+
+#include "siw.h"
+#include "siw_obj.h"
+#include "siw_cm.h"
+
+
+#if DPRINT_MASK > 0
+/*
+ * Printable names of the QP states, indexed by QP state value and used
+ * by the debug prints in this file. Each slot is sized to hold the
+ * longest name ("TERMINATE") including its terminating NUL. The table
+ * is read-only, hence const.
+ */
+static const char
+siw_qp_state_to_string[SIW_QP_STATE_COUNT][sizeof "TERMINATE"] = {
+       [SIW_QP_STATE_IDLE]             = "IDLE",
+       [SIW_QP_STATE_RTR]              = "RTR",
+       [SIW_QP_STATE_RTS]              = "RTS",
+       [SIW_QP_STATE_CLOSING]          = "CLOSING",
+       [SIW_QP_STATE_TERMINATE]        = "TERMINATE",
+       [SIW_QP_STATE_ERROR]            = "ERROR",
+       [SIW_QP_STATE_MORIBUND]         = "MORIBUND",
+       [SIW_QP_STATE_UNDEF]            = "UNDEF"
+};
+#endif
+
+/*
+ * iWARP (RDMAP, DDP and MPA) parameters as well as Softiwarp settings on a
+ * per-RDMAP message basis. Please keep order of initializer. All MPA len
+ * is initialized to minimum packet size.
+ *
+ * The table is initialized positionally; each entry carries, in order:
+ *   .hdr_len              size of the message type's iWARP header struct
+ *   .ctrl.mpa_len         header size minus 2, in network byte order
+ *                         (presumably because the MPA length field does
+ *                         not count itself - confirm against the MPA spec)
+ *   .ctrl.ddp_rdmap_ctrl  DDP flags, DDP version, RDMAP version and the
+ *                         RDMAP opcode of the entry
+ *   .proc_data            handler for payload of that message type
+ *
+ * NOTE(review): the entry order looks like it must match the numeric
+ * RDMAP opcode values so the table can be indexed by opcode - confirm
+ * against the opcode definitions.
+ */
+struct iwarp_msg_info iwarp_pktinfo[RDMAP_TERMINATE + 1] = { {
+       /* RDMA Write: tagged DDP message */
+       .hdr_len = sizeof(struct iwarp_rdma_write),
+       .ctrl.mpa_len = htons(sizeof(struct iwarp_rdma_write) - 2),
+       .ctrl.ddp_rdmap_ctrl = DDP_FLAG_TAGGED | DDP_FLAG_LAST
+               | cpu_to_be16(DDP_VERSION << 8)
+               | cpu_to_be16(RDMAP_VERSION << 6)
+               | cpu_to_be16(RDMAP_RDMA_WRITE),
+       .proc_data = siw_proc_write
+},
+{
+       /* RDMA Read Request: untagged DDP message */
+       .hdr_len = sizeof(struct iwarp_rdma_rreq),
+       .ctrl.mpa_len = htons(sizeof(struct iwarp_rdma_rreq) - 2),
+       .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST
+               | cpu_to_be16(DDP_VERSION << 8)
+               | cpu_to_be16(RDMAP_VERSION << 6)
+               | cpu_to_be16(RDMAP_RDMA_READ_REQ),
+       .proc_data = siw_proc_rreq
+},
+{
+       /* RDMA Read Response: tagged DDP message */
+       .hdr_len = sizeof(struct iwarp_rdma_rresp),
+       .ctrl.mpa_len = htons(sizeof(struct iwarp_rdma_rresp) - 2),
+       .ctrl.ddp_rdmap_ctrl = DDP_FLAG_TAGGED | DDP_FLAG_LAST
+               | cpu_to_be16(DDP_VERSION << 8)
+               | cpu_to_be16(RDMAP_VERSION << 6)
+               | cpu_to_be16(RDMAP_RDMA_READ_RESP),
+       .proc_data = siw_proc_rresp
+},
+{
+       /* Send: untagged DDP message */
+       .hdr_len = sizeof(struct iwarp_send),
+       .ctrl.mpa_len = htons(sizeof(struct iwarp_send) - 2),
+       .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST
+               | cpu_to_be16(DDP_VERSION << 8)
+               | cpu_to_be16(RDMAP_VERSION << 6)
+               | cpu_to_be16(RDMAP_SEND),
+       .proc_data = siw_proc_send
+},
+{
+       /* Send with Invalidate: routed to the "unsupported" handler */
+       .hdr_len = sizeof(struct iwarp_send_inv),
+       .ctrl.mpa_len = htons(sizeof(struct iwarp_send_inv) - 2),
+       .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST
+               | cpu_to_be16(DDP_VERSION << 8)
+               | cpu_to_be16(RDMAP_VERSION << 6)
+               | cpu_to_be16(RDMAP_SEND_INVAL),
+       .proc_data = siw_proc_unsupp
+},
+{
+       /* Send with Solicited Event: same header and handler as Send */
+       .hdr_len = sizeof(struct iwarp_send),
+       .ctrl.mpa_len = htons(sizeof(struct iwarp_send) - 2),
+       .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST
+               | cpu_to_be16(DDP_VERSION << 8)
+               | cpu_to_be16(RDMAP_VERSION << 6)
+               | cpu_to_be16(RDMAP_SEND_SE),
+       .proc_data = siw_proc_send
+},
+{
+       /*
+        * Send with Solicited Event and Invalidate: routed to the
+        * "unsupported" handler
+        */
+       .hdr_len = sizeof(struct iwarp_send_inv),
+       .ctrl.mpa_len = htons(sizeof(struct iwarp_send_inv) - 2),
+       .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST
+               | cpu_to_be16(DDP_VERSION << 8)
+               | cpu_to_be16(RDMAP_VERSION << 6)
+               | cpu_to_be16(RDMAP_SEND_SE_INVAL),
+       .proc_data = siw_proc_unsupp
+},
+{
+       /* Terminate: untagged DDP message */
+       .hdr_len = sizeof(struct iwarp_terminate),
+       .ctrl.mpa_len = htons(sizeof(struct iwarp_terminate) - 2),
+       .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST
+               | cpu_to_be16(DDP_VERSION << 8)
+               | cpu_to_be16(RDMAP_VERSION << 6)
+               | cpu_to_be16(RDMAP_TERMINATE),
+       .proc_data = siw_proc_terminate
+} };
+
+
+/*
+ * siw_qp_llp_data_ready()
+ *
+ * Socket callback installed as sk_data_ready by siw_qp_socket_assoc().
+ * If a QP is attached to the socket and its state lock can be taken
+ * for reading without blocking, received data is consumed via
+ * tcp_read_sock() as long as the QP is in RTS state.
+ *
+ * @sk:                socket with newly available data
+ * @flags:     callback argument, only used for debug output
+ */
+static void siw_qp_llp_data_ready(struct sock *sk, int flags)
+{
+       read_descriptor_t       rx_desc = {.count = 1};
+       struct siw_qp           *qp;
+
+       read_lock(&sk->sk_callback_lock);
+
+       if (unlikely(!sk->sk_user_data || !sk_to_qp(sk))) {
+               dprint(DBG_ON, " No QP: %p\n", sk->sk_user_data);
+               goto unlock;
+       }
+       qp = sk_to_qp(sk);
+
+       /* never block in the socket callback: give up if lock is busy */
+       if (!down_read_trylock(&qp->state_lock)) {
+               dprint(DBG_SK|DBG_RX, "(QP%d): "
+                       "Unable to acquire state_lock\n", QP_ID(qp));
+               goto unlock;
+       }
+       rx_desc.arg.data = qp;
+
+       dprint(DBG_SK|DBG_RX, "(QP%d): "
+               "state (before tcp_read_sock)=%d, flags=%x\n",
+               QP_ID(qp), qp->attrs.state, flags);
+
+       /*
+        * The receive path runs right here within the callback.
+        * tcp_read_sock() copes with an empty receive queue by simply
+        * not invoking siw_tcp_rx_data().
+        */
+       if (likely(qp->attrs.state == SIW_QP_STATE_RTS))
+               tcp_read_sock(sk, &rx_desc, siw_tcp_rx_data);
+
+       dprint(DBG_SK|DBG_RX, "(QP%d): "
+               "state (after tcp_read_sock)=%d, flags=%x\n",
+               QP_ID(qp), qp->attrs.state, flags);
+
+       up_read(&qp->state_lock);
+unlock:
+       read_unlock(&sk->sk_callback_lock);
+}
+
+
+/*
+ * siw_qp_llp_close()
+ *
+ * Detach the QP from its lower layer protocol stream: suspend RX and
+ * TX processing, forget the stream handle, move the QP to its final
+ * state and flush both the send and the receive queue.
+ * Takes the QP state lock for writing.
+ */
+void siw_qp_llp_close(struct siw_qp *qp)
+{
+       dprint(DBG_CM, "(QP%d): Enter: SIW QP state = %s, cep=0x%p\n",
+               QP_ID(qp), siw_qp_state_to_string[qp->attrs.state],
+               qp->cep);
+
+       down_write(&qp->state_lock);
+
+       qp->attrs.llp_stream_handle = NULL;
+       qp->tx_ctx.tx_suspend = 1;
+       qp->rx_ctx.rx_suspend = 1;
+
+       switch (qp->attrs.state) {
+
+       case SIW_QP_STATE_CLOSING:
+               /*
+                * Forced close while CLOSING: the QP ends up IDLE only
+                * if no transmit work is pending, ERROR otherwise.
+                * (Open question from the author: ERROR or IDLE?)
+                */
+               qp->attrs.state = TX_IDLE(qp) ? SIW_QP_STATE_IDLE
+                                             : SIW_QP_STATE_ERROR;
+               break;
+
+       case SIW_QP_STATE_IDLE:
+       case SIW_QP_STATE_RTR:
+       case SIW_QP_STATE_RTS:
+       case SIW_QP_STATE_TERMINATE:
+               qp->attrs.state = SIW_QP_STATE_ERROR;
+               break;
+
+       default:
+               dprint(DBG_CM, " No state transition needed: %d\n",
+                       qp->attrs.state);
+               break;
+       }
+       siw_sq_flush(qp);
+       siw_rq_flush(qp);
+
+       up_write(&qp->state_lock);
+
+       dprint(DBG_CM, "(QP%d): Exit: SIW QP state = %s\n",
+               QP_ID(qp), siw_qp_state_to_string[qp->attrs.state]);
+}
+
+
+/*
+ * socket callback routine informing about newly available send space.
+ * Function schedules SQ work for processing SQ items.
+ *
+ * Nothing is scheduled if no QP is attached to the socket (same check
+ * as in siw_qp_llp_data_ready()) or if the sock has no struct socket
+ * attached, since the SOCK_NOSPACE flag lives in the struct socket.
+ */
+static void siw_qp_llp_write_space(struct sock *sk)
+{
+       struct socket   *sock = sk->sk_socket;
+       struct siw_qp   *qp;
+
+       /*
+        * TODO:
+        * Resemble sk_stream_write_space() logic for iWARP constraints:
+        * Clear SOCK_NOSPACE only if sendspace may hold some reasonable
+        * sized FPDU.
+        */
+#ifndef SIW_TX_FULLSEGS
+       /* generic stream handling does not depend on a QP being attached */
+       sk_stream_write_space(sk);
+#endif
+       if (unlikely(!sk->sk_user_data || !sk_to_qp(sk))) {
+               dprint(DBG_ON, " No QP: %p\n", sk->sk_user_data);
+               return;
+       }
+       if (!sock)
+               return;
+
+       qp = sk_to_qp(sk);
+
+#ifdef SIW_TX_FULLSEGS
+       /* resume sending only if the current FPDU fits into send space */
+       if (sk_stream_wspace(sk) >= (int)qp->tx_ctx.fpdu_len) {
+               clear_bit(SOCK_NOSPACE, &sock->flags);
+               siw_sq_queue_work(qp);
+       }
+#else
+       if (!test_bit(SOCK_NOSPACE, &sock->flags))
+               siw_sq_queue_work(qp);
+#endif
+}
+
+/*
+ * siw_qp_socket_assoc()
+ *
+ * Put socket @s under control of @qp: remember the socket as the QP's
+ * LLP stream handle and redirect the socket's data_ready and
+ * write_space upcalls to the siw handlers. Done with the socket
+ * callback lock held for writing, bottom halves disabled.
+ */
+static void siw_qp_socket_assoc(struct socket *s, struct siw_qp *qp)
+{
+       struct sock *sk = s->sk;
+
+       write_lock_bh(&sk->sk_callback_lock);
+
+       qp->attrs.llp_stream_handle = s;
+       sk->sk_data_ready = siw_qp_llp_data_ready;
+       sk->sk_write_space = siw_qp_llp_write_space;
+
+       write_unlock_bh(&sk->sk_callback_lock);
+}
+
+
+/*
+ * siw_qp_irq_init()
+ *
+ * Pre-allocate @size zeroed WQE's on the QP's free queue and set
+ * qp->irq_space accordingly. A @size <= 0 is a no-op.
+ *
+ * Returns 0 on success. If an allocation fails, all WQE's added by
+ * this call are released again, irq_space is reset to 0 and -ENOMEM
+ * is returned.
+ */
+static int siw_qp_irq_init(struct siw_qp *qp, int size)
+{
+       struct siw_wqe *wqe;
+       int i;
+
+       dprint(DBG_CM|DBG_WR, "(QP%d): irq size: %d\n", QP_ID(qp), size);
+       if (size <= 0)
+               return 0;
+
+       atomic_set(&qp->irq_space, size);
+
+       for (i = 0; i < size; i++) {
+               wqe = kzalloc(sizeof(*wqe), GFP_KERNEL);
+               if (!wqe)
+                       goto out_free;
+
+               INIT_LIST_HEAD(&wqe->list);
+               list_add(&wqe->list, &qp->freeq);
+               SIW_INC_STAT_WQE;
+       }
+       return 0;
+
+out_free:
+       dprint(DBG_ON, "(QP%d): Failed\n", QP_ID(qp));
+       /*
+        * list_add() inserts at the head, so the first i entries of
+        * the free queue are exactly those allocated above.
+        */
+       while (i--) {
+               wqe = list_first_wqe(&qp->freeq);
+               list_del(&wqe->list);
+               kfree(wqe);
+               SIW_DEC_STAT_WQE;
+       }
+       atomic_set(&qp->irq_space, 0);
+       return -ENOMEM;
+}
+
+
+/*
+ * siw_send_terminate()
+ *
+ * Placeholder for sending an RDMAP TERMINATE message. Currently only
+ * clears a local TERMINATE header and emits a debug message; nothing
+ * is put on the wire.
+ */
+static void siw_send_terminate(struct siw_qp *qp)
+{
+       struct iwarp_terminate  term;
+
+       memset(&term, 0, sizeof(term));
+       /*
+        * TODO: send TERMINATE
+        */
+       dprint(DBG_CM, "(QP%d): Todo\n", QP_ID(qp));
+}
+
+
+/*
+ * siw_qp_enable_crc()
+ *
+ * Allocate one "crc32c" hash transform each for the TX and the RX
+ * path of the QP and mark CRC as enabled in both contexts.
+ *
+ * Returns 0 on success or the negative errno reported by the crypto
+ * layer. On failure no transform is left allocated and both tfm
+ * pointers are reset to NULL, so neither an ERR_PTR value nor a
+ * pointer to a freed transform stays behind in the QP.
+ */
+static int siw_qp_enable_crc(struct siw_qp *qp)
+{
+       struct siw_iwarp_rx *c_rx = &qp->rx_ctx;
+       struct siw_iwarp_tx *c_tx = &qp->tx_ctx;
+       int rv;
+
+       c_tx->mpa_crc_hd.tfm = crypto_alloc_hash("crc32c", 0,
+                                                CRYPTO_ALG_ASYNC);
+       if (IS_ERR(c_tx->mpa_crc_hd.tfm)) {
+               /* PTR_ERR() already yields a negative errno */
+               rv = PTR_ERR(c_tx->mpa_crc_hd.tfm);
+               c_tx->mpa_crc_hd.tfm = NULL;
+               goto fail;
+       }
+       c_rx->mpa_crc_hd.tfm = crypto_alloc_hash("crc32c", 0,
+                                                CRYPTO_ALG_ASYNC);
+       if (IS_ERR(c_rx->mpa_crc_hd.tfm)) {
+               rv = PTR_ERR(c_rx->mpa_crc_hd.tfm);
+               c_rx->mpa_crc_hd.tfm = NULL;
+
+               /* undo the TX allocation, CRC is all or nothing */
+               crypto_free_hash(c_tx->mpa_crc_hd.tfm);
+               c_tx->mpa_crc_hd.tfm = NULL;
+               goto fail;
+       }
+       c_tx->crc_enabled = c_rx->crc_enabled = 1;
+
+       return 0;
+
+fail:
+       dprint(DBG_ON, "(QP%d): Failed loading crc32c: error=%d.",
+               QP_ID(qp), rv);
+       return rv;
+}
+
+
+/*
+ * siw_qp_modify()
+ *
+ * Change QP attributes and/or perform a QP state transition.
+ *
+ * @qp:                QP to be modified
+ * @attrs:     new attribute values; only members selected by @mask
+ *             are evaluated. NOTE: attrs->ird gets incremented by one
+ *             on a transition to RTS.
+ * @mask:      selects the attributes to be changed
+ *
+ * Access flags are applied first. If SIW_QP_ATTR_STATE is set, the
+ * state transition from the current state to attrs->state follows.
+ * Transitions out of RTS to CLOSING, TERMINATE or ERROR also trigger
+ * a connection drop.
+ *
+ * Returns 0 on success (including undefined transitions which are
+ * only logged), -EINVAL if a transition to RTS lacks the LLP handle
+ * or MPA attributes, -ECONNABORTED for an undefined transition out of
+ * CLOSING, or the error reported when setting up CRC or the IRQ.
+ *
+ * caller holds qp->state_lock
+ */
+int
+siw_qp_modify(struct siw_qp *qp, struct siw_qp_attrs *attrs,
+             enum siw_qp_attr_mask mask)
+{
+       int     drop_conn = 0, rv = 0;
+
+       if (!mask)
+               return 0;
+
+       dprint(DBG_CM, "(QP%d)\n", QP_ID(qp));
+
+       if (mask != SIW_QP_ATTR_STATE) {
+               /*
+                * changes of qp attributes (maybe state, too)
+                */
+               if (mask & SIW_QP_ATTR_ACCESS_FLAGS) {
+
+                       if (attrs->flags & SIW_RDMA_BIND_ENABLED)
+                               qp->attrs.flags |= SIW_RDMA_BIND_ENABLED;
+                       else
+                               qp->attrs.flags &= ~SIW_RDMA_BIND_ENABLED;
+
+                       if (attrs->flags & SIW_RDMA_WRITE_ENABLED)
+                               qp->attrs.flags |= SIW_RDMA_WRITE_ENABLED;
+                       else
+                               qp->attrs.flags &= ~SIW_RDMA_WRITE_ENABLED;
+
+                       /*
+                        * Revoking READ must clear the READ bit and
+                        * leave the WRITE permission set above alone.
+                        */
+                       if (attrs->flags & SIW_RDMA_READ_ENABLED)
+                               qp->attrs.flags |= SIW_RDMA_READ_ENABLED;
+                       else
+                               qp->attrs.flags &= ~SIW_RDMA_READ_ENABLED;
+
+               }
+               /*
+                * TODO: what else ??
+                */
+       }
+       if (!(mask & SIW_QP_ATTR_STATE))
+               return 0;
+
+       dprint(DBG_CM, "(QP%d): SIW QP state: %s => %s\n", QP_ID(qp),
+               siw_qp_state_to_string[qp->attrs.state],
+               siw_qp_state_to_string[attrs->state]);
+
+
+       switch (qp->attrs.state) {
+
+       case SIW_QP_STATE_IDLE:
+       case SIW_QP_STATE_RTR:
+
+               switch (attrs->state) {
+
+               case SIW_QP_STATE_RTS:
+
+                       /*
+                        * Validate the mask before touching any
+                        * resource: attrs->mpa is only meaningful with
+                        * SIW_QP_ATTR_MPA set, and bailing out after
+                        * CRC setup would leak the hash transforms.
+                        */
+                       if (!(mask & SIW_QP_ATTR_LLP_HANDLE)) {
+                               dprint(DBG_ON, "(QP%d): socket?\n", QP_ID(qp));
+                               rv = -EINVAL;
+                               break;
+                       }
+                       if (!(mask & SIW_QP_ATTR_MPA)) {
+                               dprint(DBG_ON, "(QP%d): MPA?\n", QP_ID(qp));
+                               rv = -EINVAL;
+                               break;
+                       }
+                       if (attrs->mpa.crc) {
+                               rv = siw_qp_enable_crc(qp);
+                               if (rv)
+                                       break;
+                       }
+                       dprint(DBG_CM, "(QP%d): Enter RTS: "
+                               "peer 0x%08x, local 0x%08x\n", QP_ID(qp),
+                               qp->cep->llp.raddr.sin_addr.s_addr,
+                               qp->cep->llp.laddr.sin_addr.s_addr);
+                       /*
+                        * Initialize global iWARP TX state
+                        */
+                       qp->tx_ctx.ddp_msn[RDMAP_UNTAGGED_QN_SEND] = 0;
+                       qp->tx_ctx.ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ] = 0;
+                       qp->tx_ctx.ddp_msn[RDMAP_UNTAGGED_QN_TERMINATE] = 0;
+
+                       /*
+                        * Initialize global iWARP RX state
+                        */
+                       qp->rx_ctx.ddp_msn[RDMAP_UNTAGGED_QN_SEND] = 1;
+                       qp->rx_ctx.ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ] = 1;
+                       qp->rx_ctx.ddp_msn[RDMAP_UNTAGGED_QN_TERMINATE] = 1;
+
+                       /*
+                        * init IRD freequeue, caller has already checked
+                        * limits. Add one extra entry since after sending
+                        * the RResponse it may trigger another peer RRequest
+                        * before the RResponse goes back to free queue.
+                        */
+                       ++attrs->ird;
+                       rv = siw_qp_irq_init(qp, attrs->ird);
+                       if (rv)
+                               break;
+
+                       atomic_set(&qp->orq_space, attrs->ord);
+
+                       qp->attrs.ord = attrs->ord;
+                       qp->attrs.ird = attrs->ird;
+                       qp->attrs.mpa = attrs->mpa;
+                       /*
+                        * move socket rx and tx under qp's control
+                        */
+                       siw_qp_socket_assoc(attrs->llp_stream_handle, qp);
+
+                       qp->attrs.state = SIW_QP_STATE_RTS;
+                       /*
+                        * set initial mss
+                        */
+                       qp->tx_ctx.tcp_seglen =
+                               get_tcp_mss(attrs->llp_stream_handle->sk);
+
+                       break;
+
+               case SIW_QP_STATE_ERROR:
+                       siw_rq_flush(qp);
+                       qp->attrs.state = SIW_QP_STATE_ERROR;
+                       if (qp->cep) {
+                               siw_cep_put(qp->cep);
+                               qp->cep = NULL;
+                       }
+                       break;
+
+               case SIW_QP_STATE_RTR:
+                       /* ignore */
+                       break;
+
+               default:
+                       dprint(DBG_CM,
+                               " QP state transition undefined: %s => %s\n",
+                               siw_qp_state_to_string[qp->attrs.state],
+                               siw_qp_state_to_string[attrs->state]);
+                       break;
+               }
+               break;
+
+       case SIW_QP_STATE_RTS:
+
+               switch (attrs->state) {
+
+               case SIW_QP_STATE_CLOSING:
+                       /*
+                        * Verbs: move to IDLE if SQ and ORQ are empty.
+                        * Move to ERROR otherwise. But first of all we must
+                        * close the connection. So we keep CLOSING or ERROR
+                        * as a transient state, schedule connection drop work
+                        * and wait for the socket state change upcall to
+                        * come back closed.
+                        */
+                       if (TX_IDLE(qp))
+                               qp->attrs.state = SIW_QP_STATE_CLOSING;
+                       else {
+                               qp->attrs.state = SIW_QP_STATE_ERROR;
+                               siw_sq_flush(qp);
+                       }
+                       siw_rq_flush(qp);
+
+                       drop_conn = 1;
+                       break;
+
+               case SIW_QP_STATE_TERMINATE:
+                       qp->attrs.state = SIW_QP_STATE_TERMINATE;
+                       siw_send_terminate(qp);
+                       drop_conn = 1;
+
+                       break;
+
+               case SIW_QP_STATE_ERROR:
+                       /*
+                        * This is an emergency close.
+                        *
+                        * Any in progress transmit operation will get
+                        * cancelled.
+                        * This will likely result in a protocol failure,
+                        * if a TX operation is in transit. The caller
+                        * could unconditionally wait to give the current
+                        * operation a chance to complete.
+                        * Esp., how to handle the non-empty IRQ case?
+                        * The peer was asking for data transfer at a valid
+                        * point in time.
+                        */
+                       siw_sq_flush(qp);
+                       siw_rq_flush(qp);
+                       qp->attrs.state = SIW_QP_STATE_ERROR;
+                       drop_conn = 1;
+
+                       break;
+
+               default:
+                       dprint(DBG_ON,
+                               " QP state transition undefined: %s => %s\n",
+                               siw_qp_state_to_string[qp->attrs.state],
+                               siw_qp_state_to_string[attrs->state]);
+                       break;
+               }
+               break;
+
+       case SIW_QP_STATE_TERMINATE:
+
+               switch (attrs->state) {
+
+               case SIW_QP_STATE_ERROR:
+                       siw_rq_flush(qp);
+                       qp->attrs.state = SIW_QP_STATE_ERROR;
+
+                       if (!TX_IDLE(qp))
+                               siw_sq_flush(qp);
+
+                       break;
+
+               default:
+                       dprint(DBG_ON,
+                               " QP state transition undefined: %s => %s\n",
+                               siw_qp_state_to_string[qp->attrs.state],
+                               siw_qp_state_to_string[attrs->state]);
+               }
+               break;
+
+       case SIW_QP_STATE_CLOSING:
+
+               switch (attrs->state) {
+
+               case SIW_QP_STATE_IDLE:
+                       BUG_ON(!TX_IDLE(qp));
+                       qp->attrs.state = SIW_QP_STATE_IDLE;
+
+                       break;
+
+               case SIW_QP_STATE_CLOSING:
+                       /*
+                        * The LLP may have already moved the QP to
+                        * closing due to graceful peer close init
+                        */
+                       break;
+
+               case SIW_QP_STATE_ERROR:
+                       /*
+                        * QP was moved to CLOSING by LLP event
+                        * not yet seen by user.
+                        */
+                       qp->attrs.state = SIW_QP_STATE_ERROR;
+
+                       if (!TX_IDLE(qp))
+                               siw_sq_flush(qp);
+
+                       siw_rq_flush(qp);
+
+                       break;
+
+               default:
+                       dprint(DBG_CM,
+                               " QP state transition undefined: %s => %s\n",
+                               siw_qp_state_to_string[qp->attrs.state],
+                               siw_qp_state_to_string[attrs->state]);
+                       return -ECONNABORTED;
+               }
+               break;
+
+       default:
+               dprint(DBG_CM, " NOP: State: %d\n", qp->attrs.state);
+               break;
+       }
+       if (drop_conn)
+               siw_qp_cm_drop(qp, 0);
+
+       return rv;
+}
+
+/*
+ * siw_get_ofaqp()
+ *
+ * Translate a QP id into the OFA QP embedded in the matching siw QP.
+ *
+ * @ofa_dev:   OFA device the QP belongs to
+ * @id:                QP id to look up
+ *
+ * Returns the OFA QP, or NULL if no QP with that id exists. The
+ * reference taken by the lookup is dropped again before returning,
+ * so the caller does not own a reference on the returned QP.
+ */
+struct ib_qp *siw_get_ofaqp(struct ib_device *ofa_dev, int id)
+{
+       struct siw_dev *sdev = siw_dev_ofa2siw(ofa_dev);
+       struct siw_qp *qp = siw_qp_id2obj(sdev, id);
+
+       dprint(DBG_OBJ, ": dev_name: %s, OFA QPID: %d, QP: %p\n",
+               ofa_dev->name, id, qp);
+       if (!qp)
+               return NULL;
+
+       /* siw_qp_id2obj() took a reference on the QP, give it back */
+       siw_qp_put(qp);
+       dprint(DBG_OBJ, " QPID: %d\n", QP_ID(qp));
+
+       return &qp->ofa_qp;
+}
+
+/*
+ * siw_check_mem()
+ *
+ * Check protection domain, STAG state, access permissions and
+ * address range for memory object.
+ *
+ * @pd:                Protection Domain memory should belong to
+ * @mem:       memory to be checked
+ * @addr:      starting addr of mem
+ * @perms:     requested access permissions
+ * @len:       len of memory interval to be checked
+ *
+ * Returns 0 if access is granted, -EINVAL on PD mismatch or if the
+ * interval [@addr, @addr + @len) is not fully contained in the memory
+ * object, and -EPERM if the STag is invalid or @perms are not covered
+ * by the permissions of the memory object.
+ */
+int siw_check_mem(struct siw_pd *pd, struct siw_mem *mem, u64 addr,
+                 enum siw_access_flags perms, int len)
+{
+       if (siw_mem2mr(mem)->pd != pd) {
+               dprint(DBG_WR|DBG_ON, "(PD%d): PD mismatch %p : %p\n",
+                       OBJ_ID(pd),
+                       siw_mem2mr(mem)->pd, pd);
+
+               return -EINVAL;
+       }
+       if (mem->stag_state == STAG_INVALID) {
+               dprint(DBG_WR|DBG_ON, "(PD%d): STAG 0x%08x invalid\n",
+                       OBJ_ID(pd), OBJ_ID(mem));
+               return -EPERM;
+       }
+       /*
+        * check access permissions
+        */
+       if ((mem->perms & perms) < perms) {
+               dprint(DBG_WR|DBG_ON, "(PD%d): "
+                       "INSUFFICIENT permissions 0x%08x : 0x%08x\n",
+                       OBJ_ID(pd), mem->perms, perms);
+               return -EPERM;
+       }
+       /*
+        * Check address interval. The comparison is done on the offset
+        * into the memory object rather than on end addresses, since
+        * 'addr + len' may wrap around for large addresses and would
+        * then wrongly pass an end address comparison. A negative len
+        * is rejected for the same reason. The subtractions below
+        * cannot underflow since they are guarded by the preceding
+        * comparisons.
+        * TODO: the original code intended some relaxation for memory
+        * shrinked from the start address; not implemented here.
+        */
+       if (len < 0 || addr < mem->va ||
+           addr - mem->va > mem->len ||
+           (u64)len > mem->len - (addr - mem->va)) {
+               dprint(DBG_WR|DBG_ON, "(PD%d): MEM interval len %d "
+                       "[0x%016llx, 0x%016llx) out of bounds "
+                       "[0x%016llx, 0x%016llx) for LKey=0x%08x\n",
+                       OBJ_ID(pd), len, (unsigned long long)addr,
+                       (unsigned long long)(addr + len),
+                       (unsigned long long)mem->va,
+                       (unsigned long long)(mem->va + mem->len),
+                       OBJ_ID(mem));
+
+               return -EINVAL;
+       }
+       return 0;
+}
+
+/*
+ * siw_check_sge()
+ *
+ * Verify that the interval [@off, @off + @len) of an SGE may be
+ * accessed with the given permissions.
+ *
+ * @pd:                Protection Domain memory should belong to
+ * @sge:       SGE to be checked
+ * @perms:     requested access permissions
+ * @off:       starting offset in SGE
+ * @len:       len of memory interval to be checked
+ *
+ * Returns 0 on success, -EPERM if the interval exceeds the SGE
+ * length, -EINVAL if the memory object cannot be found, or the
+ * result of siw_check_mem().
+ *
+ * NOTE: If the SGE does not yet reference its memory object
+ * (sge->mem.obj), it is looked up by lkey and referenced here. That
+ * new reference is kept if the check succeeds and dropped if it
+ * fails. A memory object already attached to the SGE is used as is
+ * and stays attached even if the check fails.
+ */
+int
+siw_check_sge(struct siw_pd *pd, struct siw_sge *sge,
+             enum siw_access_flags perms, u32 off, int len)
+{
+       struct siw_dev  *sdev = pd->hdr.sdev;
+       struct siw_mem  *mem;
+       int             rv;
+
+       if (len + off > sge->len)
+               return -EPERM;
+
+       if (sge->mem.obj != NULL) {
+               /* memory already resolved earlier: nothing to undo */
+               mem = sge->mem.obj;
+               return siw_check_mem(pd, mem, sge->addr + off, perms, len);
+       }
+       /* first use of this SGE: resolve memory object from lkey */
+       mem = siw_mem_id2obj(sdev, sge->lkey >> 8);
+       if (!mem)
+               return -EINVAL;
+
+       sge->mem.obj = mem;
+
+       rv = siw_check_mem(pd, mem, sge->addr + off, perms, len);
+       if (rv) {
+               /* drop the reference obtained by the lookup above */
+               siw_mem_put(mem);
+               sge->mem.obj = NULL;
+       }
+       return rv;
+}
+
+
+/*
+ * siw_check_sgl()
+ *
+ * Check access permissions for a byte range within a list of
+ * SGE's (SGL).
+ *
+ * @pd:                Protection Domain SGL should belong to
+ * @sge:       List of SGE to be checked
+ * @perms:     requested access permissions
+ * @off:       starting offset in SGL
+ * @len:       len of memory interval to be checked
+ *
+ * Only the part of the SGL covered by @len bytes is checked. The
+ * check starts at byte offset @off, which must lie within the first
+ * SGE (enforced by BUG_ON). Each visited SGE is checked from the
+ * current offset up to its end.
+ *
+ * The caller is responsible for keeping @len + @off within
+ * the total byte len of the SGL; the list is walked without any
+ * other bound.
+ *
+ * Returns 0 on success or the first error from siw_check_sge().
+ */
+
+int siw_check_sgl(struct siw_pd *pd, struct siw_sge *sge,
+                 enum siw_access_flags perms, u32 off, int len)
+{
+       int     rv = 0;
+
+       dprint(DBG_WR, "(PD%d): Enter\n", OBJ_ID(pd));
+
+       BUG_ON(off >= sge->len);
+
+       for (; len > 0; sge++) {
+               dprint(DBG_WR, "(PD%d): sge=%p, perms=0x%x, "
+                       "len=%d, off=%u, sge->len=%d\n",
+                       OBJ_ID(pd), sge, perms, len, off, sge->len);
+               /*
+                * rdma verbs: a zero length sge is skipped without
+                * looking at its stag
+                */
+               if (sge->len == 0)
+                       continue;
+
+               rv = siw_check_sge(pd, sge, perms, off, sge->len - off);
+               if (rv)
+                       break;
+
+               /* only the first SGE is entered at a non-zero offset */
+               len -= sge->len - off;
+               off = 0;
+       }
+       return rv;
+}
+
+/*
+ * siw_crc_array()
+ *
+ * Feed @len bytes starting at @start into the hash described by
+ * @desc, using a temporary single-entry scatterlist.
+ * Returns the result of crypto_hash_update().
+ */
+int siw_crc_array(struct hash_desc *desc, u8 *start, size_t len)
+{
+       struct scatterlist one_sg;
+       int rv;
+
+       sg_init_one(&one_sg, start, len);
+       rv = crypto_hash_update(desc, &one_sg, len);
+
+       return rv;
+}
+
+/*
+ * siw_crc_sg()
+ *
+ * Feed @len bytes of scatterlist @sg into the hash described by
+ * @desc. With a zero @off, @sg is passed on unchanged. Otherwise a
+ * temporary single-entry scatterlist is set up on the page of @sg,
+ * with @off as page offset and @len as length.
+ * Returns the result of crypto_hash_update().
+ */
+int siw_crc_sg(struct hash_desc *desc, struct scatterlist *sg,
+              int off, int len)
+{
+       struct scatterlist part_sg;
+
+       if (off == 0)
+               return crypto_hash_update(desc, sg, len);
+
+       sg_init_table(&part_sg, 1);
+       sg_set_page(&part_sg, sg_page(sg), len, off);
+
+       return crypto_hash_update(desc, &part_sg, len);
+}
+
+
+/*
+ * siw_sq_flush()
+ *
+ * Flush SQ and ORRQ entries to CQ.
+ * IRRQ entries are silently dropped.
+ *
+ * An in-progress WQE is detached from the TX context first: a Read
+ * Response is dropped, a Read Request is already on the ORRQ, and
+ * any other WQE is appended to the ORRQ to get flushed from there.
+ * Without a send CQ, flushed WQE's are released instead of being
+ * queued. Finally, SQ space is reset to the configured SQ size, the
+ * CQ's completion handler is called if at least one WQE got queued
+ * to the CQ, and an IB_EVENT_SQ_DRAINED event is raised if
+ * unfinished work was flushed.
+ *
+ * TODO: Add termination code for in-progress WQE.
+ * TODO: an in-progress WQE may have been partially
+ *       processed. It should be enforced, that transmission
+ *       of a started DDP segment must be completed if possible
+ *       by any chance.
+ *
+ * Must be called with qp state write lock held.
+ * Therefore, SQ and ORQ lock must not be taken.
+ */
+void siw_sq_flush(struct siw_qp *qp)
+{
+       struct list_head        *pos, *n;
+       struct siw_wqe          *wqe = tx_wqe(qp);
+       struct siw_cq           *cq = qp->scq;
+       int                     async_event = 0;
+       int                     notify_cq = 0;
+
+       dprint(DBG_OBJ|DBG_CM|DBG_WR, "(QP%d): Enter\n", QP_ID(qp));
+
+       /*
+        * flush the in-progress wqe, if there.
+        */
+       if (wqe) {
+               /*
+                * TODO: Add iWARP Termination code
+                */
+               tx_wqe(qp) = NULL;
+
+               dprint(DBG_WR,
+                       " (QP%d): Flush current WQE %p, type %d\n",
+                       QP_ID(qp), wqe, wr_type(wqe));
+
+               if (wr_type(wqe) == SIW_WR_RDMA_READ_RESP) {
+                       siw_wqe_put(wqe);
+               } else if (wr_type(wqe) != SIW_WR_RDMA_READ_REQ) {
+                       /*
+                        *  A RREQUEST is already on the ORRQ
+                        */
+                       list_add_tail(&wqe->list, &qp->orq);
+               }
+       }
+       /*
+        * IRQ: inbound read requests are dropped, no completion
+        */
+       list_for_each_safe(pos, n, &qp->irq) {
+               wqe = list_entry_wqe(pos);
+               dprint(DBG_WR,
+                       " (QP%d): Flush IRQ WQE %p, status %d\n",
+                       QP_ID(qp), wqe, wqe->wr_status);
+               list_del(&wqe->list);
+               siw_wqe_put(wqe);
+       }
+       /*
+        * ORQ: WQE's not yet done complete with flush error
+        */
+       list_for_each_safe(pos, n, &qp->orq) {
+               wqe = list_entry_wqe(pos);
+               dprint(DBG_WR,
+                       " (QP%d): Flush ORQ WQE %p, type %d,"
+                       " status %d\n", QP_ID(qp), wqe, wr_type(wqe),
+                       wqe->wr_status);
+               if (wqe->wr_status != SR_WR_DONE) {
+                       async_event = 1;
+                       wqe->wc_status = IB_WC_WR_FLUSH_ERR;
+                       wqe->wr_status = SR_WR_DONE;
+               }
+               if (cq) {
+                       lock_cq(cq);
+                       list_move_tail(&wqe->list, &cq->queue);
+                       /* TODO: enforce CQ limits */
+                       atomic_inc(&cq->qlen);
+                       unlock_cq(cq);
+                       notify_cq = 1;
+               } else {
+                       list_del(&wqe->list);
+                       siw_wqe_put(wqe);
+               }
+       }
+       /*
+        * SQ: every pending WQE is unfinished work
+        */
+       list_for_each_safe(pos, n, &qp->sq) {
+               async_event = 1;
+               wqe = list_entry_wqe(pos);
+               dprint(DBG_WR,
+                       " (QP%d): Flush SQ WQE %p, type %d\n",
+                       QP_ID(qp), wqe, wr_type(wqe));
+               if (cq) {
+                       wqe->wc_status = IB_WC_WR_FLUSH_ERR;
+                       wqe->wr_status = SR_WR_DONE;
+                       lock_cq(cq);
+                       list_move_tail(&wqe->list, &cq->queue);
+                       /* TODO: enforce CQ limits */
+                       atomic_inc(&cq->qlen);
+                       unlock_cq(cq);
+                       notify_cq = 1;
+               } else  {
+                       list_del(&wqe->list);
+                       siw_wqe_put(wqe);
+               }
+       }
+       atomic_set(&qp->sq_space, qp->attrs.sq_size);
+
+       /*
+        * notify_cq implies a valid cq. Do not look at 'wqe' here: it
+        * may refer to a WQE which has already been released above.
+        */
+       if (notify_cq && cq->ofa_cq.comp_handler != NULL)
+               (*cq->ofa_cq.comp_handler)(&cq->ofa_cq, cq->ofa_cq.cq_context);
+
+       if (async_event)
+               siw_qp_event(qp, IB_EVENT_SQ_DRAINED);
+}
+
+/*
+ * siw_rq_flush()
+ *
+ * Move all receive queue entries to the receive CQ, marked with
+ * flush error status. A WQE currently in progress (which may have
+ * some bytes processed, see wqe->processed) is put back at the head
+ * of the RQ first, so it gets flushed as well. If the current
+ * inbound message is an RDMA Write, the reference on the target
+ * memory is dropped instead. Without a receive CQ, the WQE's are
+ * released. Each flushed WQE returns one unit of space to the SRQ
+ * if the QP uses one, to the QP's own RQ otherwise.
+ *
+ * Must be called with qp state write lock held.
+ * Therefore, RQ lock must not be taken.
+ */
+void siw_rq_flush(struct siw_qp *qp)
+{
+       struct list_head        *cur, *tmp;
+       struct siw_wqe          *wqe;
+       struct siw_cq           *cq;
+
+       dprint(DBG_OBJ|DBG_CM|DBG_WR, "(QP%d): Enter\n", QP_ID(qp));
+
+       /*
+        * Take care of a receive operation in progress, if any
+        */
+       if (rx_wqe(qp)) {
+               if (__rdmap_opcode(&qp->rx_ctx.hdr.ctrl) == RDMAP_RDMA_WRITE)
+                       siw_mem_put(rx_mem(qp));
+               else
+                       list_add(&rx_wqe(qp)->list, &qp->rq);
+
+               rx_wqe(qp) = NULL;
+       }
+       if (list_empty(&qp->rq))
+               return;
+
+       cq = qp->rcq;
+
+       list_for_each_safe(cur, tmp, &qp->rq) {
+               wqe = list_entry_wqe(cur);
+               list_del_init(&wqe->list);
+
+               if (!cq) {
+                       /* nobody to report to */
+                       siw_wqe_put(wqe);
+               } else {
+                       wqe->wc_status = IB_WC_WR_FLUSH_ERR;
+                       lock_cq(cq);
+                       list_add_tail(&wqe->list, &cq->queue);
+                       /* TODO: enforce CQ limits */
+                       atomic_inc(&cq->qlen);
+                       unlock_cq(cq);
+               }
+               if (qp->srq)
+                       atomic_inc(&qp->srq->space);
+               else
+                       atomic_inc(&qp->rq_space);
+       }
+       if (cq != NULL && cq->ofa_cq.comp_handler != NULL)
+               (*cq->ofa_cq.comp_handler)(&cq->ofa_cq, cq->ofa_cq.cq_context);
+}
-- 
1.5.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to