Author: np
Date: Tue Apr 12 21:34:04 2016
New Revision: 297879
URL: https://svnweb.freebsd.org/changeset/base/297879

Log:
  Add fastreg support to krping (ported from upstream).
  
  Submitted by: Krishnamraju Eraparaju @ Chelsio
  Sponsored by: Chelsio Communications
  Differential Revision:        https://reviews.freebsd.org/D5777

Modified:
  head/sys/contrib/rdma/krping/krping.c

Modified: head/sys/contrib/rdma/krping/krping.c
==============================================================================
--- head/sys/contrib/rdma/krping/krping.c       Tue Apr 12 21:29:06 2016        (r297878)
+++ head/sys/contrib/rdma/krping/krping.c       Tue Apr 12 21:34:04 2016        (r297879)
@@ -56,6 +56,7 @@ __FBSDID("$FreeBSD$");
 extern int krping_debug;
 #define DEBUG_LOG(cb, x...) if (krping_debug) krping_printf((cb)->cookie, x)
 #define PRINTF(cb, x...) krping_printf((cb)->cookie, x)
+#define BIND_INFO 1
 
 MODULE_AUTHOR("Steve Wise");
 MODULE_DESCRIPTION("RDMA ping client/server");
@@ -99,7 +100,7 @@ static const struct krping_option krping
        {"poll", OPT_NOPARAM, 'P'},
        {"local_dma_lkey", OPT_NOPARAM, 'Z'},
        {"read_inv", OPT_NOPARAM, 'R'},
-       {"fr", OPT_NOPARAM, 'f'},
+       {"fr", OPT_INT, 'f'},
        {NULL, 0, 0}
 };
 
@@ -232,6 +233,7 @@ struct krping_cb {
        int txdepth;                    /* SQ depth */
        int local_dma_lkey;             /* use 0 for lkey */
        int frtest;                     /* fastreg test */
+       int testnum;
 
        /* CM stuff */
        struct rdma_cm_id *cm_id;       /* connection on client side,*/
@@ -365,11 +367,7 @@ static void krping_cq_event_handler(stru
                PRINTF(cb, "cq completion in ERROR state\n");
                return;
        }
-       if (cb->frtest) {
-               PRINTF(cb, "cq completion event in frtest!\n");
-               return;
-       }
-       if (!cb->wlat && !cb->rlat && !cb->bw)
+       if (!cb->wlat && !cb->rlat && !cb->bw && !cb->frtest)
                ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP);
        while ((ret = ib_poll_cq(cb->cq, 1, &wc)) == 1) {
                if (wc.status) {
@@ -411,7 +409,7 @@ static void krping_cq_event_handler(stru
                        DEBUG_LOG(cb, "recv completion\n");
                        cb->stats.recv_bytes += sizeof(cb->recv_buf);
                        cb->stats.recv_msgs++;
-                       if (cb->wlat || cb->rlat || cb->bw)
+                       if (cb->wlat || cb->rlat || cb->bw || cb->frtest)
                                ret = server_recv(cb, &wc);
                        else
                                ret = cb->server ? server_recv(cb, &wc) :
@@ -464,7 +462,7 @@ static int krping_accept(struct krping_c
                return ret;
        }
 
-       if (!cb->wlat && !cb->rlat && !cb->bw) {
+       if (!cb->wlat && !cb->rlat && !cb->bw && !cb->frtest) {
                wait_event_interruptible(cb->sem, cb->state >= CONNECTED);
                if (cb->state == ERROR) {
                        PRINTF(cb, "wait for CONNECTED state %d\n", 
@@ -502,7 +500,7 @@ static void krping_setup_wr(struct krpin
        cb->sq_wr.sg_list = &cb->send_sgl;
        cb->sq_wr.num_sge = 1;
 
-       if (cb->server || cb->wlat || cb->rlat || cb->bw) {
+       if (cb->server || cb->wlat || cb->rlat || cb->bw || cb->frtest) {
                cb->rdma_sgl.addr = cb->rdma_dma_addr;
                if (cb->mem == MR)
                        cb->rdma_sgl.lkey = cb->rdma_mr->lkey;
@@ -531,7 +529,11 @@ static void krping_setup_wr(struct krpin
        case MW:
                cb->bind_attr.wr_id = 0xabbaabba;
                cb->bind_attr.send_flags = 0; /* unsignaled */
+#ifdef BIND_INFO
                cb->bind_attr.bind_info.length = cb->size;
+#else
+               cb->bind_attr.length = cb->size;
+#endif
                break;
        default:
                break;
@@ -646,7 +648,7 @@ static int krping_setup_buffers(struct k
                        buf.size = cb->size;
                        iovbase = cb->rdma_dma_addr;
                        cb->rdma_mr = ib_reg_phys_mr(cb->pd, &buf, 1, 
-                                            IB_ACCESS_LOCAL_WRITE|
+                                               IB_ACCESS_LOCAL_WRITE|
                                             IB_ACCESS_REMOTE_READ| 
                                             IB_ACCESS_REMOTE_WRITE, 
                                             &iovbase);
@@ -665,7 +667,7 @@ static int krping_setup_buffers(struct k
                }
        }
 
-       if (!cb->server || cb->wlat || cb->rlat || cb->bw) {
+       if (!cb->server || cb->wlat || cb->rlat || cb->bw || cb->frtest) {
 
                cb->start_buf = kmalloc(cb->size, GFP_KERNEL);
                if (!cb->start_buf) {
@@ -682,9 +684,9 @@ static int krping_setup_buffers(struct k
                if (cb->mem == MR || cb->mem == MW) {
                        unsigned flags = IB_ACCESS_REMOTE_READ;
 
-                       if (cb->wlat || cb->rlat || cb->bw) {
+                       if (cb->wlat || cb->rlat || cb->bw || cb->frtest) {
                                flags |= IB_ACCESS_LOCAL_WRITE |
-                                   IB_ACCESS_REMOTE_WRITE;
+                                       IB_ACCESS_REMOTE_WRITE;
                        }
 
                        buf.addr = cb->start_dma_addr;
@@ -907,15 +909,33 @@ static u32 krping_rdma_rkey(struct krpin
                 * Update the MW with new buf info.
                 */
                if (buf == (u64)cb->start_dma_addr) {
+#ifdef BIND_INFO
                        cb->bind_attr.bind_info.mw_access_flags = IB_ACCESS_REMOTE_READ;
                        cb->bind_attr.bind_info.mr = cb->start_mr;
+#else
+                       cb->bind_attr.mw_access_flags = IB_ACCESS_REMOTE_READ;
+                       cb->bind_attr.mr = cb->start_mr;
+#endif
                } else {
+#ifdef BIND_INFO
                        cb->bind_attr.bind_info.mw_access_flags = IB_ACCESS_REMOTE_WRITE;
                        cb->bind_attr.bind_info.mr = cb->rdma_mr;
+#else
+                       cb->bind_attr.mw_access_flags = IB_ACCESS_REMOTE_WRITE;
+                       cb->bind_attr.mr = cb->rdma_mr;
+#endif
                }
+#ifdef BIND_INFO
                cb->bind_attr.bind_info.addr = buf;
+#else
+               cb->bind_attr.addr = buf;
+#endif
                DEBUG_LOG(cb, "binding mw rkey 0x%x to buf %llx mr rkey 0x%x\n",
+#ifdef BIND_INFO
                        cb->mw->rkey, buf, cb->bind_attr.bind_info.mr->rkey);
+#else
+                       cb->mw->rkey, buf, cb->bind_attr.mr->rkey);
+#endif
                ret = ib_bind_mw(cb->qp, cb->mw, &cb->bind_attr);
                if (ret) {
                        PRINTF(cb, "bind mw error %d\n", ret);
@@ -950,7 +970,7 @@ static void krping_format_send(struct kr
         * advertising the rdma buffer.  Server side
         * sends have no data.
         */
-       if (!cb->server || cb->wlat || cb->rlat || cb->bw) {
+       if (!cb->server || cb->wlat || cb->rlat || cb->bw || cb->frtest) {
                rkey = krping_rdma_rkey(cb, buf, !cb->server_invalidate);
                info->buf = htonll(buf);
                info->rkey = htonl(rkey);
@@ -980,7 +1000,6 @@ static void krping_test_server(struct kr
                cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr;
                cb->rdma_sq_wr.sg_list->length = cb->remote_len;
                cb->rdma_sgl.lkey = krping_rdma_rkey(cb, cb->rdma_dma_addr, 1);
-               cb->rdma_sq_wr.next = NULL;
 
                /* Issue RDMA Read. */
                if (cb->read_inv)
@@ -1484,7 +1503,6 @@ static void krping_rlat_test_server(stru
                PRINTF(cb, "send completiong error %d\n", wc.status);
                return;
        }
-
        wait_event_interruptible(cb->sem, cb->state == ERROR);
 }
 
@@ -1557,9 +1575,10 @@ static void krping_bw_test_server(struct
        wait_event_interruptible(cb->sem, cb->state == ERROR);
 }
 
-static int fastreg_supported(struct krping_cb *cb)
+static int fastreg_supported(struct krping_cb *cb, int server)
 {
-       struct ib_device *dev = cb->child_cm_id->device;
+       struct ib_device *dev = server?cb->child_cm_id->device:
+                                       cb->cm_id->device;
        struct ib_device_attr attr;
        int ret;
 
@@ -1610,158 +1629,259 @@ static int krping_bind_server(struct krp
                return -1;
        }
 
-       if (cb->mem == FASTREG && !fastreg_supported(cb))
+       if (cb->mem == FASTREG && !fastreg_supported(cb, 1))
                return -EINVAL;
 
        return 0;
 }
 
-static void krping_run_server(struct krping_cb *cb)
+/*
+ * sq-depth worth of fastreg + 0B read-inv pairs, reposting them as the reads
+ * complete.
+ * NOTE: every 9 seconds we sleep for 1 second to keep the kernel happy.
+ */
+static void krping_fr_test5(struct krping_cb *cb)
 {
-       struct ib_recv_wr *bad_wr;
+       struct ib_fast_reg_page_list **pl;
+       struct ib_send_wr *fr, *read, *bad;
+       struct ib_wc wc;
+       struct ib_sge *sgl;
+       u8 key = 0;
+       struct ib_mr **mr;
+       u8 **buf;
+       dma_addr_t *dma_addr;
+       int i;
        int ret;
+       int plen = (((cb->size - 1) & PAGE_MASK) + PAGE_SIZE) >> PAGE_SHIFT;
+       time_t start;
+       int count = 0;
+       int scnt;
+       int depth = cb->txdepth >> 1;
 
-       ret = krping_bind_server(cb);
-       if (ret)
+       if (!depth) {
+               PRINTF(cb, "txdepth must be > 1 for this test!\n");
                return;
-
-       ret = krping_setup_qp(cb, cb->child_cm_id);
-       if (ret) {
-               PRINTF(cb, "setup_qp failed: %d\n", ret);
-               goto err0;
        }
 
-       ret = krping_setup_buffers(cb);
-       if (ret) {
-               PRINTF(cb, "krping_setup_buffers failed: %d\n", ret);
+       pl = kzalloc(sizeof *pl * depth, GFP_KERNEL);
+       DEBUG_LOG(cb, "%s pl %p size %lu\n", __func__, pl, sizeof *pl * depth);
+       mr = kzalloc(sizeof *mr * depth, GFP_KERNEL);
+       DEBUG_LOG(cb, "%s mr %p size %lu\n", __func__, mr, sizeof *mr * depth);
+       fr = kzalloc(sizeof *fr * depth, GFP_KERNEL);
+       DEBUG_LOG(cb, "%s fr %p size %lu\n", __func__, fr, sizeof *fr * depth);
+       sgl = kzalloc(sizeof *sgl * depth, GFP_KERNEL);
+       DEBUG_LOG(cb, "%s sgl %p size %lu\n", __func__, sgl, sizeof *sgl * depth);
+       read = kzalloc(sizeof *read * depth, GFP_KERNEL);
+       DEBUG_LOG(cb, "%s read %p size %lu\n", __func__, read, sizeof *read * depth);
+       buf = kzalloc(sizeof *buf * depth, GFP_KERNEL);
+       DEBUG_LOG(cb, "%s buf %p size %lu\n", __func__, buf, sizeof *buf * depth);
+       dma_addr = kzalloc(sizeof *dma_addr * depth, GFP_KERNEL);
+       DEBUG_LOG(cb, "%s dma_addr %p size %lu\n", __func__, dma_addr, sizeof *dma_addr * depth);
+       if (!pl || !mr || !fr || !read || !sgl || !buf || !dma_addr) {
+               PRINTF(cb, "kzalloc failed\n");
                goto err1;
        }
 
-       ret = ib_post_recv(cb->qp, &cb->rq_wr, &bad_wr);
-       if (ret) {
-               PRINTF(cb, "ib_post_recv failed: %d\n", ret);
-               goto err2;
-       }
-
-       ret = krping_accept(cb);
-       if (ret) {
-               PRINTF(cb, "connect error %d\n", ret);
-               goto err2;
-       }
-
-       if (cb->wlat)
-               krping_wlat_test_server(cb);
-       else if (cb->rlat)
-               krping_rlat_test_server(cb);
-       else if (cb->bw)
-               krping_bw_test_server(cb);
-       else
-               krping_test_server(cb);
-       rdma_disconnect(cb->child_cm_id);
-err2:
-       krping_free_buffers(cb);
-err1:
-       krping_free_qp(cb);
-err0:
-       rdma_destroy_id(cb->child_cm_id);
-}
-
-static void krping_test_client(struct krping_cb *cb)
-{
-       int ping, start, cc, i, ret;
-       struct ib_send_wr *bad_wr;
-       unsigned char c;
-
-       start = 65;
-       for (ping = 0; !cb->count || ping < cb->count; ping++) {
-               cb->state = RDMA_READ_ADV;
-
-               /* Put some ascii text in the buffer. */
-               cc = sprintf(cb->start_buf, "rdma-ping-%d: ", ping);
-               for (i = cc, c = start; i < cb->size; i++) {
-                       cb->start_buf[i] = c;
-                       c++;
-                       if (c > 122)
-                               c = 65;
+       for (scnt = 0; scnt < depth; scnt++) {
+               pl[scnt] = ib_alloc_fast_reg_page_list(cb->qp->device, plen);
+               if (IS_ERR(pl[scnt])) {
+                       PRINTF(cb, "alloc_fr_page_list failed %ld\n",
+                              PTR_ERR(pl[scnt]));
+                       goto err2;
                }
-               start++;
-               if (start > 122)
-                       start = 65;
-               cb->start_buf[cb->size - 1] = 0;
+               DEBUG_LOG(cb, "%s pl[%u] %p\n", __func__, scnt, pl[scnt]);
 
-               krping_format_send(cb, cb->start_dma_addr);
-               if (cb->state == ERROR) {
-                       PRINTF(cb, "krping_format_send failed\n");
-                       break;
-               }
-               ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
-               if (ret) {
-                       PRINTF(cb, "post send error %d\n", ret);
-                       break;
+               mr[scnt] = ib_alloc_fast_reg_mr(cb->pd, plen);
+               if (IS_ERR(mr[scnt])) {
+                       PRINTF(cb, "alloc_fr failed %ld\n",
+                              PTR_ERR(mr[scnt]));
+                       goto err2;
                }
+               DEBUG_LOG(cb, "%s mr[%u] %p\n", __func__, scnt, mr[scnt]);
+               ib_update_fast_reg_key(mr[scnt], ++key);
 
-               /* Wait for server to ACK */
-               wait_event_interruptible(cb->sem, cb->state >= RDMA_WRITE_ADV);
-               if (cb->state != RDMA_WRITE_ADV) {
-                       PRINTF(cb, 
-                              "wait for RDMA_WRITE_ADV state %d\n",
-                              cb->state);
-                       break;
+               buf[scnt] = kmalloc(cb->size, GFP_KERNEL);
+               if (!buf[scnt]) {
+                       PRINTF(cb, "kmalloc failed\n");
+                       ret = -ENOMEM;
+                       goto err2;
                }
-
-               krping_format_send(cb, cb->rdma_dma_addr);
-               ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
+               DEBUG_LOG(cb, "%s buf[%u] %p\n", __func__, scnt, buf[scnt]);
+               dma_addr[scnt] = dma_map_single(cb->pd->device->dma_device,
+                                                  buf[scnt], cb->size,
+                                                  DMA_BIDIRECTIONAL);
+               if (dma_mapping_error(cb->pd->device->dma_device,
+                   dma_addr[scnt])) {
+                       PRINTF(cb, "dma_map failed\n");
+                       ret = -ENOMEM;
+                       goto err2;
+               }
+               DEBUG_LOG(cb, "%s dma_addr[%u] %p\n", __func__, scnt, (void *)dma_addr[scnt]);
+               for (i=0; i<plen; i++) {
+                       pl[scnt]->page_list[i] = ((unsigned long)dma_addr[scnt] & PAGE_MASK) + (i * PAGE_SIZE);
+                       DEBUG_LOG(cb, "%s pl[%u]->page_list[%u] 0x%llx\n",
+                                 __func__, scnt, i,  pl[scnt]->page_list[i]);
+               }
+
+               sgl[scnt].lkey = mr[scnt]->rkey;
+               sgl[scnt].length = cb->size;
+               sgl[scnt].addr = (u64)buf[scnt];
+               DEBUG_LOG(cb, "%s sgl[%u].lkey 0x%x length %u addr 0x%llx\n",
+                         __func__, scnt,  sgl[scnt].lkey, sgl[scnt].length,
+                         sgl[scnt].addr);
+
+               fr[scnt].opcode = IB_WR_FAST_REG_MR;
+               fr[scnt].wr_id = scnt;
+               fr[scnt].send_flags = 0;
+               fr[scnt].wr.fast_reg.page_shift = PAGE_SHIFT;
+               fr[scnt].wr.fast_reg.length = cb->size;
+               fr[scnt].wr.fast_reg.page_list = pl[scnt];
+               fr[scnt].wr.fast_reg.page_list_len = plen;
+               fr[scnt].wr.fast_reg.iova_start = (u64)buf[scnt];
+               fr[scnt].wr.fast_reg.access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE;
+               fr[scnt].wr.fast_reg.rkey = mr[scnt]->rkey;
+               fr[scnt].next = &read[scnt];
+               read[scnt].opcode = IB_WR_RDMA_READ_WITH_INV;
+               read[scnt].wr_id = scnt;
+               read[scnt].send_flags = IB_SEND_SIGNALED;
+               read[scnt].wr.rdma.rkey = cb->remote_rkey;
+               read[scnt].wr.rdma.remote_addr = cb->remote_addr;
+               read[scnt].num_sge = 1;
+               read[scnt].sg_list = &sgl[scnt];
+               ret = ib_post_send(cb->qp, &fr[scnt], &bad);
                if (ret) {
-                       PRINTF(cb, "post send error %d\n", ret);
-                       break;
+                       PRINTF(cb, "ib_post_send failed %d\n", ret);
+                       goto err2;
                }
+       }
 
-               /* Wait for the server to say the RDMA Write is complete. */
-               wait_event_interruptible(cb->sem, 
-                                        cb->state >= RDMA_WRITE_COMPLETE);
-               if (cb->state != RDMA_WRITE_COMPLETE) {
-                       PRINTF(cb, 
-                              "wait for RDMA_WRITE_COMPLETE state %d\n",
-                              cb->state);
+       start = time_uptime;
+       DEBUG_LOG(cb, "%s starting IO.\n", __func__);
+       while (!cb->count || cb->server || count < cb->count) {
+               if ((time_uptime - start) >= 9) {
+                       DEBUG_LOG(cb, "%s pausing 1 tick! count %u\n", __func__,
+                                 count);
+                       wait_event_interruptible_timeout(cb->sem,
+                                                        cb->state == ERROR,
+                                                        1);
+                       if (cb->state == ERROR)
+                               break;
+                       start = time_uptime;
+               }
+               do {
+                       ret = ib_poll_cq(cb->cq, 1, &wc);
+                       if (ret < 0) {
+                               PRINTF(cb, "ib_poll_cq failed %d\n",
+                                      ret);
+                               goto err2;
+                       }
+                       if (ret == 1) {
+                               if (wc.status) {
+                                       PRINTF(cb,
+                                              "completion error %u wr_id %lld "
+                                              "opcode %d\n", wc.status,
+                                              wc.wr_id, wc.opcode);
+                                       goto err2;
+                               }
+                               count++;
+                               if (count == cb->count)
+                                       break;
+                               ib_update_fast_reg_key(mr[wc.wr_id], ++key);
+                               fr[wc.wr_id].wr.fast_reg.rkey =
+                                       mr[wc.wr_id]->rkey;
+                               sgl[wc.wr_id].lkey = mr[wc.wr_id]->rkey;
+                               ret = ib_post_send(cb->qp, &fr[wc.wr_id], &bad);
+                               if (ret) {
+                                       PRINTF(cb,
+                                              "ib_post_send failed %d\n", ret);
+                                       goto err2;
+                               }
+                       } else if (krping_sigpending()) {
+                               PRINTF(cb, "signal!\n");
+                               goto err2;
+                       }
+               } while (ret == 1);
+       }
+       DEBUG_LOG(cb, "%s done!\n", __func__);
+err2:
+       DEBUG_LOG(cb, "sleeping 1 second\n");
+       wait_event_interruptible_timeout(cb->sem, cb->state == ERROR, HZ);
+       DEBUG_LOG(cb, "draining the cq...\n");
+       do {
+               ret = ib_poll_cq(cb->cq, 1, &wc);
+               if (ret < 0) {
+                       PRINTF(cb, "ib_poll_cq failed %d\n", ret);
                        break;
                }
-
-               if (cb->validate)
-                       if (memcmp(cb->start_buf, cb->rdma_buf, cb->size)) {
-                               PRINTF(cb, "data mismatch!\n");
-                               break;
+               if (ret == 1) {
+                       if (wc.status) {
+                               PRINTF(cb, "completion error %u "
+                                      "opcode %u\n", wc.status, wc.opcode);
                        }
+               }
+       } while (ret == 1);
 
-               if (cb->verbose) {
-                       if (strlen(cb->rdma_buf) > 128) {
-                               char msgbuf[128];
-
-                               strlcpy(msgbuf, cb->rdma_buf, sizeof(msgbuf));
-                               PRINTF(cb, "ping data stripped: %s\n",
-                                      msgbuf);
-                       } else
-                               PRINTF(cb, "ping data: %s\n", cb->rdma_buf);
+       DEBUG_LOG(cb, "destroying fr mrs!\n");
+       for (scnt = 0; scnt < depth; scnt++) {
+               if (mr[scnt]) {
+                       ib_dereg_mr(mr[scnt]);
+                       DEBUG_LOG(cb, "%s dereg mr %p\n", __func__, mr[scnt]);
+               }
+       }
+       DEBUG_LOG(cb, "unmapping/freeing bufs!\n");
+       for (scnt = 0; scnt < depth; scnt++) {
+               if (buf[scnt]) {
+                       dma_unmap_single(cb->pd->device->dma_device,
+                                        dma_addr[scnt], cb->size,
+                                        DMA_BIDIRECTIONAL);
+                       kfree(buf[scnt]);
+                       DEBUG_LOG(cb, "%s unmap/free buf %p dma_addr %p\n", __func__, buf[scnt], (void *)dma_addr[scnt]);
+               }
+       }
+       DEBUG_LOG(cb, "destroying fr page lists!\n");
+       for (scnt = 0; scnt < depth; scnt++) {
+               if (pl[scnt]) {
+                       DEBUG_LOG(cb, "%s free pl %p\n", __func__, pl[scnt]);
+                       ib_free_fast_reg_page_list(pl[scnt]);
                }
-#ifdef SLOW_KRPING
-               wait_event_interruptible_timeout(cb->sem, cb->state == ERROR, HZ);
-#endif
        }
+err1:
+       if (pl)
+               kfree(pl);
+       if (mr)
+               kfree(mr);
+       if (fr)
+               kfree(fr);
+       if (read)
+               kfree(read);
+       if (sgl)
+               kfree(sgl);
+       if (buf)
+               kfree(buf);
+       if (dma_addr)
+               kfree(dma_addr);
+}
+static void krping_fr_test_server(struct krping_cb *cb)
+{
+       DEBUG_LOG(cb, "%s waiting for disconnect...\n", __func__);
+       wait_event_interruptible(cb->sem, cb->state == ERROR);
 }
 
-static void krping_rlat_test_client(struct krping_cb *cb)
+static void krping_fr_test5_server(struct krping_cb *cb)
 {
        struct ib_send_wr *bad_wr;
        struct ib_wc wc;
        int ret;
 
-       cb->state = RDMA_READ_ADV;
+       /* Spin waiting for client's Start STAG/TO/Len */
+       while (cb->state < RDMA_READ_ADV) {
+               krping_cq_event_handler(cb->cq, cb);
+       }
+       DEBUG_LOG(cb, "%s client STAG %x TO 0x%llx\n", __func__,
+                 cb->remote_rkey, cb->remote_addr);
 
        /* Send STAG/TO/Len to client */
        krping_format_send(cb, cb->start_dma_addr);
-       if (cb->state == ERROR) {
-               PRINTF(cb, "krping_format_send failed\n");
-               return;
-       }
        ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
        if (ret) {
                PRINTF(cb, "post send error %d\n", ret);
@@ -1775,84 +1895,31 @@ static void krping_rlat_test_client(stru
                return;
        }
        if (wc.status) {
-               PRINTF(cb, "send completion error %d\n", wc.status);
+               PRINTF(cb, "send completiong error %d\n", wc.status);
                return;
        }
 
-       /* Spin waiting for server's Start STAG/TO/Len */
-       while (cb->state < RDMA_WRITE_ADV) {
-               krping_cq_event_handler(cb->cq, cb);
-       }
-
-#if 0
-{
-       int i;
-       struct timeval start, stop;
-       time_t sec;
-       suseconds_t usec;
-       unsigned long long elapsed;
-       struct ib_wc wc;
-       struct ib_send_wr *bad_wr;
-       int ne;
-       
-       cb->rdma_sq_wr.opcode = IB_WR_RDMA_WRITE;
-       cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey;
-       cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr;
-       cb->rdma_sq_wr.sg_list->length = 0;
-       cb->rdma_sq_wr.num_sge = 0;
-
-       microtime(&start);
-       for (i=0; i < 100000; i++) {
-               if (ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr)) {
-                       PRINTF(cb, "Couldn't post send\n");
-                       return;
-               }
-               do {
-                       ne = ib_poll_cq(cb->cq, 1, &wc);
-               } while (ne == 0);
-               if (ne < 0) {
-                       PRINTF(cb, "poll CQ failed %d\n", ne);
-                       return;
-               }
-               if (wc.status != IB_WC_SUCCESS) {
-                       PRINTF(cb, "Completion wth error at %s:\n",
-                               cb->server ? "server" : "client");
-                       PRINTF(cb, "Failed status %d: wr_id %d\n",
-                               wc.status, (int) wc.wr_id);
-                       return;
-               }
-       }
-       microtime(&stop);
-       
-       if (stop.tv_usec < start.tv_usec) {
-               stop.tv_usec += 1000000;
-               stop.tv_sec  -= 1;
-       }
-       sec     = stop.tv_sec - start.tv_sec;
-       usec    = stop.tv_usec - start.tv_usec;
-       elapsed = sec * 1000000 + usec;
-       PRINTF(cb, "0B-write-lat iters 100000 usec %llu\n", elapsed);
-}
-#endif
-
-       rlat_test(cb);
+       if (cb->duplex)
+               krping_fr_test5(cb);
+       DEBUG_LOG(cb, "%s waiting for disconnect...\n", __func__);
+       wait_event_interruptible(cb->sem, cb->state == ERROR);
 }
 
-static void krping_wlat_test_client(struct krping_cb *cb)
+static void krping_fr_test5_client(struct krping_cb *cb)
 {
-       struct ib_send_wr *bad_wr;
+       struct ib_send_wr *bad;
        struct ib_wc wc;
        int ret;
 
        cb->state = RDMA_READ_ADV;
 
-       /* Send STAG/TO/Len to client */
+       /* Send STAG/TO/Len to server */
        krping_format_send(cb, cb->start_dma_addr);
        if (cb->state == ERROR) {
                PRINTF(cb, "krping_format_send failed\n");
                return;
        }
-       ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
+       ret = ib_post_send(cb->qp, &cb->sq_wr, &bad);
        if (ret) {
                PRINTF(cb, "post send error %d\n", ret);
                return;
@@ -1873,15 +1940,619 @@ static void krping_wlat_test_client(stru
        while (cb->state < RDMA_WRITE_ADV) {
                krping_cq_event_handler(cb->cq, cb);
        }
+       DEBUG_LOG(cb, "%s server STAG %x TO 0x%llx\n", __func__, 
cb->remote_rkey, cb->remote_addr);
 
-       wlat_test(cb);
+       return krping_fr_test5(cb);
 }
 
-static void krping_bw_test_client(struct krping_cb *cb)
+/*
+ * sq-depth worth of write + fastreg + inv, reposting them as the invs
+ * complete.
+ * NOTE: every 9 seconds we pause for 1 tick (or until the connection
+ * enters the ERROR state) to keep the kernel happy.
+ * If a count is given, then the last IO will have a bogus lkey in the
+ * write work request.  This reproduces a fw bug where the connection
+ * will get stuck if a fastreg is processed while the ulptx is failing
+ * the bad write.
+ *
+ * Per-slot resources (page list, MR, buffer, DMA mapping) are tracked in
+ * zero-initialized arrays.  A slot whose setup fails is reset to NULL so
+ * that the shared cleanup path only releases what was actually acquired.
+ */
+static void krping_fr_test6(struct krping_cb *cb)
 {
-       struct ib_send_wr *bad_wr;
+       struct ib_fast_reg_page_list **pl;
+       struct ib_send_wr *fr, *write, *inv, *bad;
        struct ib_wc wc;
-       int ret;
+       struct ib_sge *sgl;
+       u8 key = 0;
+       struct ib_mr **mr;
+       u8 **buf;
+       dma_addr_t *dma_addr;
+       int i;
+       int ret;
+       /* Number of pages needed to cover cb->size bytes. */
+       int plen = (((cb->size - 1) & PAGE_MASK) + PAGE_SIZE) >> PAGE_SHIFT;
+       unsigned long start;
+       int count = 0;
+       int scnt;
+       /* Each slot posts a chain of three WRs: write, fastreg, inv. */
+       int depth = cb->txdepth / 3;
+
+       if (!depth) {
+               PRINTF(cb, "txdepth must be > 3 for this test!\n");
+               return;
+       }
+
+       pl = kzalloc(sizeof *pl * depth, GFP_KERNEL);
+       DEBUG_LOG(cb, "%s pl %p size %lu\n", __func__, pl, sizeof *pl * depth);
+
+       mr = kzalloc(sizeof *mr * depth, GFP_KERNEL);
+       DEBUG_LOG(cb, "%s mr %p size %lu\n", __func__, mr, sizeof *mr * depth);
+
+       fr = kzalloc(sizeof *fr * depth, GFP_KERNEL);
+       DEBUG_LOG(cb, "%s fr %p size %lu\n", __func__, fr, sizeof *fr * depth);
+
+       sgl = kzalloc(sizeof *sgl * depth, GFP_KERNEL);
+       DEBUG_LOG(cb, "%s sgl %p size %lu\n", __func__, sgl,
+                 sizeof *sgl * depth);
+
+       write = kzalloc(sizeof *write * depth, GFP_KERNEL);
+       DEBUG_LOG(cb, "%s write %p size %lu\n", __func__, write,
+                 sizeof *write * depth);
+
+       inv = kzalloc(sizeof *inv * depth, GFP_KERNEL);
+       DEBUG_LOG(cb, "%s inv %p size %lu\n", __func__, inv,
+                 sizeof *inv * depth);
+
+       buf = kzalloc(sizeof *buf * depth, GFP_KERNEL);
+       DEBUG_LOG(cb, "%s buf %p size %lu\n", __func__, buf,
+                 sizeof *buf * depth);
+
+       dma_addr = kzalloc(sizeof *dma_addr * depth, GFP_KERNEL);
+       DEBUG_LOG(cb, "%s dma_addr %p size %lu\n", __func__, dma_addr,
+                 sizeof *dma_addr * depth);
+
+       /* Every array is dereferenced below, so every one must be checked. */
+       if (!pl || !mr || !fr || !write || !inv || !sgl || !buf ||
+           !dma_addr) {
+               PRINTF(cb, "kzalloc failed\n");
+               goto err1;
+       }
+
+       for (scnt = 0; scnt < depth; scnt++) {
+               pl[scnt] = ib_alloc_fast_reg_page_list(cb->qp->device, plen);
+               if (IS_ERR(pl[scnt])) {
+                       PRINTF(cb, "alloc_fr_page_list failed %ld\n",
+                              PTR_ERR(pl[scnt]));
+                       /* Don't let cleanup free an error pointer. */
+                       pl[scnt] = NULL;
+                       goto err2;
+               }
+               DEBUG_LOG(cb, "%s pl[%u] %p\n", __func__, scnt, pl[scnt]);
+
+               mr[scnt] = ib_alloc_fast_reg_mr(cb->pd, plen);
+               if (IS_ERR(mr[scnt])) {
+                       PRINTF(cb, "alloc_fr failed %ld\n",
+                              PTR_ERR(mr[scnt]));
+                       /* Don't let cleanup deregister an error pointer. */
+                       mr[scnt] = NULL;
+                       goto err2;
+               }
+               DEBUG_LOG(cb, "%s mr[%u] %p\n", __func__, scnt, mr[scnt]);
+               ib_update_fast_reg_key(mr[scnt], ++key);
+
+               buf[scnt] = kmalloc(cb->size, GFP_KERNEL);
+               if (!buf[scnt]) {
+                       PRINTF(cb, "kmalloc failed\n");
+                       ret = -ENOMEM;
+                       goto err2;
+               }
+               DEBUG_LOG(cb, "%s buf[%u] %p\n", __func__, scnt, buf[scnt]);
+               dma_addr[scnt] = dma_map_single(cb->pd->device->dma_device,
+                                                  buf[scnt], cb->size,
+                                                  DMA_BIDIRECTIONAL);
+               if (dma_mapping_error(cb->pd->device->dma_device,
+                   dma_addr[scnt])) {
+                       PRINTF(cb, "dma_map failed\n");
+                       ret = -ENOMEM;
+                       /*
+                        * The buffer was never mapped: free it here and
+                        * clear the slot so cleanup does not unmap it.
+                        */
+                       kfree(buf[scnt]);
+                       buf[scnt] = NULL;
+                       goto err2;
+               }
+               DEBUG_LOG(cb, "%s dma_addr[%u] %p\n", __func__, scnt,
+                         (void *)dma_addr[scnt]);
+               for (i=0; i<plen; i++) {
+                       pl[scnt]->page_list[i] =
+                           ((unsigned long)dma_addr[scnt] & PAGE_MASK) +
+                           (i * PAGE_SIZE);
+                       DEBUG_LOG(cb, "%s pl[%u]->page_list[%u] 0x%llx\n",
+                                 __func__, scnt, i,  pl[scnt]->page_list[i]);
+               }
+
+               /* Chain for this slot: write -> fastreg -> local inv. */
+               write[scnt].opcode = IB_WR_RDMA_WRITE;
+               write[scnt].wr_id = scnt;
+               write[scnt].wr.rdma.rkey = cb->remote_rkey;
+               write[scnt].wr.rdma.remote_addr = cb->remote_addr;
+               write[scnt].num_sge = 1;
+               write[scnt].sg_list = &cb->rdma_sgl;
+               write[scnt].sg_list->length = cb->size;
+               write[scnt].next = &fr[scnt];
+
+               fr[scnt].opcode = IB_WR_FAST_REG_MR;
+               fr[scnt].wr_id = scnt;
+               fr[scnt].wr.fast_reg.page_shift = PAGE_SHIFT;
+               fr[scnt].wr.fast_reg.length = cb->size;
+               fr[scnt].wr.fast_reg.page_list = pl[scnt];
+               fr[scnt].wr.fast_reg.page_list_len = plen;
+               fr[scnt].wr.fast_reg.iova_start = (u64)buf[scnt];
+               fr[scnt].wr.fast_reg.access_flags = IB_ACCESS_REMOTE_WRITE |
+                   IB_ACCESS_LOCAL_WRITE;
+               fr[scnt].wr.fast_reg.rkey = mr[scnt]->rkey;
+               fr[scnt].next = &inv[scnt];
+
+               /*
+                * NOTE(review): inv[].wr_id is never set (the array is
+                * zero-filled by kzalloc) and only the inv WR requests a
+                * signaled completion here.  If the QP signals on request
+                * only, every completion would carry wr_id 0 and the loop
+                * below would always repost slot 0 -- confirm whether
+                * that is intended.
+                */
+               inv[scnt].opcode = IB_WR_LOCAL_INV;
+               inv[scnt].send_flags = IB_SEND_SIGNALED;
+               inv[scnt].ex.invalidate_rkey = mr[scnt]->rkey;
+
+               ret = ib_post_send(cb->qp, &write[scnt], &bad);
+               if (ret) {
+                       PRINTF(cb, "ib_post_send failed %d\n", ret);
+                       goto err2;
+               }
+       }
+
+       start = time_uptime;
+       DEBUG_LOG(cb, "%s starting IO.\n", __func__);
+       /* Runs until "count" completions on a client with a count set. */
+       while (!cb->count || cb->server || count < cb->count) {
+               if ((time_uptime - start) >= 9) {
+                       DEBUG_LOG(cb, "%s pausing 1 tick! count %u\n", __func__,
+                                 count);
+                       wait_event_interruptible_timeout(cb->sem,
+                                                        cb->state == ERROR,
+                                                        1);
+                       if (cb->state == ERROR)
+                               break;
+                       start = time_uptime;
+               }
+               do {
+                       ret = ib_poll_cq(cb->cq, 1, &wc);
+                       if (ret < 0) {
+                               PRINTF(cb, "ib_poll_cq failed %d\n",
+                                      ret);
+                               goto err2;
+                       }
+                       if (ret == 1) {
+                               if (wc.status) {
+                                       PRINTF(cb,
+                                              "completion error %u wr_id %lld "
+                                              "opcode %d\n", wc.status,
+                                              wc.wr_id, wc.opcode);
+                                       goto err2;
+                               }
+                               count++;
+                               /* Poison the lkey so the final write fails. */
+                               if (count == (cb->count -1))
+                                       cb->rdma_sgl.lkey = 0x00dead;
+                               if (count == cb->count)
+                                       break;
+                               /* Re-key the MR and repost the whole chain. */
+                               ib_update_fast_reg_key(mr[wc.wr_id], ++key);
+                               fr[wc.wr_id].wr.fast_reg.rkey =
+                                       mr[wc.wr_id]->rkey;
+                               inv[wc.wr_id].ex.invalidate_rkey =
+                                       mr[wc.wr_id]->rkey;
+                               ret = ib_post_send(cb->qp, &write[wc.wr_id],
+                                                  &bad);
+                               if (ret) {
+                                       PRINTF(cb,
+                                              "ib_post_send failed %d\n", ret);
+                                       goto err2;
+                               }
+                       } else if (krping_sigpending()){
+                               PRINTF(cb, "signal!\n");
+                               goto err2;
+                       }
+               } while (ret == 1);
+       }
+       DEBUG_LOG(cb, "%s done!\n", __func__);
+err2:
+       /* Wait up to HZ ticks before polling off whatever has completed. */
+       DEBUG_LOG(cb, "sleeping 1 second\n");
+       wait_event_interruptible_timeout(cb->sem, cb->state == ERROR, HZ);
+       DEBUG_LOG(cb, "draining the cq...\n");
+       do {
+               ret = ib_poll_cq(cb->cq, 1, &wc);
+               if (ret < 0) {
+                       PRINTF(cb, "ib_poll_cq failed %d\n", ret);
+                       break;
+               }
+               if (ret == 1) {
+                       if (wc.status) {
+                               PRINTF(cb, "completion error %u "
+                                      "opcode %u\n", wc.status, wc.opcode);
+                       }
+               }
+       } while (ret == 1);
+
+       DEBUG_LOG(cb, "destroying fr mrs!\n");
+       for (scnt = 0; scnt < depth; scnt++) {
+               if (mr[scnt]) {
+                       ib_dereg_mr(mr[scnt]);
+                       DEBUG_LOG(cb, "%s dereg mr %p\n", __func__, mr[scnt]);
+               }
+       }
+       DEBUG_LOG(cb, "unmapping/freeing bufs!\n");
+       for (scnt = 0; scnt < depth; scnt++) {
+               /* A non-NULL buf[] slot always has a valid DMA mapping. */
+               if (buf[scnt]) {
+                       dma_unmap_single(cb->pd->device->dma_device,
+                                        dma_addr[scnt], cb->size,
+                                        DMA_BIDIRECTIONAL);
+                       kfree(buf[scnt]);
+                       DEBUG_LOG(cb, "%s unmap/free buf %p dma_addr %p\n",
+                                 __func__, buf[scnt], (void *)dma_addr[scnt]);
+               }
+       }
+       DEBUG_LOG(cb, "destroying fr page lists!\n");
+       for (scnt = 0; scnt < depth; scnt++) {
+               if (pl[scnt]) {
+                       DEBUG_LOG(cb, "%s free pl %p\n", __func__, pl[scnt]);
+                       ib_free_fast_reg_page_list(pl[scnt]);
+               }
+       }
+err1:
+       /* Reached with any subset of the arrays allocated. */
+       if (pl)
+               kfree(pl);
+       if (mr)
+               kfree(mr);
+       if (fr)
+               kfree(fr);
+       if (write)
+               kfree(write);
+       if (inv)
+               kfree(inv);
+       if (sgl)
+               kfree(sgl);
+       if (buf)
+               kfree(buf);
+       if (dma_addr)
+               kfree(dma_addr);
+}
+
+/*
+ * Server side of fastreg test 6: spin until the client's STAG/TO/Len has
+ * arrived, send our own STAG/TO/Len back, run the test loop when duplex
+ * mode is enabled, then wait for the connection state to become ERROR.
+ */
+static void krping_fr_test6_server(struct krping_cb *cb)
+{
+       struct ib_send_wr *bad_wr;
+       struct ib_wc wc;
+       int ret;
+
+       /* Spin waiting for client's Start STAG/TO/Len */
+       while (cb->state < RDMA_READ_ADV) {
+               krping_cq_event_handler(cb->cq, cb);
+       }
+       DEBUG_LOG(cb, "%s client STAG %x TO 0x%llx\n", __func__,
+                 cb->remote_rkey, cb->remote_addr);
+
+       /* Send STAG/TO/Len to client */
+       krping_format_send(cb, cb->start_dma_addr);
+       ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
+       if (ret) {
+               PRINTF(cb, "post send error %d\n", ret);
+               return;
+       }
+
+       /*
+        * Spin waiting for send completion.  ret must hold the value
+        * returned by ib_poll_cq() itself (not the result of comparing it
+        * with 0), otherwise the error check below can never trigger.
+        */
+       do {
+               ret = ib_poll_cq(cb->cq, 1, &wc);
+       } while (ret == 0);
+       if (ret < 0) {
+               PRINTF(cb, "poll error %d\n", ret);
+               return;
+       }
+       if (wc.status) {
+               PRINTF(cb, "send completiong error %d\n", wc.status);
+               return;
+       }
+
+       if (cb->duplex)
+               krping_fr_test6(cb);
+       DEBUG_LOG(cb, "%s waiting for disconnect...\n", __func__);
+       wait_event_interruptible(cb->sem, cb->state == ERROR);
+}
+
+/*
+ * Client side of fastreg test 6: send our STAG/TO/Len to the server, wait
+ * for that send to complete, spin until the server's STAG/TO/Len has
+ * arrived, then run the test loop.
+ */
+static void krping_fr_test6_client(struct krping_cb *cb)
+{
+       struct ib_send_wr *bad;
+       struct ib_wc wc;
+       int ret;
+
+       cb->state = RDMA_READ_ADV;
+
+       /* Send STAG/TO/Len to server */
+       krping_format_send(cb, cb->start_dma_addr);
+       if (cb->state == ERROR) {
+               PRINTF(cb, "krping_format_send failed\n");
+               return;
+       }
+       ret = ib_post_send(cb->qp, &cb->sq_wr, &bad);
+       if (ret) {
+               PRINTF(cb, "post send error %d\n", ret);
+               return;
+       }
+
+       /*
+        * Spin waiting for send completion.  ret must hold the value
+        * returned by ib_poll_cq() itself (not the result of comparing it
+        * with 0), otherwise the error check below can never trigger.
+        */
+       do {
+               ret = ib_poll_cq(cb->cq, 1, &wc);
+       } while (ret == 0);
+       if (ret < 0) {
+               PRINTF(cb, "poll error %d\n", ret);
+               return;
+       }
+       if (wc.status) {
+               PRINTF(cb, "send completion error %d\n", wc.status);
+               return;
+       }
+
+       /* Spin waiting for server's Start STAG/TO/Len */
+       while (cb->state < RDMA_WRITE_ADV) {
+               krping_cq_event_handler(cb->cq, cb);
+       }
+       DEBUG_LOG(cb, "%s server STAG %x TO 0x%llx\n", __func__,
+                 cb->remote_rkey, cb->remote_addr);
+
+       krping_fr_test6(cb);
+}
+
+static void krping_run_server(struct krping_cb *cb)
+{
+       struct ib_recv_wr *bad_wr;
+       int ret;
+
+       ret = krping_bind_server(cb);
+       if (ret)
+               return;
+
+       ret = krping_setup_qp(cb, cb->child_cm_id);
+       if (ret) {
+               PRINTF(cb, "setup_qp failed: %d\n", ret);
+               goto err0;
+       }
+

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
_______________________________________________
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to