From: Nicolas Morey-Chaisemartin <nmoreychaisemar...@suse.com>

commit 2b5715fc17386a6223490d5b8f08d031999b0c0b upstream.

The current code computes a number of channels per SRP target and spreads
them equally across all online NUMA nodes.  Each channel is then assigned
a CPU within this node.

In the case of unbalanced, or even unpopulated nodes, some channels do not
get a CPU associated and thus do not get connected.  This causes the SRP
connection to fail.

This patch solves the issue by rewriting channel computation and
allocation:

- Drop channel to node/CPU association as it had no real effect on
  locality but added unnecessary complexity.

- Tweak the number of channels allocated to reduce CPU contention when
  possible:
  - Up to one channel per CPU (instead of up to 4 by node)
  - At least 4 channels per node, unless ch_count module parameter is
    used.

Link: https://lore.kernel.org/r/9cb4d9d3-30ad-2276-7eff-e85f7ddfb...@suse.com
Signed-off-by: Nicolas Morey-Chaisemartin <nmoreychaisemar...@suse.com>
Reviewed-by: Bart Van Assche <bvanass...@acm.org>
Signed-off-by: Jason Gunthorpe <j...@nvidia.com>
Cc: Yi Zhang <yi.zh...@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gre...@linuxfoundation.org>
---
 drivers/infiniband/ulp/srp/ib_srp.c |  116 ++++++++++++++----------------------
 1 file changed, 48 insertions(+), 68 deletions(-)

--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -3628,7 +3628,7 @@ static ssize_t srp_create_target(struct
        struct srp_rdma_ch *ch;
        struct srp_device *srp_dev = host->srp_dev;
        struct ib_device *ibdev = srp_dev->dev;
-       int ret, node_idx, node, cpu, i;
+       int ret, i, ch_idx;
        unsigned int max_sectors_per_mr, mr_per_cmd = 0;
        bool multich = false;
        uint32_t max_iu_len;
@@ -3753,81 +3753,61 @@ static ssize_t srp_create_target(struct
                goto out;
 
        ret = -ENOMEM;
-       if (target->ch_count == 0)
+       if (target->ch_count == 0) {
                target->ch_count =
-                       max_t(unsigned int, num_online_nodes(),
-                             min(ch_count ?:
-                                         min(4 * num_online_nodes(),
-                                             ibdev->num_comp_vectors),
-                                 num_online_cpus()));
+                       min(ch_count ?:
+                               max(4 * num_online_nodes(),
+                                   ibdev->num_comp_vectors),
+                               num_online_cpus());
+       }
+
        target->ch = kcalloc(target->ch_count, sizeof(*target->ch),
                             GFP_KERNEL);
        if (!target->ch)
                goto out;
 
-       node_idx = 0;
-       for_each_online_node(node) {
-               const int ch_start = (node_idx * target->ch_count /
-                                     num_online_nodes());
-               const int ch_end = ((node_idx + 1) * target->ch_count /
-                                   num_online_nodes());
-               const int cv_start = node_idx * ibdev->num_comp_vectors /
-                                    num_online_nodes();
-               const int cv_end = (node_idx + 1) * ibdev->num_comp_vectors /
-                                  num_online_nodes();
-               int cpu_idx = 0;
-
-               for_each_online_cpu(cpu) {
-                       if (cpu_to_node(cpu) != node)
-                               continue;
-                       if (ch_start + cpu_idx >= ch_end)
-                               continue;
-                       ch = &target->ch[ch_start + cpu_idx];
-                       ch->target = target;
-                       ch->comp_vector = cv_start == cv_end ? cv_start :
-                               cv_start + cpu_idx % (cv_end - cv_start);
-                       spin_lock_init(&ch->lock);
-                       INIT_LIST_HEAD(&ch->free_tx);
-                       ret = srp_new_cm_id(ch);
-                       if (ret)
-                               goto err_disconnect;
-
-                       ret = srp_create_ch_ib(ch);
-                       if (ret)
-                               goto err_disconnect;
-
-                       ret = srp_alloc_req_data(ch);
-                       if (ret)
-                               goto err_disconnect;
-
-                       ret = srp_connect_ch(ch, max_iu_len, multich);
-                       if (ret) {
-                               char dst[64];
-
-                               if (target->using_rdma_cm)
-                                       snprintf(dst, sizeof(dst), "%pIS",
-                                                &target->rdma_cm.dst);
-                               else
-                                       snprintf(dst, sizeof(dst), "%pI6",
-                                                target->ib_cm.orig_dgid.raw);
-                               shost_printk(KERN_ERR, target->scsi_host,
-                                            PFX "Connection %d/%d to %s 
failed\n",
-                                            ch_start + cpu_idx,
-                                            target->ch_count, dst);
-                               if (node_idx == 0 && cpu_idx == 0) {
-                                       goto free_ch;
-                               } else {
-                                       srp_free_ch_ib(target, ch);
-                                       srp_free_req_data(target, ch);
-                                       target->ch_count = ch - target->ch;
-                                       goto connected;
-                               }
+       for (ch_idx = 0; ch_idx < target->ch_count; ++ch_idx) {
+               ch = &target->ch[ch_idx];
+               ch->target = target;
+               ch->comp_vector = ch_idx % ibdev->num_comp_vectors;
+               spin_lock_init(&ch->lock);
+               INIT_LIST_HEAD(&ch->free_tx);
+               ret = srp_new_cm_id(ch);
+               if (ret)
+                       goto err_disconnect;
+
+               ret = srp_create_ch_ib(ch);
+               if (ret)
+                       goto err_disconnect;
+
+               ret = srp_alloc_req_data(ch);
+               if (ret)
+                       goto err_disconnect;
+
+               ret = srp_connect_ch(ch, max_iu_len, multich);
+               if (ret) {
+                       char dst[64];
+
+                       if (target->using_rdma_cm)
+                               snprintf(dst, sizeof(dst), "%pIS",
+                                       &target->rdma_cm.dst);
+                       else
+                               snprintf(dst, sizeof(dst), "%pI6",
+                                       target->ib_cm.orig_dgid.raw);
+                       shost_printk(KERN_ERR, target->scsi_host,
+                               PFX "Connection %d/%d to %s failed\n",
+                               ch_idx,
+                               target->ch_count, dst);
+                       if (ch_idx == 0) {
+                               goto free_ch;
+                       } else {
+                               srp_free_ch_ib(target, ch);
+                               srp_free_req_data(target, ch);
+                               target->ch_count = ch - target->ch;
+                               goto connected;
                        }
-
-                       multich = true;
-                       cpu_idx++;
                }
-               node_idx++;
+               multich = true;
        }
 
 connected:


Reply via email to