Author: hselasky
Date: Tue Dec  1 13:03:09 2020
New Revision: 368223
URL: https://svnweb.freebsd.org/changeset/base/368223

Log:
  MFC r357294:
  Widen EPOCH(9) usage in mlx5en(4).
  
  Make completion event path mostly lockless using EPOCH(9).
  
  Implement a mechanism using EPOCH(9) which allows us to make
  the callback path for completion events mostly lockless.
  
  Simplify draining callback events using epoch_wait().
  
  While at it make sure all receive completion callbacks are
  covered by the network EPOCH(9), because this is required
  when calling if_input() and ether_input() after r357012.
  
  Sponsored by: Mellanox Technologies

Modified:
  stable/12/sys/dev/mlx5/cq.h
  stable/12/sys/dev/mlx5/driver.h
  stable/12/sys/dev/mlx5/mlx5_core/mlx5_cq.c
  stable/12/sys/dev/mlx5/mlx5_en/mlx5_en_main.c
Directory Properties:
  stable/12/   (props changed)

Modified: stable/12/sys/dev/mlx5/cq.h
==============================================================================
--- stable/12/sys/dev/mlx5/cq.h Tue Dec  1 12:52:15 2020        (r368222)
+++ stable/12/sys/dev/mlx5/cq.h Tue Dec  1 13:03:09 2020        (r368223)
@@ -38,8 +38,6 @@ struct mlx5_core_cq {
        int                     cqe_sz;
        __be32                 *set_ci_db;
        __be32                 *arm_db;
-       atomic_t                refcount;
-       struct completion       free;
        unsigned                vector;
        int                     irqn;
        void (*comp)            (struct mlx5_core_cq *);

Modified: stable/12/sys/dev/mlx5/driver.h
==============================================================================
--- stable/12/sys/dev/mlx5/driver.h     Tue Dec  1 12:52:15 2020        (r368222)
+++ stable/12/sys/dev/mlx5/driver.h     Tue Dec  1 13:03:09 2020        (r368223)
@@ -515,21 +515,17 @@ struct mlx5_core_health {
        struct workqueue_struct        *wq_cmd;
 };
 
-#ifdef RATELIMIT
-#define        MLX5_CQ_LINEAR_ARRAY_SIZE       (128 * 1024)
-#else
 #define        MLX5_CQ_LINEAR_ARRAY_SIZE       1024
-#endif
 
 struct mlx5_cq_linear_array_entry {
-       spinlock_t      lock;
        struct mlx5_core_cq * volatile cq;
 };
 
 struct mlx5_cq_table {
        /* protect radix tree
         */
-       spinlock_t              lock;
+       spinlock_t              writerlock;
+       atomic_t                writercount;
        struct radix_tree_root  tree;
        struct mlx5_cq_linear_array_entry 
linear_array[MLX5_CQ_LINEAR_ARRAY_SIZE];
 };

Modified: stable/12/sys/dev/mlx5/mlx5_core/mlx5_cq.c
==============================================================================
--- stable/12/sys/dev/mlx5/mlx5_core/mlx5_cq.c  Tue Dec  1 12:52:15 2020        (r368222)
+++ stable/12/sys/dev/mlx5/mlx5_core/mlx5_cq.c  Tue Dec  1 13:03:09 2020        (r368223)
@@ -33,72 +33,92 @@
 #include <dev/mlx5/cq.h>
 #include "mlx5_core.h"
 
+#include <sys/epoch.h>
+#include <net/if_var.h>
+
+static void
+mlx5_cq_table_write_lock(struct mlx5_cq_table *table)
+{
+
+       atomic_inc(&table->writercount);
+       /* make sure all see the updated writercount */
+       NET_EPOCH_WAIT();
+       spin_lock(&table->writerlock);
+}
+
+static void
+mlx5_cq_table_write_unlock(struct mlx5_cq_table *table)
+{
+
+       spin_unlock(&table->writerlock);
+       atomic_dec(&table->writercount);
+       /* drain all pending CQ callers */
+       NET_EPOCH_WAIT();
+}
+
 void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn)
 {
-       struct mlx5_core_cq *cq;
        struct mlx5_cq_table *table = &dev->priv.cq_table;
+       struct mlx5_core_cq *cq;
+       struct epoch_tracker et;
+       bool do_lock;
 
-       if (cqn < MLX5_CQ_LINEAR_ARRAY_SIZE) {
-               struct mlx5_cq_linear_array_entry *entry;
+       NET_EPOCH_ENTER_ET(et);
 
-               entry = &table->linear_array[cqn];
-               spin_lock(&entry->lock);
-               cq = entry->cq;
-               if (cq == NULL) {
-                       mlx5_core_warn(dev,
-                           "Completion event for bogus CQ 0x%x\n", cqn);
-               } else {
-                       ++cq->arm_sn;
-                       cq->comp(cq);
-               }
-               spin_unlock(&entry->lock);
-               return;
-       }
+       do_lock = atomic_read(&table->writercount) != 0;
+       if (unlikely(do_lock))
+               spin_lock(&table->writerlock);
 
-       spin_lock(&table->lock);
-       cq = radix_tree_lookup(&table->tree, cqn);
-       if (likely(cq))
-               atomic_inc(&cq->refcount);
-       spin_unlock(&table->lock);
+       if (likely(cqn < MLX5_CQ_LINEAR_ARRAY_SIZE))
+               cq = table->linear_array[cqn].cq;
+       else
+               cq = radix_tree_lookup(&table->tree, cqn);
 
-       if (!cq) {
-               mlx5_core_warn(dev, "Completion event for bogus CQ 0x%x\n", 
cqn);
-               return;
+       if (unlikely(do_lock))
+               spin_unlock(&table->writerlock);
+
+       if (likely(cq != NULL)) {
+               ++cq->arm_sn;
+               cq->comp(cq);
+       } else {
+               mlx5_core_warn(dev,
+                   "Completion event for bogus CQ 0x%x\n", cqn);
        }
 
-       ++cq->arm_sn;
-
-       cq->comp(cq);
-
-       if (atomic_dec_and_test(&cq->refcount))
-               complete(&cq->free);
+       NET_EPOCH_EXIT_ET(et);
 }
 
 void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type)
 {
        struct mlx5_cq_table *table = &dev->priv.cq_table;
        struct mlx5_core_cq *cq;
+       struct epoch_tracker et;
+       bool do_lock;
 
-       spin_lock(&table->lock);
+       NET_EPOCH_ENTER_ET(et);
 
-       cq = radix_tree_lookup(&table->tree, cqn);
-       if (cq)
-               atomic_inc(&cq->refcount);
+       do_lock = atomic_read(&table->writercount) != 0;
+       if (unlikely(do_lock))
+               spin_lock(&table->writerlock);
 
-       spin_unlock(&table->lock);
+       if (likely(cqn < MLX5_CQ_LINEAR_ARRAY_SIZE))
+               cq = table->linear_array[cqn].cq;
+       else
+               cq = radix_tree_lookup(&table->tree, cqn);
 
-       if (!cq) {
-               mlx5_core_warn(dev, "Async event for bogus CQ 0x%x\n", cqn);
-               return;
+       if (unlikely(do_lock))
+               spin_unlock(&table->writerlock);
+
+       if (likely(cq != NULL)) {
+               cq->event(cq, event_type);
+       } else {
+               mlx5_core_warn(dev,
+                   "Asynchronous event for bogus CQ 0x%x\n", cqn);
        }
 
-       cq->event(cq, event_type);
-
-       if (atomic_dec_and_test(&cq->refcount))
-               complete(&cq->free);
+       NET_EPOCH_EXIT_ET(et);
 }
 
-
 int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
                        u32 *in, int inlen, u32 *out, int outlen)
 {
@@ -116,24 +136,16 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, str
        cq->cqn = MLX5_GET(create_cq_out, out, cqn);
        cq->cons_index = 0;
        cq->arm_sn     = 0;
-       atomic_set(&cq->refcount, 1);
-       init_completion(&cq->free);
 
-       spin_lock_irq(&table->lock);
+       mlx5_cq_table_write_lock(table);
        err = radix_tree_insert(&table->tree, cq->cqn, cq);
-       spin_unlock_irq(&table->lock);
+       if (likely(err == 0 && cq->cqn < MLX5_CQ_LINEAR_ARRAY_SIZE))
+               table->linear_array[cq->cqn].cq = cq;
+       mlx5_cq_table_write_unlock(table);
+
        if (err)
                goto err_cmd;
 
-       if (cq->cqn < MLX5_CQ_LINEAR_ARRAY_SIZE) {
-               struct mlx5_cq_linear_array_entry *entry;
-
-               entry = &table->linear_array[cq->cqn];
-               spin_lock_irq(&entry->lock);
-               entry->cq = cq;
-               spin_unlock_irq(&entry->lock);
-       }
-
        cq->pid = curthread->td_proc->p_pid;
 
        return 0;
@@ -152,44 +164,24 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, st
        u32 out[MLX5_ST_SZ_DW(destroy_cq_out)] = {0};
        u32 in[MLX5_ST_SZ_DW(destroy_cq_in)] = {0};
        struct mlx5_core_cq *tmp;
-       int err;
 
-       if (cq->cqn < MLX5_CQ_LINEAR_ARRAY_SIZE) {
-               struct mlx5_cq_linear_array_entry *entry;
-
-               entry = &table->linear_array[cq->cqn];
-               spin_lock_irq(&entry->lock);
-               entry->cq = NULL;
-               spin_unlock_irq(&entry->lock);
-       }
-
-       spin_lock_irq(&table->lock);
+       mlx5_cq_table_write_lock(table);
+       if (likely(cq->cqn < MLX5_CQ_LINEAR_ARRAY_SIZE))
+               table->linear_array[cq->cqn].cq = NULL;
        tmp = radix_tree_delete(&table->tree, cq->cqn);
-       spin_unlock_irq(&table->lock);
-       if (!tmp) {
+       mlx5_cq_table_write_unlock(table);
+
+       if (unlikely(tmp == NULL)) {
                mlx5_core_warn(dev, "cq 0x%x not found in tree\n", cq->cqn);
                return -EINVAL;
-       }
-       if (tmp != cq) {
-               mlx5_core_warn(dev, "corruption on srqn 0x%x\n", cq->cqn);
+       } else if (unlikely(tmp != cq)) {
+               mlx5_core_warn(dev, "corrupted cqn 0x%x\n", cq->cqn);
                return -EINVAL;
        }
 
        MLX5_SET(destroy_cq_in, in, opcode, MLX5_CMD_OP_DESTROY_CQ);
        MLX5_SET(destroy_cq_in, in, cqn, cq->cqn);
-       err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-       if (err)
-               goto out;
-
-       synchronize_irq(cq->irqn);
-
-       if (atomic_dec_and_test(&cq->refcount))
-               complete(&cq->free);
-       wait_for_completion(&cq->free);
-
-out:
-
-       return err;
+       return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 EXPORT_SYMBOL(mlx5_core_destroy_cq);
 
@@ -259,17 +251,12 @@ int mlx5_core_modify_cq_moderation_mode(struct mlx5_co
 int mlx5_init_cq_table(struct mlx5_core_dev *dev)
 {
        struct mlx5_cq_table *table = &dev->priv.cq_table;
-       int err;
-       int x;
 
        memset(table, 0, sizeof(*table));
-       spin_lock_init(&table->lock);
-       for (x = 0; x != MLX5_CQ_LINEAR_ARRAY_SIZE; x++)
-               spin_lock_init(&table->linear_array[x].lock);
+       spin_lock_init(&table->writerlock);
        INIT_RADIX_TREE(&table->tree, GFP_ATOMIC);
-       err = 0;
 
-       return err;
+       return 0;
 }
 
 void mlx5_cleanup_cq_table(struct mlx5_core_dev *dev)

Modified: stable/12/sys/dev/mlx5/mlx5_en/mlx5_en_main.c
==============================================================================
--- stable/12/sys/dev/mlx5/mlx5_en/mlx5_en_main.c       Tue Dec  1 12:52:15 2020        (r368222)
+++ stable/12/sys/dev/mlx5/mlx5_en/mlx5_en_main.c       Tue Dec  1 13:03:09 2020        (r368223)
@@ -2171,6 +2171,7 @@ mlx5e_open_channel(struct mlx5e_priv *priv,
     struct mlx5e_channel_param *cparam,
     struct mlx5e_channel *c)
 {
+       struct epoch_tracker et;
        int i, err;
 
        /* zero non-persistant data */
@@ -2198,7 +2199,9 @@ mlx5e_open_channel(struct mlx5e_priv *priv,
                goto err_close_sqs;
 
        /* poll receive queue initially */
+       NET_EPOCH_ENTER_ET(et);
        c->rq.cq.mcq.comp(&c->rq.cq.mcq);
+       NET_EPOCH_EXIT_ET(et);
 
        return (0);
 
@@ -3740,6 +3743,7 @@ static void
 mlx5e_disable_rx_dma(struct mlx5e_channel *ch)
 {
        struct mlx5e_rq *rq = &ch->rq;
+       struct epoch_tracker et;
        int err;
 
        mtx_lock(&rq->mtx);
@@ -3755,7 +3759,9 @@ mlx5e_disable_rx_dma(struct mlx5e_channel *ch)
 
        while (!mlx5_wq_ll_is_empty(&rq->wq)) {
                msleep(1);
+               NET_EPOCH_ENTER_ET(et);
                rq->cq.mcq.comp(&rq->cq.mcq);
+               NET_EPOCH_EXIT_ET(et);
        }
 
        /*
@@ -3773,6 +3779,7 @@ static void
 mlx5e_enable_rx_dma(struct mlx5e_channel *ch)
 {
        struct mlx5e_rq *rq = &ch->rq;
+       struct epoch_tracker et;
        int err;
 
        rq->wq.wqe_ctr = 0;
@@ -3785,7 +3792,9 @@ mlx5e_enable_rx_dma(struct mlx5e_channel *ch)
 
        rq->enabled = 1;
 
+       NET_EPOCH_ENTER_ET(et);
        rq->cq.mcq.comp(&rq->cq.mcq);
+       NET_EPOCH_EXIT_ET(et);
 }
 
 void
_______________________________________________
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscribe@freebsd.org"

Reply via email to