The branch stable/14 has been updated by kib:

URL: https://cgit.FreeBSD.org/src/commit/?id=52164cb3bd38ef23c108149e4a91042da204a2f3

commit 52164cb3bd38ef23c108149e4a91042da204a2f3
Author:     Raed Salem <[email protected]>
AuthorDate: 2023-02-20 16:10:29 +0000
Commit:     Konstantin Belousov <[email protected]>
CommitDate: 2023-11-22 01:40:28 +0000

    mlx5: add fs_counters
    
    (cherry picked from commit 35bbcf0916992d77fe1521962db42b3106a701fb)
---
 sys/dev/mlx5/driver.h                     |  43 ++
 sys/dev/mlx5/fs.h                         |  16 +-
 sys/dev/mlx5/mlx5_core/fs_core.h          |   5 +
 sys/dev/mlx5/mlx5_core/mlx5_fc_cmd.c      | 102 ++++
 sys/dev/mlx5/mlx5_core/mlx5_fc_cmd.h      |  54 +++
 sys/dev/mlx5/mlx5_core/mlx5_fs_counters.c | 758 ++++++++++++++++++++++++++++++
 sys/dev/mlx5/mlx5_core/mlx5_fs_tree.c     |   5 +
 sys/dev/mlx5/mlx5_ifc.h                   |  22 +-
 sys/modules/mlx5/Makefile                 |   2 +
 9 files changed, 1003 insertions(+), 4 deletions(-)
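
For reference, the consumer-facing flow counters API added in sys/dev/mlx5/fs.h
below could be used roughly as in the following sketch (illustrative only, not
part of the commit; the hypothetical example_fc_usage() and the attachment of
the counter to a flow rule via mlx5_fc_id() are assumptions):

    static int
    example_fc_usage(struct mlx5_core_dev *dev)
    {
            struct mlx5_fc *counter;
            u64 bytes, packets, lastuse;

            /* Allocate an aging counter; its cache is refreshed by the fc workqueue. */
            counter = mlx5_fc_create(dev, true);
            if (IS_ERR(counter))
                    return (PTR_ERR(counter));

            /* ... attach mlx5_fc_id(counter) to a flow rule and pass traffic ... */

            /* Read the software-cached delta since the previous read. */
            mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);

            mlx5_fc_destroy(dev, counter);
            return (0);
    }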

diff --git a/sys/dev/mlx5/driver.h b/sys/dev/mlx5/driver.h
index cb1a2907a443..9daa1235bd9c 100644
--- a/sys/dev/mlx5/driver.h
+++ b/sys/dev/mlx5/driver.h
@@ -50,6 +50,7 @@
 #define MLX5_MAX_NUMBER_OF_VFS 128
 
 #define MLX5_INVALID_QUEUE_HANDLE 0xffffffff
+#define MLX5_ST_SZ_BYTES(typ) (sizeof(struct mlx5_ifc_##typ##_bits) / 8)
 
 enum {
        MLX5_BOARD_ID_LEN = 64,
@@ -269,6 +270,36 @@ struct mlx5_traffic_counter {
        u64         octets;
 };
 
+struct mlx5_fc_pool {
+       struct mlx5_core_dev *dev;
+       struct mutex pool_lock; /* protects pool lists */
+       struct list_head fully_used;
+       struct list_head partially_used;
+       struct list_head unused;
+       int available_fcs;
+       int used_fcs;
+       int threshold;
+};
+
+struct mlx5_fc_stats {
+       spinlock_t counters_idr_lock; /* protects counters_idr */
+       struct idr counters_idr;
+       struct list_head counters;
+       struct llist_head addlist;
+       struct llist_head dellist;
+
+       struct workqueue_struct *wq;
+       struct delayed_work work;
+       unsigned long next_query;
+       unsigned long sampling_interval; /* jiffies */
+       u32 *bulk_query_out;
+       int bulk_query_len;
+       size_t num_counters;
+       bool bulk_query_alloc_failed;
+       unsigned long next_bulk_query_alloc;
+       struct mlx5_fc_pool fc_pool;
+};
+
 enum mlx5_cmd_mode {
        MLX5_CMD_MODE_POLLING,
        MLX5_CMD_MODE_EVENTS
@@ -607,6 +638,7 @@ struct mlx5_priv {
 
        struct mlx5_bfreg_data          bfregs;
        struct mlx5_uars_page          *uar;
+       struct mlx5_fc_stats            fc_stats;
 };
 
 enum mlx5_device_state {
@@ -963,6 +995,17 @@ int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size,
                     struct mlx5_async_work *work);
 int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
                  int out_size);
+#define mlx5_cmd_exec_inout(dev, ifc_cmd, in, out)                             \
+       ({                                                                     \
+               mlx5_cmd_exec(dev, in, MLX5_ST_SZ_BYTES(ifc_cmd##_in), out,    \
+                             MLX5_ST_SZ_BYTES(ifc_cmd##_out));                \
+       })
+
+#define mlx5_cmd_exec_in(dev, ifc_cmd, in)                                     \
+       ({                                                                     \
+               u32 _out[MLX5_ST_SZ_DW(ifc_cmd##_out)] = {};                   \
+               mlx5_cmd_exec_inout(dev, ifc_cmd, in, _out);                   \
+       })
 int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size,
                          void *out, int out_size);
 int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn);
diff --git a/sys/dev/mlx5/fs.h b/sys/dev/mlx5/fs.h
index f62716d806d0..65d38b9ee67a 100644
--- a/sys/dev/mlx5/fs.h
+++ b/sys/dev/mlx5/fs.h
@@ -254,7 +254,6 @@ bool fs_match_exact_mask(
                void *mask1,
                void *mask2);
 /**********end API for sniffer**********/
-
 struct mlx5_modify_hdr *mlx5_modify_header_alloc(struct mlx5_core_dev *dev,
                                                 enum mlx5_flow_namespace_type ns_type,
                                                 u8 num_actions,
@@ -275,4 +274,19 @@ struct mlx5_pkt_reformat *mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev,
                                                     enum mlx5_flow_namespace_type ns_type);
 void mlx5_packet_reformat_dealloc(struct mlx5_core_dev *dev,
                                          struct mlx5_pkt_reformat *pkt_reformat);
+/********** Flow counters API **********/
+struct mlx5_fc;
+struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging);
+
+/* As mlx5_fc_create() but doesn't queue stats refresh thread. */
+struct mlx5_fc *mlx5_fc_create_ex(struct mlx5_core_dev *dev, bool aging);
+
+void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter);
+u64 mlx5_fc_query_lastuse(struct mlx5_fc *counter);
+void mlx5_fc_query_cached(struct mlx5_fc *counter,
+                          u64 *bytes, u64 *packets, u64 *lastuse);
+int mlx5_fc_query(struct mlx5_core_dev *dev, struct mlx5_fc *counter,
+                  u64 *packets, u64 *bytes);
+u32 mlx5_fc_id(struct mlx5_fc *counter);
+/******* End of Flow counters API ******/
 #endif
diff --git a/sys/dev/mlx5/mlx5_core/fs_core.h b/sys/dev/mlx5/mlx5_core/fs_core.h
index a9273fdab61c..05757f493469 100644
--- a/sys/dev/mlx5/mlx5_core/fs_core.h
+++ b/sys/dev/mlx5/mlx5_core/fs_core.h
@@ -323,4 +323,9 @@ int mlx5_cmd_packet_reformat_alloc(struct mlx5_core_dev *dev,
                                   struct mlx5_pkt_reformat *pkt_reformat);
 void mlx5_cmd_packet_reformat_dealloc(struct mlx5_core_dev *dev,
                                      struct mlx5_pkt_reformat *pkt_reformat);
+int mlx5_init_fc_stats(struct mlx5_core_dev *dev);
+void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev);
+void mlx5_fc_queue_stats_work(struct mlx5_core_dev *dev,
+                             struct delayed_work *dwork,
+                             unsigned long delay);
 #endif
diff --git a/sys/dev/mlx5/mlx5_core/mlx5_fc_cmd.c b/sys/dev/mlx5/mlx5_core/mlx5_fc_cmd.c
new file mode 100644
index 000000000000..f3410249e67f
--- /dev/null
+++ b/sys/dev/mlx5/mlx5_core/mlx5_fc_cmd.c
@@ -0,0 +1,102 @@
+/*-
+ * Copyright (c) 2022 NVIDIA corporation & affiliates.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <dev/mlx5/driver.h>
+#include <dev/mlx5/device.h>
+#include <dev/mlx5/mlx5_ifc.h>
+#include <dev/mlx5/mlx5_core/mlx5_fc_cmd.h>
+#include <dev/mlx5/mlx5_core/mlx5_core.h>
+
+int mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev,
+                          enum mlx5_fc_bulk_alloc_bitmask alloc_bitmask,
+                          u32 *id)
+{
+       u32 out[MLX5_ST_SZ_DW(alloc_flow_counter_out)] = {};
+       u32 in[MLX5_ST_SZ_DW(alloc_flow_counter_in)] = {};
+       int err;
+
+       MLX5_SET(alloc_flow_counter_in, in, opcode,
+                MLX5_CMD_OP_ALLOC_FLOW_COUNTER);
+       MLX5_SET(alloc_flow_counter_in, in, flow_counter_bulk, alloc_bitmask);
+
+       err = mlx5_cmd_exec_inout(dev, alloc_flow_counter, in, out);
+       if (!err)
+               *id = MLX5_GET(alloc_flow_counter_out, out, flow_counter_id);
+       return err;
+}
+
+int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id)
+{
+       return mlx5_cmd_fc_bulk_alloc(dev, 0, id);
+}
+
+int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u32 id)
+{
+       u32 in[MLX5_ST_SZ_DW(dealloc_flow_counter_in)] = {};
+
+       MLX5_SET(dealloc_flow_counter_in, in, opcode,
+                MLX5_CMD_OP_DEALLOC_FLOW_COUNTER);
+       MLX5_SET(dealloc_flow_counter_in, in, flow_counter_id, id);
+       return mlx5_cmd_exec_in(dev, dealloc_flow_counter, in);
+}
+
+int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u32 id,
+                     u64 *packets, u64 *bytes)
+{
+       u32 out[MLX5_ST_SZ_BYTES(query_flow_counter_out) +
+               MLX5_ST_SZ_BYTES(traffic_counter)] = {};
+       u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)] = {};
+       void *stats;
+       int err = 0;
+
+       MLX5_SET(query_flow_counter_in, in, opcode,
+                MLX5_CMD_OP_QUERY_FLOW_COUNTER);
+       MLX5_SET(query_flow_counter_in, in, op_mod, 0);
+       MLX5_SET(query_flow_counter_in, in, flow_counter_id, id);
+       err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+       if (err)
+               return err;
+
+       stats = MLX5_ADDR_OF(query_flow_counter_out, out, flow_statistics);
+       *packets = MLX5_GET64(traffic_counter, stats, packets);
+       *bytes = MLX5_GET64(traffic_counter, stats, octets);
+       return 0;
+}
+
+int mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, u32 base_id, int bulk_len,
+                          u32 *out)
+{
+       int outlen = mlx5_cmd_fc_get_bulk_query_out_len(bulk_len);
+       u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)] = {};
+
+       MLX5_SET(query_flow_counter_in, in, opcode,
+                MLX5_CMD_OP_QUERY_FLOW_COUNTER);
+       MLX5_SET(query_flow_counter_in, in, flow_counter_id, base_id);
+       MLX5_SET(query_flow_counter_in, in, num_of_counters, bulk_len);
+       return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+}
+
diff --git a/sys/dev/mlx5/mlx5_core/mlx5_fc_cmd.h b/sys/dev/mlx5/mlx5_core/mlx5_fc_cmd.h
new file mode 100644
index 000000000000..3adebb3ca94c
--- /dev/null
+++ b/sys/dev/mlx5/mlx5_core/mlx5_fc_cmd.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2023, NVIDIA Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _MLX5_FC_CMD_
+#define _MLX5_FC_CMD_
+
+#include "fs_core.h"
+
+int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id);
+int mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev,
+                          enum mlx5_fc_bulk_alloc_bitmask alloc_bitmask,
+                          u32 *id);
+int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u32 id);
+int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u32 id,
+                     u64 *packets, u64 *bytes);
+
+int mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, u32 base_id, int bulk_len,
+                          u32 *out);
+static inline int mlx5_cmd_fc_get_bulk_query_out_len(int bulk_len)
+{
+       return MLX5_ST_SZ_BYTES(query_flow_counter_out) +
+               MLX5_ST_SZ_BYTES(traffic_counter) * bulk_len;
+}
+
+#endif
diff --git a/sys/dev/mlx5/mlx5_core/mlx5_fs_counters.c b/sys/dev/mlx5/mlx5_core/mlx5_fs_counters.c
new file mode 100644
index 000000000000..7214c5256388
--- /dev/null
+++ b/sys/dev/mlx5/mlx5_core/mlx5_fs_counters.c
@@ -0,0 +1,758 @@
+/*-
+ * Copyright (c) 2013-2019, Mellanox Technologies, Ltd.  All rights reserved.
+ * Copyright (c) 2022 NVIDIA corporation & affiliates.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <dev/mlx5/driver.h>
+#include <dev/mlx5/fs.h>
+#include <linux/rbtree.h>
+#include <dev/mlx5/mlx5_core/mlx5_core.h>
+#include <dev/mlx5/mlx5_core/fs_core.h>
+#include <dev/mlx5/mlx5_core/mlx5_fc_cmd.h>
+
+#define MLX5_FC_STATS_PERIOD msecs_to_jiffies(1000)
+#define MLX5_FC_BULK_QUERY_ALLOC_PERIOD msecs_to_jiffies(180 * 1000)
+/* Max number of counters to query in bulk read is 32K */
+#define MLX5_SW_MAX_COUNTERS_BULK BIT(15)
+#define MLX5_INIT_COUNTERS_BULK 8
+#define MLX5_FC_POOL_MAX_THRESHOLD BIT(18)
+#define MLX5_FC_POOL_USED_BUFF_RATIO 10
+
+struct mlx5_fc_cache {
+       u64 packets;
+       u64 bytes;
+       u64 lastuse;
+};
+
+struct mlx5_fc {
+       struct list_head list;
+       struct llist_node addlist;
+       struct llist_node dellist;
+
+       /* last{packets,bytes} members are used when calculating the delta since
+        * last reading
+        */
+       u64 lastpackets;
+       u64 lastbytes;
+
+       struct mlx5_fc_bulk *bulk;
+       u32 id;
+       bool aging;
+
+       struct mlx5_fc_cache cache ____cacheline_aligned_in_smp;
+};
+
+static void mlx5_fc_pool_init(struct mlx5_fc_pool *fc_pool, struct mlx5_core_dev *dev);
+static void mlx5_fc_pool_cleanup(struct mlx5_fc_pool *fc_pool);
+static struct mlx5_fc *mlx5_fc_pool_acquire_counter(struct mlx5_fc_pool *fc_pool);
+static void mlx5_fc_pool_release_counter(struct mlx5_fc_pool *fc_pool, struct mlx5_fc *fc);
+
+/* locking scheme:
+ *
+ * It is the responsibility of the user to prevent concurrent calls or bad
+ * ordering to mlx5_fc_create(), mlx5_fc_destroy() and accessing a reference
+ * to struct mlx5_fc.
+ * e.g en_tc.c is protected by RTNL lock of its caller, and will never call a
+ * dump (access to struct mlx5_fc) after a counter is destroyed.
+ *
+ * access to counter list:
+ * - create (user context)
+ *   - mlx5_fc_create() only adds to an addlist to be used by
+ *     mlx5_fc_stats_work(). addlist is a lockless single linked list
+ *     that doesn't require any additional synchronization when adding single
+ *     node.
+ *   - spawn thread to do the actual destroy
+ *
+ * - destroy (user context)
+ *   - add a counter to lockless dellist
+ *   - spawn thread to do the actual del
+ *
+ * - dump (user context)
+ *   user should not call dump after destroy
+ *
+ * - query (single thread workqueue context)
+ *   destroy/dump - no conflict (see destroy)
+ *   query/dump - packets and bytes might be inconsistent (since update is not
+ *                atomic)
+ *   query/create - no conflict (see create)
+ *   since every create/destroy spawn the work, only after necessary time has
+ *   elapsed, the thread will actually query the hardware.
+ */
+
+static struct list_head *mlx5_fc_counters_lookup_next(struct mlx5_core_dev *dev,
+                                                     u32 id)
+{
+       struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+       struct mlx5_fc *counter;
+       int next_id = id + 1;
+
+       rcu_read_lock();
+       /* skip counters that are in idr, but not yet in counters list */
+       while ((counter = idr_get_next(&fc_stats->counters_idr, &next_id)) != NULL &&
+               list_empty(&counter->list))
+               next_id++;
+       rcu_read_unlock();
+
+       return counter ? &counter->list : &fc_stats->counters;
+}
+
+static void mlx5_fc_stats_insert(struct mlx5_core_dev *dev,
+                                struct mlx5_fc *counter)
+{
+       struct list_head *next = mlx5_fc_counters_lookup_next(dev, counter->id);
+
+       list_add_tail(&counter->list, next);
+}
+
+static void mlx5_fc_stats_remove(struct mlx5_core_dev *dev,
+                                struct mlx5_fc *counter)
+{
+       struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+
+       list_del(&counter->list);
+
+       spin_lock(&fc_stats->counters_idr_lock);
+       WARN_ON(!idr_remove(&fc_stats->counters_idr, counter->id));
+       spin_unlock(&fc_stats->counters_idr_lock);
+}
+
+static int get_init_bulk_query_len(struct mlx5_core_dev *dev)
+{
+       return min_t(int, MLX5_INIT_COUNTERS_BULK,
+                    (1 << MLX5_CAP_GEN(dev, log_max_flow_counter_bulk)));
+}
+
+static int get_max_bulk_query_len(struct mlx5_core_dev *dev)
+{
+       return min_t(int, MLX5_SW_MAX_COUNTERS_BULK,
+                    (1 << MLX5_CAP_GEN(dev, log_max_flow_counter_bulk)));
+}
+
+static void update_counter_cache(int index, u32 *bulk_raw_data,
+                                struct mlx5_fc_cache *cache)
+{
+       void *stats = MLX5_ADDR_OF(query_flow_counter_out, bulk_raw_data,
+                            flow_statistics[index]);
+       u64 packets = MLX5_GET64(traffic_counter, stats, packets);
+       u64 bytes = MLX5_GET64(traffic_counter, stats, octets);
+
+       if (cache->packets == packets)
+               return;
+
+       cache->packets = packets;
+       cache->bytes = bytes;
+       cache->lastuse = jiffies;
+}
+
+static void mlx5_fc_stats_query_counter_range(struct mlx5_core_dev *dev,
+                                             struct mlx5_fc *first,
+                                             u32 last_id)
+{
+       struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+       bool query_more_counters = (first->id <= last_id);
+       int cur_bulk_len = fc_stats->bulk_query_len;
+       u32 *data = fc_stats->bulk_query_out;
+       struct mlx5_fc *counter = first;
+       u32 bulk_base_id;
+       int bulk_len;
+       int err;
+
+       while (query_more_counters) {
+               /* first id must be aligned to 4 when using bulk query */
+               bulk_base_id = counter->id & ~0x3;
+
+               /* number of counters to query inc. the last counter */
+               bulk_len = min_t(int, cur_bulk_len,
+                                ALIGN(last_id - bulk_base_id + 1, 4));
+
+               err = mlx5_cmd_fc_bulk_query(dev, bulk_base_id, bulk_len,
+                                            data);
+               if (err) {
+                       mlx5_core_err(dev, "Error doing bulk query: %d\n", err);
+                       return;
+               }
+               query_more_counters = false;
+
+               list_for_each_entry_from(counter, &fc_stats->counters, list) {
+                       int counter_index = counter->id - bulk_base_id;
+                       struct mlx5_fc_cache *cache = &counter->cache;
+
+                       if (counter->id >= bulk_base_id + bulk_len) {
+                               query_more_counters = true;
+                               break;
+                       }
+
+                       update_counter_cache(counter_index, data, cache);
+               }
+       }
+}
+
+static void mlx5_fc_free(struct mlx5_core_dev *dev, struct mlx5_fc *counter)
+{
+       mlx5_cmd_fc_free(dev, counter->id);
+       kfree(counter);
+}
+
+static void mlx5_fc_release(struct mlx5_core_dev *dev, struct mlx5_fc *counter)
+{
+       struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+
+       if (counter->bulk)
+               mlx5_fc_pool_release_counter(&fc_stats->fc_pool, counter);
+       else
+               mlx5_fc_free(dev, counter);
+}
+
+static void mlx5_fc_stats_bulk_query_size_increase(struct mlx5_core_dev *dev)
+{
+       struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+       int max_bulk_len = get_max_bulk_query_len(dev);
+       unsigned long now = jiffies;
+       u32 *bulk_query_out_tmp;
+       int max_out_len;
+
+       if (fc_stats->bulk_query_alloc_failed &&
+           time_before(now, fc_stats->next_bulk_query_alloc))
+               return;
+
+       max_out_len = mlx5_cmd_fc_get_bulk_query_out_len(max_bulk_len);
+       bulk_query_out_tmp = kzalloc(max_out_len, GFP_KERNEL);
+       if (!bulk_query_out_tmp) {
+               mlx5_core_warn(dev,
+                              "Can't increase flow counters bulk query buffer size, insufficient memory, bulk_size(%d)\n",
+                              max_bulk_len);
+               fc_stats->bulk_query_alloc_failed = true;
+               fc_stats->next_bulk_query_alloc =
+                       now + MLX5_FC_BULK_QUERY_ALLOC_PERIOD;
+               return;
+       }
+
+       kfree(fc_stats->bulk_query_out);
+       fc_stats->bulk_query_out = bulk_query_out_tmp;
+       fc_stats->bulk_query_len = max_bulk_len;
+       if (fc_stats->bulk_query_alloc_failed) {
+               mlx5_core_info(dev,
+                              "Flow counters bulk query buffer size increased, bulk_size(%d)\n",
+                              max_bulk_len);
+               fc_stats->bulk_query_alloc_failed = false;
+       }
+}
+
+static void mlx5_fc_stats_work(struct work_struct *work)
+{
+       struct mlx5_core_dev *dev = container_of(work, struct mlx5_core_dev,
+                                                priv.fc_stats.work.work);
+       struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+       /* Take dellist first to ensure that counters cannot be deleted before
+        * they are inserted.
+        */
+       struct llist_node *dellist = llist_del_all(&fc_stats->dellist);
+       struct llist_node *addlist = llist_del_all(&fc_stats->addlist);
+       struct mlx5_fc *counter = NULL, *last = NULL, *tmp;
+       unsigned long now = jiffies;
+
+       if (addlist || !list_empty(&fc_stats->counters))
+               queue_delayed_work(fc_stats->wq, &fc_stats->work,
+                                  fc_stats->sampling_interval);
+
+       llist_for_each_entry(counter, addlist, addlist) {
+               mlx5_fc_stats_insert(dev, counter);
+               fc_stats->num_counters++;
+       }
+
+       llist_for_each_entry_safe(counter, tmp, dellist, dellist) {
+               mlx5_fc_stats_remove(dev, counter);
+
+               mlx5_fc_release(dev, counter);
+               fc_stats->num_counters--;
+       }
+
+       if (fc_stats->bulk_query_len < get_max_bulk_query_len(dev) &&
+           fc_stats->num_counters > get_init_bulk_query_len(dev))
+               mlx5_fc_stats_bulk_query_size_increase(dev);
+
+       if (time_before(now, fc_stats->next_query) ||
+           list_empty(&fc_stats->counters))
+               return;
+       last = list_last_entry(&fc_stats->counters, struct mlx5_fc, list);
+
+       counter = list_first_entry(&fc_stats->counters, struct mlx5_fc,
+                                  list);
+       if (counter)
+               mlx5_fc_stats_query_counter_range(dev, counter, last->id);
+
+       fc_stats->next_query = now + fc_stats->sampling_interval;
+}
+
+static struct mlx5_fc *mlx5_fc_single_alloc(struct mlx5_core_dev *dev)
+{
+       struct mlx5_fc *counter;
+       int err;
+
+       counter = kzalloc(sizeof(*counter), GFP_KERNEL);
+       if (!counter)
+               return ERR_PTR(-ENOMEM);
+
+       err = mlx5_cmd_fc_alloc(dev, &counter->id);
+       if (err) {
+               kfree(counter);
+               return ERR_PTR(err);
+       }
+
+       return counter;
+}
+
+static struct mlx5_fc *mlx5_fc_acquire(struct mlx5_core_dev *dev, bool aging)
+{
+       struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+       struct mlx5_fc *counter;
+
+       if (aging && MLX5_CAP_GEN(dev, flow_counter_bulk_alloc) != 0) {
+               counter = mlx5_fc_pool_acquire_counter(&fc_stats->fc_pool);
+               if (!IS_ERR(counter))
+                       return counter;
+       }
+
+       return mlx5_fc_single_alloc(dev);
+}
+
+struct mlx5_fc *mlx5_fc_create_ex(struct mlx5_core_dev *dev, bool aging)
+{
+       struct mlx5_fc *counter = mlx5_fc_acquire(dev, aging);
+       struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+       int err = 0;
+
+       if (IS_ERR(counter))
+               return counter;
+
+       INIT_LIST_HEAD(&counter->list);
+       counter->aging = aging;
+
+       if (aging) {
+               u32 id = counter->id;
+
+               counter->cache.lastuse = jiffies;
+               counter->lastbytes = counter->cache.bytes;
+               counter->lastpackets = counter->cache.packets;
+
+               idr_preload(GFP_KERNEL);
+               spin_lock(&fc_stats->counters_idr_lock);
+
+               err = idr_alloc(&fc_stats->counters_idr, counter, id, id + 1,
+                               GFP_NOWAIT);
+
+               spin_unlock(&fc_stats->counters_idr_lock);
+               idr_preload_end();
+               if (err < 0 || err != id)
+                       goto err_out_alloc;
+
+               llist_add(&counter->addlist, &fc_stats->addlist);
+       }
+
+       return counter;
+
+err_out_alloc:
+       mlx5_fc_release(dev, counter);
+       return ERR_PTR(err);
+}
+
+struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging)
+{
+       struct mlx5_fc *counter = mlx5_fc_create_ex(dev, aging);
+       struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+
+       if (aging)
+               mod_delayed_work(fc_stats->wq, &fc_stats->work, 0);
+       return counter;
+}
+EXPORT_SYMBOL(mlx5_fc_create);
+
+u32 mlx5_fc_id(struct mlx5_fc *counter)
+{
+       return counter->id;
+}
+EXPORT_SYMBOL(mlx5_fc_id);
+
+void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter)
+{
+       struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+
+       if (!counter)
+               return;
+
+       if (counter->aging) {
+               llist_add(&counter->dellist, &fc_stats->dellist);
+               mod_delayed_work(fc_stats->wq, &fc_stats->work, 0);
+               return;
+       }
+
+       mlx5_fc_release(dev, counter);
+}
+EXPORT_SYMBOL(mlx5_fc_destroy);
+
+int mlx5_init_fc_stats(struct mlx5_core_dev *dev)
+{
+       struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+       int init_bulk_len;
+       int init_out_len;
+
+       spin_lock_init(&fc_stats->counters_idr_lock);
+       idr_init(&fc_stats->counters_idr);
+       INIT_LIST_HEAD(&fc_stats->counters);
+       init_llist_head(&fc_stats->addlist);
+       init_llist_head(&fc_stats->dellist);
+
+       init_bulk_len = get_init_bulk_query_len(dev);
+       init_out_len = mlx5_cmd_fc_get_bulk_query_out_len(init_bulk_len);
+       fc_stats->bulk_query_out = kzalloc(init_out_len, GFP_KERNEL);
+       if (!fc_stats->bulk_query_out)
+               return -ENOMEM;
+       fc_stats->bulk_query_len = init_bulk_len;
+
+       fc_stats->wq = create_singlethread_workqueue("mlx5_fc");
+       if (!fc_stats->wq)
+               goto err_wq_create;
+
+       fc_stats->sampling_interval = MLX5_FC_STATS_PERIOD;
+       INIT_DELAYED_WORK(&fc_stats->work, mlx5_fc_stats_work);
+
+       mlx5_fc_pool_init(&fc_stats->fc_pool, dev);
+       return 0;
+
+err_wq_create:
+       kfree(fc_stats->bulk_query_out);
+       return -ENOMEM;
+}
+
+void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev)
+{
+       struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+       struct llist_node *tmplist;
+       struct mlx5_fc *counter;
+       struct mlx5_fc *tmp;
+
+       if (!dev->priv.fc_stats.wq)
+               return;
+
+       cancel_delayed_work_sync(&dev->priv.fc_stats.work);
+       destroy_workqueue(dev->priv.fc_stats.wq);
+       dev->priv.fc_stats.wq = NULL;
+
+       tmplist = llist_del_all(&fc_stats->addlist);
+       llist_for_each_entry_safe(counter, tmp, tmplist, addlist)
+               mlx5_fc_release(dev, counter);
+
+       list_for_each_entry_safe(counter, tmp, &fc_stats->counters, list)
+               mlx5_fc_release(dev, counter);
+
+       mlx5_fc_pool_cleanup(&fc_stats->fc_pool);
+       idr_destroy(&fc_stats->counters_idr);
+       kfree(fc_stats->bulk_query_out);
+}
+
+int mlx5_fc_query(struct mlx5_core_dev *dev, struct mlx5_fc *counter,
+                 u64 *packets, u64 *bytes)
+{
+       return mlx5_cmd_fc_query(dev, counter->id, packets, bytes);
+}
+EXPORT_SYMBOL(mlx5_fc_query);
+
+u64 mlx5_fc_query_lastuse(struct mlx5_fc *counter)
+{
+       return counter->cache.lastuse;
+}
+
+void mlx5_fc_query_cached(struct mlx5_fc *counter,
+                         u64 *bytes, u64 *packets, u64 *lastuse)
+{
+       struct mlx5_fc_cache c;
+
+       c = counter->cache;
+
+       *bytes = c.bytes - counter->lastbytes;
+       *packets = c.packets - counter->lastpackets;
+       *lastuse = c.lastuse;
+
+       counter->lastbytes = c.bytes;
+       counter->lastpackets = c.packets;
+}
+
+void mlx5_fc_queue_stats_work(struct mlx5_core_dev *dev,
+                             struct delayed_work *dwork,
+                             unsigned long delay)
+{
+       struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+
+       queue_delayed_work(fc_stats->wq, dwork, delay);
+}
+
+void mlx5_fc_update_sampling_interval(struct mlx5_core_dev *dev,
+                                     unsigned long interval)
+{
+       struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+
+       fc_stats->sampling_interval = min_t(unsigned long, interval,
+                                           fc_stats->sampling_interval);
+}
+
+/* Flow counter bulks */
+
+struct mlx5_fc_bulk {
+       struct list_head pool_list;
+       u32 base_id;
+       int bulk_len;
+       unsigned long *bitmask;
+       struct mlx5_fc fcs[];
+};
+
+static void mlx5_fc_init(struct mlx5_fc *counter, struct mlx5_fc_bulk *bulk,
+                        u32 id)
+{
+       counter->bulk = bulk;
+       counter->id = id;
+}
+
+static int mlx5_fc_bulk_get_free_fcs_amount(struct mlx5_fc_bulk *bulk)
+{
+       return bitmap_weight(bulk->bitmask, bulk->bulk_len);
+}
+
+static struct mlx5_fc_bulk *mlx5_fc_bulk_create(struct mlx5_core_dev *dev)
+{
+       enum mlx5_fc_bulk_alloc_bitmask alloc_bitmask;
+       struct mlx5_fc_bulk *bulk;
+       int err = -ENOMEM;
+       int bulk_len;
+       u32 base_id;
+       int i;
+
+       alloc_bitmask = MLX5_CAP_GEN(dev, flow_counter_bulk_alloc);
+       bulk_len = alloc_bitmask > 0 ? MLX5_FC_BULK_NUM_FCS(alloc_bitmask) : 1;
+
+       bulk = kvzalloc(struct_size(bulk, fcs, bulk_len), GFP_KERNEL);
+       if (!bulk)
+               goto err_alloc_bulk;
+
+       bulk->bitmask = kvcalloc(BITS_TO_LONGS(bulk_len), sizeof(unsigned long),
+                                GFP_KERNEL);
+       if (!bulk->bitmask)
+               goto err_alloc_bitmask;
+
+       err = mlx5_cmd_fc_bulk_alloc(dev, alloc_bitmask, &base_id);
+       if (err)
+               goto err_mlx5_cmd_bulk_alloc;
+
+       bulk->base_id = base_id;
+       bulk->bulk_len = bulk_len;
+       for (i = 0; i < bulk_len; i++) {
+               mlx5_fc_init(&bulk->fcs[i], bulk, base_id + i);
+               set_bit(i, bulk->bitmask);
+       }
+
+       return bulk;
+
+err_mlx5_cmd_bulk_alloc:
+       kvfree(bulk->bitmask);
+err_alloc_bitmask:
+       kvfree(bulk);
+err_alloc_bulk:
+       return ERR_PTR(err);
+}
+
+static int
+mlx5_fc_bulk_destroy(struct mlx5_core_dev *dev, struct mlx5_fc_bulk *bulk)
+{
+       if (mlx5_fc_bulk_get_free_fcs_amount(bulk) < bulk->bulk_len) {
+               mlx5_core_err(dev, "Freeing bulk before all counters were released\n");
+               return -EBUSY;
+       }
+
+       mlx5_cmd_fc_free(dev, bulk->base_id);
+       kvfree(bulk->bitmask);
+       kvfree(bulk);
+
+       return 0;
+}
+
+static struct mlx5_fc *mlx5_fc_bulk_acquire_fc(struct mlx5_fc_bulk *bulk)
+{
+       int free_fc_index = find_first_bit(bulk->bitmask, bulk->bulk_len);
+
+       if (free_fc_index >= bulk->bulk_len)
+               return ERR_PTR(-ENOSPC);
+
+       clear_bit(free_fc_index, bulk->bitmask);
+       return &bulk->fcs[free_fc_index];
+}
+
+static int mlx5_fc_bulk_release_fc(struct mlx5_fc_bulk *bulk, struct mlx5_fc *fc)
+{
+       int fc_index = fc->id - bulk->base_id;
+
+       if (test_bit(fc_index, bulk->bitmask))
+               return -EINVAL;
+
+       set_bit(fc_index, bulk->bitmask);
+       return 0;
+}
+
+/* Flow counters pool API */
+
+static void mlx5_fc_pool_init(struct mlx5_fc_pool *fc_pool, struct mlx5_core_dev *dev)
+{
+       fc_pool->dev = dev;
+       mutex_init(&fc_pool->pool_lock);
+       INIT_LIST_HEAD(&fc_pool->fully_used);
+       INIT_LIST_HEAD(&fc_pool->partially_used);
+       INIT_LIST_HEAD(&fc_pool->unused);
+       fc_pool->available_fcs = 0;
+       fc_pool->used_fcs = 0;
+       fc_pool->threshold = 0;
+}
+
+static void mlx5_fc_pool_cleanup(struct mlx5_fc_pool *fc_pool)
+{
+       struct mlx5_core_dev *dev = fc_pool->dev;
+       struct mlx5_fc_bulk *bulk;
+       struct mlx5_fc_bulk *tmp;
+
+       list_for_each_entry_safe(bulk, tmp, &fc_pool->fully_used, pool_list)
+               mlx5_fc_bulk_destroy(dev, bulk);
+       list_for_each_entry_safe(bulk, tmp, &fc_pool->partially_used, pool_list)
+               mlx5_fc_bulk_destroy(dev, bulk);
+       list_for_each_entry_safe(bulk, tmp, &fc_pool->unused, pool_list)
+               mlx5_fc_bulk_destroy(dev, bulk);
+}
+
+static void mlx5_fc_pool_update_threshold(struct mlx5_fc_pool *fc_pool)
+{
+       fc_pool->threshold = min_t(int, MLX5_FC_POOL_MAX_THRESHOLD,
+                                  fc_pool->used_fcs / MLX5_FC_POOL_USED_BUFF_RATIO);
+}
+
+static struct mlx5_fc_bulk *
+mlx5_fc_pool_alloc_new_bulk(struct mlx5_fc_pool *fc_pool)
+{
+       struct mlx5_core_dev *dev = fc_pool->dev;
+       struct mlx5_fc_bulk *new_bulk;
+
+       new_bulk = mlx5_fc_bulk_create(dev);
+       if (!IS_ERR(new_bulk))
+               fc_pool->available_fcs += new_bulk->bulk_len;
+       mlx5_fc_pool_update_threshold(fc_pool);
+       return new_bulk;
+}
*** 175 LINES SKIPPED ***

