From: Cosmin Ratiu <[email protected]>
In commit [1] the concept of the root node in the qos hierarchy was
removed due to a bug with how tx_share worked. The side effect is that
in many places, there are now corner cases related to parent handling.
However, since that change, support for tc_bw was added and now, with
upcoming cross-esw support, the code is about to become even more
complicated, increasing the number of such corner cases.
Bring back the concept of the root node, to which all esw vports and
nodes are connected. This benefits multiple operations which can
assume there's always a valid parent and don't have to do ternary
gymnastics to determine the correct esw to talk to.
As a side effect, there's no longer a need to store the groups in the
qos domain, since normalization can simply iterate over all children of
the root node. Normalization gets simplified as a result.
There should be no functionality changes as a result of this change.
[1] commit 330f0f6713a3 ("net/mlx5: Remove default QoS group and attach
vports directly to root TSAR")
Signed-off-by: Cosmin Ratiu <[email protected]>
Reviewed-by: Carolina Jubran <[email protected]>
Signed-off-by: Tariq Toukan <[email protected]>
---
.../net/ethernet/mellanox/mlx5/core/esw/qos.c | 183 ++++++++----------
.../net/ethernet/mellanox/mlx5/core/eswitch.h | 3 +-
2 files changed, 78 insertions(+), 108 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
index 4781a1a42f1a..12cc9bb8d08b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
@@ -15,8 +15,6 @@
struct mlx5_qos_domain {
/* Serializes access to all qos changes in the qos domain. */
struct mutex lock;
- /* List of all mlx5_esw_sched_nodes. */
- struct list_head nodes;
};
static void esw_qos_lock(struct mlx5_eswitch *esw)
@@ -43,7 +41,6 @@ static struct mlx5_qos_domain *esw_qos_domain_alloc(void)
return NULL;
mutex_init(&qos_domain->lock);
- INIT_LIST_HEAD(&qos_domain->nodes);
return qos_domain;
}
@@ -62,6 +59,7 @@ static void esw_qos_domain_release(struct mlx5_eswitch *esw)
}
enum sched_node_type {
+ SCHED_NODE_TYPE_ROOT,
SCHED_NODE_TYPE_VPORTS_TSAR,
SCHED_NODE_TYPE_VPORT,
SCHED_NODE_TYPE_TC_ARBITER_TSAR,
@@ -106,18 +104,6 @@ struct mlx5_esw_sched_node {
u32 tc_bw[DEVLINK_RATE_TCS_MAX];
};
-static void esw_qos_node_attach_to_parent(struct mlx5_esw_sched_node *node)
-{
- if (!node->parent) {
- /* Root children are assigned a depth level of 2. */
- node->level = 2;
- list_add_tail(&node->entry, &node->esw->qos.domain->nodes);
- } else {
- node->level = node->parent->level + 1;
- list_add_tail(&node->entry, &node->parent->children);
- }
-}
-
static int esw_qos_num_tcs(struct mlx5_core_dev *dev)
{
int num_tcs = mlx5_max_tc(dev) + 1;
@@ -125,14 +111,14 @@ static int esw_qos_num_tcs(struct mlx5_core_dev *dev)
return num_tcs < DEVLINK_RATE_TCS_MAX ? num_tcs : DEVLINK_RATE_TCS_MAX;
}
-static void
-esw_qos_node_set_parent(struct mlx5_esw_sched_node *node, struct
mlx5_esw_sched_node *parent)
+static void esw_qos_node_set_parent(struct mlx5_esw_sched_node *node,
+ struct mlx5_esw_sched_node *parent)
{
- list_del_init(&node->entry);
node->parent = parent;
- if (parent)
- node->esw = parent->esw;
- esw_qos_node_attach_to_parent(node);
+ node->esw = parent->esw;
+ node->level = parent->level + 1;
+ list_del(&node->entry);
+ list_add_tail(&node->entry, &parent->children);
}
static void esw_qos_nodes_set_parent(struct list_head *nodes,
@@ -321,22 +307,19 @@ static int esw_qos_create_rate_limit_element(struct
mlx5_esw_sched_node *node,
return esw_qos_node_create_sched_element(node, sched_ctx, extack);
}
-static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw,
- struct mlx5_esw_sched_node
*parent)
+static u32
+esw_qos_calculate_min_rate_divider(struct mlx5_esw_sched_node *parent)
{
- struct list_head *nodes = parent ? &parent->children :
&esw->qos.domain->nodes;
- u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
+ u32 fw_max_bw_share = MLX5_CAP_QOS(parent->esw->dev, max_tsar_bw_share);
struct mlx5_esw_sched_node *node;
u32 max_guarantee = 0;
/* Find max min_rate across all nodes.
* This will correspond to fw_max_bw_share in the final bw_share
calculation.
*/
- list_for_each_entry(node, nodes, entry) {
- if (node->esw == esw && node->ix != esw->qos.root_tsar_ix &&
- node->min_rate > max_guarantee)
+ list_for_each_entry(node, &parent->children, entry)
+ if (node->min_rate > max_guarantee)
max_guarantee = node->min_rate;
- }
if (max_guarantee)
return max_t(u32, max_guarantee / fw_max_bw_share, 1);
@@ -368,18 +351,13 @@ static void esw_qos_update_sched_node_bw_share(struct
mlx5_esw_sched_node *node,
esw_qos_sched_elem_config(node, node->max_rate, bw_share, extack);
}
-static void esw_qos_normalize_min_rate(struct mlx5_eswitch *esw,
- struct mlx5_esw_sched_node *parent,
+static void esw_qos_normalize_min_rate(struct mlx5_esw_sched_node *parent,
struct netlink_ext_ack *extack)
{
- struct list_head *nodes = parent ? &parent->children :
&esw->qos.domain->nodes;
- u32 divider = esw_qos_calculate_min_rate_divider(esw, parent);
+ u32 divider = esw_qos_calculate_min_rate_divider(parent);
struct mlx5_esw_sched_node *node;
- list_for_each_entry(node, nodes, entry) {
- if (node->esw != esw || node->ix == esw->qos.root_tsar_ix)
- continue;
-
+ list_for_each_entry(node, &parent->children, entry) {
/* Vports TC TSARs don't have a minimum rate configured,
* so there's no need to update the bw_share on them.
*/
@@ -391,7 +369,7 @@ static void esw_qos_normalize_min_rate(struct mlx5_eswitch
*esw,
if (list_empty(&node->children))
continue;
- esw_qos_normalize_min_rate(node->esw, node, extack);
+ esw_qos_normalize_min_rate(node, extack);
}
}
@@ -412,14 +390,11 @@ static u32 esw_qos_calculate_tc_bw_divider(u32 *tc_bw)
static int esw_qos_set_node_min_rate(struct mlx5_esw_sched_node *node,
u32 min_rate, struct netlink_ext_ack
*extack)
{
- struct mlx5_eswitch *esw = node->esw;
-
if (min_rate == node->min_rate)
return 0;
node->min_rate = min_rate;
- esw_qos_normalize_min_rate(esw, node->parent, extack);
-
+ esw_qos_normalize_min_rate(node->parent, extack);
return 0;
}
@@ -472,8 +447,7 @@ esw_qos_vport_create_sched_element(struct
mlx5_esw_sched_node *vport_node,
SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
attr = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes);
MLX5_SET(vport_element, attr, vport_number, vport_node->vport->vport);
- MLX5_SET(scheduling_context, sched_ctx, parent_element_id,
- parent ? parent->ix : vport_node->esw->qos.root_tsar_ix);
+ MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent->ix);
MLX5_SET(scheduling_context, sched_ctx, max_average_bw,
vport_node->max_rate);
@@ -513,7 +487,7 @@ esw_qos_vport_tc_create_sched_element(struct
mlx5_esw_sched_node *vport_tc_node,
}
static struct mlx5_esw_sched_node *
-__esw_qos_alloc_node(struct mlx5_eswitch *esw, u32 tsar_ix, enum
sched_node_type type,
+__esw_qos_alloc_node(u32 tsar_ix, enum sched_node_type type,
struct mlx5_esw_sched_node *parent)
{
struct mlx5_esw_sched_node *node;
@@ -522,20 +496,12 @@ __esw_qos_alloc_node(struct mlx5_eswitch *esw, u32
tsar_ix, enum sched_node_type
if (!node)
return NULL;
- node->esw = esw;
node->ix = tsar_ix;
node->type = type;
- node->parent = parent;
INIT_LIST_HEAD(&node->children);
- esw_qos_node_attach_to_parent(node);
- if (!parent) {
- /* The caller is responsible for inserting the node into the
- * parent list if necessary. This function can also be used with
- * a NULL parent, which doesn't necessarily indicate that it
- * refers to the root scheduling element.
- */
- list_del_init(&node->entry);
- }
+ INIT_LIST_HEAD(&node->entry);
+ if (parent)
+ esw_qos_node_set_parent(node, parent);
return node;
}
@@ -570,7 +536,7 @@ static int esw_qos_create_vports_tc_node(struct
mlx5_esw_sched_node *parent,
SCHEDULING_HIERARCHY_E_SWITCH))
return -EOPNOTSUPP;
- vports_tc_node = __esw_qos_alloc_node(parent->esw, 0,
+ vports_tc_node = __esw_qos_alloc_node(0,
SCHED_NODE_TYPE_VPORTS_TC_TSAR,
parent);
if (!vports_tc_node) {
@@ -665,7 +631,6 @@ static int esw_qos_create_tc_arbiter_sched_elem(
struct netlink_ext_ack *extack)
{
u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
- u32 tsar_parent_ix;
void *attr;
if (!mlx5_qos_tsar_type_supported(tc_arbiter_node->esw->dev,
@@ -678,10 +643,8 @@ static int esw_qos_create_tc_arbiter_sched_elem(
attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
MLX5_SET(tsar_element, attr, tsar_type, TSAR_ELEMENT_TSAR_TYPE_TC_ARB);
- tsar_parent_ix = tc_arbiter_node->parent ? tc_arbiter_node->parent->ix :
- tc_arbiter_node->esw->qos.root_tsar_ix;
MLX5_SET(scheduling_context, tsar_ctx, parent_element_id,
- tsar_parent_ix);
+ tc_arbiter_node->parent->ix);
MLX5_SET(scheduling_context, tsar_ctx, element_type,
SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);
MLX5_SET(scheduling_context, tsar_ctx, max_average_bw,
@@ -694,37 +657,36 @@ static int esw_qos_create_tc_arbiter_sched_elem(
}
static struct mlx5_esw_sched_node *
-__esw_qos_create_vports_sched_node(struct mlx5_eswitch *esw, struct
mlx5_esw_sched_node *parent,
+__esw_qos_create_vports_sched_node(struct mlx5_esw_sched_node *parent,
struct netlink_ext_ack *extack)
{
struct mlx5_esw_sched_node *node;
- u32 tsar_ix;
int err;
+ u32 ix;
- err = esw_qos_create_node_sched_elem(esw->dev, esw->qos.root_tsar_ix, 0,
- 0, &tsar_ix);
+ err = esw_qos_create_node_sched_elem(parent->esw->dev, parent->ix, 0, 0,
+ &ix);
if (err) {
NL_SET_ERR_MSG_MOD(extack, "E-Switch create TSAR for node
failed");
return ERR_PTR(err);
}
- node = __esw_qos_alloc_node(esw, tsar_ix, SCHED_NODE_TYPE_VPORTS_TSAR,
parent);
+ node = __esw_qos_alloc_node(ix, SCHED_NODE_TYPE_VPORTS_TSAR, parent);
if (!node) {
NL_SET_ERR_MSG_MOD(extack, "E-Switch alloc node failed");
err = -ENOMEM;
goto err_alloc_node;
}
- list_add_tail(&node->entry, &esw->qos.domain->nodes);
- esw_qos_normalize_min_rate(esw, NULL, extack);
- trace_mlx5_esw_node_qos_create(esw->dev, node, node->ix);
+ esw_qos_normalize_min_rate(parent, extack);
+ trace_mlx5_esw_node_qos_create(parent->esw->dev, node, node->ix);
return node;
err_alloc_node:
- if (mlx5_destroy_scheduling_element_cmd(esw->dev,
+ if (mlx5_destroy_scheduling_element_cmd(parent->esw->dev,
SCHEDULING_HIERARCHY_E_SWITCH,
- tsar_ix))
+ ix))
NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR for node
failed");
return ERR_PTR(err);
}
@@ -746,7 +708,7 @@ esw_qos_create_vports_sched_node(struct mlx5_eswitch *esw,
struct netlink_ext_ac
if (err)
return ERR_PTR(err);
- node = __esw_qos_create_vports_sched_node(esw, NULL, extack);
+ node = __esw_qos_create_vports_sched_node(esw->qos.root, extack);
if (IS_ERR(node))
esw_qos_put(esw);
@@ -762,38 +724,47 @@ static void __esw_qos_destroy_node(struct
mlx5_esw_sched_node *node, struct netl
trace_mlx5_esw_node_qos_destroy(esw->dev, node, node->ix);
esw_qos_destroy_node(node, extack);
- esw_qos_normalize_min_rate(esw, NULL, extack);
+ esw_qos_normalize_min_rate(esw->qos.root, extack);
}
static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack
*extack)
{
struct mlx5_core_dev *dev = esw->dev;
+ struct mlx5_esw_sched_node *root;
+ u32 root_ix;
int err;
if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
return -EOPNOTSUPP;
- err = esw_qos_create_node_sched_elem(esw->dev, 0, 0, 0,
- &esw->qos.root_tsar_ix);
+ err = esw_qos_create_node_sched_elem(esw->dev, 0, 0, 0, &root_ix);
if (err) {
esw_warn(dev, "E-Switch create root TSAR failed (%d)\n", err);
return err;
}
+ root = __esw_qos_alloc_node(root_ix, SCHED_NODE_TYPE_ROOT, NULL);
+ if (!root) {
+ esw_warn(dev, "E-Switch create root node failed\n");
+ err = -ENOMEM;
+ goto out_err;
+ }
+ root->esw = esw;
+ root->level = 1;
+ esw->qos.root = root;
refcount_set(&esw->qos.refcnt, 1);
return 0;
+out_err:
+ mlx5_destroy_scheduling_element_cmd(dev, SCHEDULING_HIERARCHY_E_SWITCH,
+ root_ix);
+ return err;
}
static void esw_qos_destroy(struct mlx5_eswitch *esw)
{
- int err;
-
- err = mlx5_destroy_scheduling_element_cmd(esw->dev,
- SCHEDULING_HIERARCHY_E_SWITCH,
- esw->qos.root_tsar_ix);
- if (err)
- esw_warn(esw->dev, "E-Switch destroy root TSAR failed (%d)\n",
err);
+ esw_qos_destroy_node(esw->qos.root, NULL);
+ esw->qos.root = NULL;
}
static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack
*extack)
@@ -866,8 +837,7 @@ esw_qos_create_vport_tc_sched_node(struct mlx5_vport *vport,
u8 tc = vports_tc_node->tc;
int err;
- vport_tc_node = __esw_qos_alloc_node(vport_node->esw, 0,
- SCHED_NODE_TYPE_VPORT_TC,
+ vport_tc_node = __esw_qos_alloc_node(0, SCHED_NODE_TYPE_VPORT_TC,
vports_tc_node);
if (!vport_tc_node)
return -ENOMEM;
@@ -959,7 +929,7 @@ esw_qos_vport_tc_enable(struct mlx5_vport *vport, enum
sched_node_type type,
/* Increase the parent's level by 2 to account for both the
* TC arbiter and the vports TC scheduling element.
*/
- new_level = (parent ? parent->level : 2) + 2;
+ new_level = parent->level + 2;
max_level = 1 << MLX5_CAP_QOS(vport_node->esw->dev,
log_esw_max_sched_depth);
if (new_level > max_level) {
@@ -997,7 +967,7 @@ esw_qos_vport_tc_enable(struct mlx5_vport *vport, enum
sched_node_type type,
err_sched_nodes:
if (type == SCHED_NODE_TYPE_RATE_LIMITER) {
esw_qos_node_destroy_sched_element(vport_node, NULL);
- esw_qos_node_attach_to_parent(vport_node);
+ esw_qos_node_set_parent(vport_node, vport_node->parent);
} else {
esw_qos_tc_arbiter_scheduling_teardown(vport_node, NULL);
}
@@ -1055,7 +1025,7 @@ static void esw_qos_vport_disable(struct mlx5_vport
*vport, struct netlink_ext_a
vport_node->bw_share = 0;
memset(vport_node->tc_bw, 0, sizeof(vport_node->tc_bw));
list_del_init(&vport_node->entry);
- esw_qos_normalize_min_rate(vport_node->esw, vport_node->parent, extack);
+ esw_qos_normalize_min_rate(vport_node->parent, extack);
trace_mlx5_esw_vport_qos_destroy(vport_node->esw->dev, vport);
}
@@ -1068,7 +1038,7 @@ static int esw_qos_vport_enable(struct mlx5_vport *vport,
struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;
int err;
- esw_assert_qos_lock_held(vport->dev->priv.eswitch);
+ esw_assert_qos_lock_held(vport_node->esw);
esw_qos_node_set_parent(vport_node, parent);
if (type == SCHED_NODE_TYPE_VPORT)
@@ -1079,7 +1049,7 @@ static int esw_qos_vport_enable(struct mlx5_vport *vport,
return err;
vport_node->type = type;
- esw_qos_normalize_min_rate(vport_node->esw, parent, extack);
+ esw_qos_normalize_min_rate(parent, extack);
trace_mlx5_esw_vport_qos_create(vport->dev, vport, vport_node->max_rate,
vport_node->bw_share);
@@ -1092,7 +1062,6 @@ static int mlx5_esw_qos_vport_enable(struct mlx5_vport
*vport, enum sched_node_t
{
struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
struct mlx5_esw_sched_node *sched_node;
- struct mlx5_eswitch *parent_esw;
int err;
esw_assert_qos_lock_held(esw);
@@ -1100,14 +1069,13 @@ static int mlx5_esw_qos_vport_enable(struct mlx5_vport
*vport, enum sched_node_t
if (err)
return err;
- parent_esw = parent ? parent->esw : esw;
- sched_node = __esw_qos_alloc_node(parent_esw, 0, type, parent);
+ if (!parent)
+ parent = esw->qos.root;
+ sched_node = __esw_qos_alloc_node(0, type, parent);
if (!sched_node) {
esw_qos_put(esw);
return -ENOMEM;
}
- if (!parent)
- list_add_tail(&sched_node->entry, &esw->qos.domain->nodes);
sched_node->max_rate = max_rate;
sched_node->min_rate = min_rate;
@@ -1147,7 +1115,8 @@ void mlx5_esw_qos_vport_disable(struct mlx5_vport *vport)
goto unlock;
parent = vport->qos.sched_node->parent;
- WARN(parent, "Disabling QoS on port before detaching it from node");
+ WARN(parent != esw->qos.root,
+ "Disabling QoS on port before detaching it from node");
mlx5_esw_qos_vport_disable_locked(vport);
unlock:
@@ -1319,11 +1288,9 @@ static int esw_qos_switch_tc_arbiter_node_to_vports(
struct mlx5_esw_sched_node *node,
struct netlink_ext_ack *extack)
{
- u32 parent_tsar_ix = node->parent ?
- node->parent->ix : node->esw->qos.root_tsar_ix;
int err;
- err = esw_qos_create_node_sched_elem(node->esw->dev, parent_tsar_ix,
+ err = esw_qos_create_node_sched_elem(node->esw->dev, node->parent->ix,
node->max_rate, node->bw_share,
&node->ix);
if (err) {
@@ -1378,8 +1345,8 @@ esw_qos_move_node(struct mlx5_esw_sched_node *curr_node)
{
struct mlx5_esw_sched_node *new_node;
- new_node = __esw_qos_alloc_node(curr_node->esw, curr_node->ix,
- curr_node->type, NULL);
+ new_node = __esw_qos_alloc_node(curr_node->ix, curr_node->type,
+ curr_node->parent);
if (!new_node)
return ERR_PTR(-ENOMEM);
@@ -1888,7 +1855,9 @@ mlx5_esw_qos_vport_update_parent(struct mlx5_vport *vport,
err = mlx5_esw_qos_vport_enable(vport, type, parent, 0, 0,
extack);
} else if (vport->qos.sched_node) {
- err = esw_qos_vport_update_parent(vport, parent, extack);
+ err = esw_qos_vport_update_parent(vport,
+ parent ? : esw->qos.root,
+ extack);
}
esw_qos_unlock(esw);
return err;
@@ -1959,7 +1928,7 @@ mlx5_esw_qos_node_validate_set_parent(struct
mlx5_esw_sched_node *node,
return -EOPNOTSUPP;
}
- new_level = parent ? parent->level + 1 : 2;
+ new_level = parent->level + 1;
if (node->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) {
/* Increase by one to account for the vports TC scheduling
* element.
@@ -2010,14 +1979,12 @@ static int esw_qos_vports_node_update_parent(struct
mlx5_esw_sched_node *node,
{
struct mlx5_esw_sched_node *curr_parent = node->parent;
struct mlx5_eswitch *esw = node->esw;
- u32 parent_ix;
int err;
- parent_ix = parent ? parent->ix : node->esw->qos.root_tsar_ix;
mlx5_destroy_scheduling_element_cmd(esw->dev,
SCHEDULING_HIERARCHY_E_SWITCH,
node->ix);
- err = esw_qos_create_node_sched_elem(esw->dev, parent_ix,
+ err = esw_qos_create_node_sched_elem(esw->dev, parent->ix,
node->max_rate, 0, &node->ix);
if (err) {
NL_SET_ERR_MSG_MOD(extack,
@@ -2050,6 +2017,8 @@ static int mlx5_esw_qos_node_update_parent(struct
mlx5_esw_sched_node *node,
esw_qos_lock(esw);
curr_parent = node->parent;
+ if (!parent)
+ parent = esw->qos.root;
if (node->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) {
err = esw_qos_tc_arbiter_node_update_parent(node, parent,
extack);
@@ -2060,8 +2029,8 @@ static int mlx5_esw_qos_node_update_parent(struct
mlx5_esw_sched_node *node,
if (err)
goto out;
- esw_qos_normalize_min_rate(esw, curr_parent, extack);
- esw_qos_normalize_min_rate(esw, parent, extack);
+ esw_qos_normalize_min_rate(curr_parent, extack);
+ esw_qos_normalize_min_rate(parent, extack);
out:
esw_qos_unlock(esw);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index a5a02b26b80b..9b3949a64784 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -390,8 +390,9 @@ struct mlx5_eswitch {
struct {
/* Initially 0, meaning no QoS users and QoS is disabled. */
refcount_t refcnt;
- u32 root_tsar_ix;
struct mlx5_qos_domain *domain;
+ /* The root node of the hierarchy. */
+ struct mlx5_esw_sched_node *root;
} qos;
struct mlx5_esw_bridge_offloads *br_offloads;
--
2.44.0