[PATCH net-next v4 2/2] switchdev: fix: pass correct obj size when deferring obj add
From: Scott Feldman Fixes: 4d429c5dd ("switchdev: introduce possibility to defer obj_add/del") Signed-off-by: Scott Feldman Acked-by: Jiri Pirko --- v3->v4: rebase sync v2->v3: add Jiri's Acked-by v1->v2: use correct "Fixes" tag, use common func to calc obj size for add/del net/switchdev/switchdev.c | 19 +-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 8d3e6c3..2433e75 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -337,6 +337,21 @@ int switchdev_port_attr_set(struct net_device *dev, } EXPORT_SYMBOL_GPL(switchdev_port_attr_set); +static size_t switchdev_obj_size(const struct switchdev_obj *obj) +{ + switch (obj->id) { + case SWITCHDEV_OBJ_ID_PORT_VLAN: + return sizeof(struct switchdev_obj_port_vlan); + case SWITCHDEV_OBJ_ID_IPV4_FIB: + return sizeof(struct switchdev_obj_ipv4_fib); + case SWITCHDEV_OBJ_ID_PORT_FDB: + return sizeof(struct switchdev_obj_port_fdb); + default: + BUG(); + } + return 0; +} + static int __switchdev_port_obj_add(struct net_device *dev, const struct switchdev_obj *obj, struct switchdev_trans *trans) @@ -422,7 +437,7 @@ static void switchdev_port_obj_add_deferred(struct net_device *dev, static int switchdev_port_obj_add_defer(struct net_device *dev, const struct switchdev_obj *obj) { - return switchdev_deferred_enqueue(dev, obj, sizeof(*obj), + return switchdev_deferred_enqueue(dev, obj, switchdev_obj_size(obj), switchdev_port_obj_add_deferred); } @@ -490,7 +505,7 @@ static void switchdev_port_obj_del_deferred(struct net_device *dev, static int switchdev_port_obj_del_defer(struct net_device *dev, const struct switchdev_obj *obj) { - return switchdev_deferred_enqueue(dev, obj, sizeof(*obj), + return switchdev_deferred_enqueue(dev, obj, switchdev_obj_size(obj), switchdev_port_obj_del_deferred); } -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next v4 1/2] switchdev: fix: erasing too much of vlan obj when handling multiple vlan specs
From: Scott Feldman When adding vlans with multiple IFLA_BRIDGE_VLAN_INFO attrs set in AFSPEC, we would wipe the vlan obj struct after the first IFLA_BRIDGE_VLAN_INFO. Fix this by only clearing what's necessary on each IFLA_BRIDGE_VLAN_INFO iteration. Fixes: 9e8f4a54 ("switchdev: push object ID back to object structure") Signed-off-by: Scott Feldman Acked-by: Jiri Pirko --- v3->v4: rebase sync v2->v3: no change v1->v2: add Jiri's Acked-by net/switchdev/switchdev.c |4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 1eb76956..8d3e6c3 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -866,7 +866,7 @@ static int switchdev_port_br_afspec(struct net_device *dev, err = f(dev, &vlan.obj); if (err) return err; - memset(&vlan, 0, sizeof(vlan)); + vlan.vid_begin = 0; } else { if (vlan.vid_begin) return -EINVAL; @@ -875,7 +875,7 @@ static int switchdev_port_br_afspec(struct net_device *dev, err = f(dev, &vlan.obj); if (err) return err; - memset(&vlan, 0, sizeof(vlan)); + vlan.vid_begin = 0; } } -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next v3 2/3] switchdev: fix: pass correct obj size when deferring obj add
From: Scott Feldman Fixes: 4d429c5dd ("switchdev: introduce possibility to defer obj_add/del") Signed-off-by: Scott Feldman Acked-by: Jiri Pirko --- v2->v3: add Jiri's Acked-by v1->v2: use correct "Fixes" tag, use common func to calc obj size for add/del net/switchdev/switchdev.c | 19 +-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 56d8479..bff8e2b 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -336,6 +336,21 @@ int switchdev_port_attr_set(struct net_device *dev, } EXPORT_SYMBOL_GPL(switchdev_port_attr_set); +static size_t switchdev_obj_size(const struct switchdev_obj *obj) +{ + switch (obj->id) { + case SWITCHDEV_OBJ_ID_PORT_VLAN: + return sizeof(struct switchdev_obj_port_vlan); + case SWITCHDEV_OBJ_ID_IPV4_FIB: + return sizeof(struct switchdev_obj_ipv4_fib); + case SWITCHDEV_OBJ_ID_PORT_FDB: + return sizeof(struct switchdev_obj_port_fdb); + default: + BUG(); + } + return 0; +} + static int __switchdev_port_obj_add(struct net_device *dev, const struct switchdev_obj *obj, struct switchdev_trans *trans) @@ -421,7 +436,7 @@ static void switchdev_port_obj_add_deferred(struct net_device *dev, static int switchdev_port_obj_add_defer(struct net_device *dev, const struct switchdev_obj *obj) { - return switchdev_deferred_enqueue(dev, obj, sizeof(*obj), + return switchdev_deferred_enqueue(dev, obj, switchdev_obj_size(obj), switchdev_port_obj_add_deferred); } @@ -489,7 +504,7 @@ static void switchdev_port_obj_del_deferred(struct net_device *dev, static int switchdev_port_obj_del_defer(struct net_device *dev, const struct switchdev_obj *obj) { - return switchdev_deferred_enqueue(dev, obj, sizeof(*obj), + return switchdev_deferred_enqueue(dev, obj, switchdev_obj_size(obj), switchdev_port_obj_del_deferred); } -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next v3 1/3] switchdev: fix: erasing too much of vlan obj when handling multiple vlan specs
From: Scott Feldman When adding vlans with multiple IFLA_BRIDGE_VLAN_INFO attrs set in AFSPEC, we would wipe the vlan obj struct after the first IFLA_BRIDGE_VLAN_INFO. Fix this by only clearing what's necessary on each IFLA_BRIDGE_VLAN_INFO iteration. Fixes: 9e8f4a54 ("switchdev: push object ID back to object structure") Signed-off-by: Scott Feldman Acked-by: Jiri Pirko --- v2->v3: no change v1->v2: add Jiri's Acked-by net/switchdev/switchdev.c |4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 73e3895..56d8479 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -863,7 +863,7 @@ static int switchdev_port_br_afspec(struct net_device *dev, err = f(dev, &vlan.obj); if (err) return err; - memset(&vlan, 0, sizeof(vlan)); + vlan.vid_begin = 0; } else { if (vlan.vid_begin) return -EINVAL; @@ -872,7 +872,7 @@ static int switchdev_port_br_afspec(struct net_device *dev, err = f(dev, &vlan.obj); if (err) return err; - memset(&vlan, 0, sizeof(vlan)); + vlan.vid_begin = 0; } } -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next v3 3/3] switchdev: split switchdev_attr into individual structs
From: Scott Feldman This was already done for switchdev_objs. Changing switchdev_attrs to new style makes switchdev API consistent for both attrs and objs. No functional changes here. Signed-off-by: Scott Feldman Acked-by: Jiri Pirko --- v2->v3: remove something that slipped in from the future. I got a piece of another patchset mixed in with this one. Oops. v1->v2: add Jiri's Acked-by .../ethernet/mellanox/mlxsw/spectrum_switchdev.c | 24 -- drivers/net/ethernet/mellanox/mlxsw/switchx2.c |7 +- drivers/net/ethernet/rocker/rocker.c | 23 -- include/net/switchdev.h| 42 +-- net/bridge/br_stp.c| 24 +++--- net/core/net-sysfs.c | 14 ++-- net/core/rtnetlink.c | 14 ++-- net/dsa/slave.c| 10 ++- net/switchdev/switchdev.c | 77 +--- 9 files changed, 163 insertions(+), 72 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index c39b7a1..efa1aa8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -56,15 +56,19 @@ static int mlxsw_sp_port_attr_get(struct net_device *dev, { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct switchdev_attr_port_parent_id *parent_id; + struct switchdev_attr_port_bridge_flags *brport_flags; switch (attr->id) { case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: - attr->u.ppid.id_len = sizeof(mlxsw_sp->base_mac); - memcpy(&attr->u.ppid.id, &mlxsw_sp->base_mac, - attr->u.ppid.id_len); + parent_id = SWITCHDEV_ATTR_PORT_PARENT_ID(attr); + parent_id->ppid.id_len = sizeof(mlxsw_sp->base_mac); + memcpy(&parent_id->ppid.id, &mlxsw_sp->base_mac, + parent_id->ppid.id_len); break; case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS: - attr->u.brport_flags = + brport_flags = SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS(attr); + brport_flags->brport_flags = (mlxsw_sp_port->learning ? BR_LEARNING : 0) | (mlxsw_sp_port->learning_sync ? BR_LEARNING_SYNC : 0); break; @@ -166,20 +170,26 @@ static int mlxsw_sp_port_attr_set(struct net_device *dev, struct switchdev_trans *trans) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct switchdev_attr_port_stp_state *stp_state; + struct switchdev_attr_port_bridge_flags *brport_flags; + struct switchdev_attr_bridge_ageing_time *ageing_time; int err = 0; switch (attr->id) { case SWITCHDEV_ATTR_ID_PORT_STP_STATE: + stp_state = SWITCHDEV_ATTR_PORT_STP_STATE(attr); err = mlxsw_sp_port_attr_stp_state_set(mlxsw_sp_port, trans, - attr->u.stp_state); + stp_state->state); break; case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS: + brport_flags = SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS(attr); err = mlxsw_sp_port_attr_br_flags_set(mlxsw_sp_port, trans, - attr->u.brport_flags); + brport_flags->brport_flags); break; case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME: + ageing_time = SWITCHDEV_ATTR_BRIDGE_AGEING_TIME(attr); err = mlxsw_sp_port_attr_br_ageing_set(mlxsw_sp_port, trans, - attr->u.ageing_time); + ageing_time->ageing_time); break; default: err = -EOPNOTSUPP; diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c index 2fd2279..edabc82 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c @@ -864,11 +864,14 @@ static int mlxsw_sx_port_attr_get(struct net_device *dev, { struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev); struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx; + struct switchdev_attr_port_parent_id *parent_id; switch (attr->id) { case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: - attr->u.ppid.id_len = sizeof(mlxsw_sx->hw_id); - memcpy(&attr->u.ppid.id, &mlxsw_sx->hw_id, attr->u.ppid.id_len); + parent_id = SWITCHDEV_ATTR_PORT_PARENT_ID(attr); +
[PATCH net-next v2 2/3] switchdev: fix: pass correct obj size when deferring obj add/del
From: Scott Feldman Fixes: 4d429c5dd ("switchdev: introduce possibility to defer obj_add/del") Signed-off-by: Scott Feldman --- v1->v2: use correct "Fixes" tag, use common func to calc obj size for add/del net/switchdev/switchdev.c | 19 +-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 56d8479..bff8e2b 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -336,6 +336,21 @@ int switchdev_port_attr_set(struct net_device *dev, } EXPORT_SYMBOL_GPL(switchdev_port_attr_set); +static size_t switchdev_obj_size(const struct switchdev_obj *obj) +{ + switch (obj->id) { + case SWITCHDEV_OBJ_ID_PORT_VLAN: + return sizeof(struct switchdev_obj_port_vlan); + case SWITCHDEV_OBJ_ID_IPV4_FIB: + return sizeof(struct switchdev_obj_ipv4_fib); + case SWITCHDEV_OBJ_ID_PORT_FDB: + return sizeof(struct switchdev_obj_port_fdb); + default: + BUG(); + } + return 0; +} + static int __switchdev_port_obj_add(struct net_device *dev, const struct switchdev_obj *obj, struct switchdev_trans *trans) @@ -421,7 +436,7 @@ static void switchdev_port_obj_add_deferred(struct net_device *dev, static int switchdev_port_obj_add_defer(struct net_device *dev, const struct switchdev_obj *obj) { - return switchdev_deferred_enqueue(dev, obj, sizeof(*obj), + return switchdev_deferred_enqueue(dev, obj, switchdev_obj_size(obj), switchdev_port_obj_add_deferred); } @@ -489,7 +504,7 @@ static void switchdev_port_obj_del_deferred(struct net_device *dev, static int switchdev_port_obj_del_defer(struct net_device *dev, const struct switchdev_obj *obj) { - return switchdev_deferred_enqueue(dev, obj, sizeof(*obj), + return switchdev_deferred_enqueue(dev, obj, switchdev_obj_size(obj), switchdev_port_obj_del_deferred); } -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next v2 1/3] switchdev: fix: erasing too much of vlan obj when handling multiple vlan specs
From: Scott Feldman When adding vlans with multiple IFLA_BRIDGE_VLAN_INFO attrs set in AFSPEC, we would wipe the vlan obj struct after the first IFLA_BRIDGE_VLAN_INFO. Fix this by only clearing what's necessary on each IFLA_BRIDGE_VLAN_INFO iteration. Fixes: 9e8f4a54 ("switchdev: push object ID back to object structure") Signed-off-by: Scott Feldman Acked-by: Jiri Pirko --- v1->v2: add Jiri's Acked-by net/switchdev/switchdev.c |4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 73e3895..56d8479 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -863,7 +863,7 @@ static int switchdev_port_br_afspec(struct net_device *dev, err = f(dev, &vlan.obj); if (err) return err; - memset(&vlan, 0, sizeof(vlan)); + vlan.vid_begin = 0; } else { if (vlan.vid_begin) return -EINVAL; @@ -872,7 +872,7 @@ static int switchdev_port_br_afspec(struct net_device *dev, err = f(dev, &vlan.obj); if (err) return err; - memset(&vlan, 0, sizeof(vlan)); + vlan.vid_begin = 0; } } -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next v2 3/3] switchdev: split switchdev_attr into individual structs
From: Scott Feldman This was already done for switchdev_objs. Changing switchdev_attrs to new style makes switchdev API consistent for both attrs and objs. No functional changes here. Signed-off-by: Scott Feldman Acked-by: Jiri Pirko --- v1->v2: add Jiri's Acked-by .../ethernet/mellanox/mlxsw/spectrum_switchdev.c | 24 -- drivers/net/ethernet/mellanox/mlxsw/switchx2.c |7 +- drivers/net/ethernet/rocker/rocker.c | 23 -- include/net/switchdev.h| 51 +++-- net/bridge/br_stp.c| 24 +++--- net/core/net-sysfs.c | 14 ++-- net/core/rtnetlink.c | 14 ++-- net/dsa/slave.c| 10 ++- net/switchdev/switchdev.c | 77 +--- 9 files changed, 172 insertions(+), 72 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index c39b7a1..efa1aa8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -56,15 +56,19 @@ static int mlxsw_sp_port_attr_get(struct net_device *dev, { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct switchdev_attr_port_parent_id *parent_id; + struct switchdev_attr_port_bridge_flags *brport_flags; switch (attr->id) { case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: - attr->u.ppid.id_len = sizeof(mlxsw_sp->base_mac); - memcpy(&attr->u.ppid.id, &mlxsw_sp->base_mac, - attr->u.ppid.id_len); + parent_id = SWITCHDEV_ATTR_PORT_PARENT_ID(attr); + parent_id->ppid.id_len = sizeof(mlxsw_sp->base_mac); + memcpy(&parent_id->ppid.id, &mlxsw_sp->base_mac, + parent_id->ppid.id_len); break; case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS: - attr->u.brport_flags = + brport_flags = SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS(attr); + brport_flags->brport_flags = (mlxsw_sp_port->learning ? BR_LEARNING : 0) | (mlxsw_sp_port->learning_sync ? BR_LEARNING_SYNC : 0); break; @@ -166,20 +170,26 @@ static int mlxsw_sp_port_attr_set(struct net_device *dev, struct switchdev_trans *trans) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct switchdev_attr_port_stp_state *stp_state; + struct switchdev_attr_port_bridge_flags *brport_flags; + struct switchdev_attr_bridge_ageing_time *ageing_time; int err = 0; switch (attr->id) { case SWITCHDEV_ATTR_ID_PORT_STP_STATE: + stp_state = SWITCHDEV_ATTR_PORT_STP_STATE(attr); err = mlxsw_sp_port_attr_stp_state_set(mlxsw_sp_port, trans, - attr->u.stp_state); + stp_state->state); break; case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS: + brport_flags = SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS(attr); err = mlxsw_sp_port_attr_br_flags_set(mlxsw_sp_port, trans, - attr->u.brport_flags); + brport_flags->brport_flags); break; case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME: + ageing_time = SWITCHDEV_ATTR_BRIDGE_AGEING_TIME(attr); err = mlxsw_sp_port_attr_br_ageing_set(mlxsw_sp_port, trans, - attr->u.ageing_time); + ageing_time->ageing_time); break; default: err = -EOPNOTSUPP; diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c index 2fd2279..edabc82 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c @@ -864,11 +864,14 @@ static int mlxsw_sx_port_attr_get(struct net_device *dev, { struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev); struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx; + struct switchdev_attr_port_parent_id *parent_id; switch (attr->id) { case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: - attr->u.ppid.id_len = sizeof(mlxsw_sx->hw_id); - memcpy(&attr->u.ppid.id, &mlxsw_sx->hw_id, attr->u.ppid.id_len); + parent_id = SWITCHDEV_ATTR_PORT_PARENT_ID(attr); + parent_id->ppid.id_len = sizeof(mlxsw_sx->hw_id); + memcpy(&parent_id->ppid.id, &mlxsw_sx->hw_id, +
[PATCH net-next 2/3] switchdev: fix: pass correct obj size when deferring obj add
From: Scott Feldman Fixes: 0bc05d585d ("switchdev: allow caller to explicitly request attr_set as deferred") Signed-off-by: Scott Feldman --- net/switchdev/switchdev.c | 19 ++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 56d8479..be8ced1 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -489,7 +489,24 @@ static void switchdev_port_obj_del_deferred(struct net_device *dev, static int switchdev_port_obj_del_defer(struct net_device *dev, const struct switchdev_obj *obj) { - return switchdev_deferred_enqueue(dev, obj, sizeof(*obj), + size_t size = 0; + + switch (obj->id) { + case SWITCHDEV_OBJ_ID_PORT_VLAN: + size = sizeof(struct switchdev_obj_port_vlan); + break; + case SWITCHDEV_OBJ_ID_IPV4_FIB: + size = sizeof(struct switchdev_obj_ipv4_fib); + break; + case SWITCHDEV_OBJ_ID_PORT_FDB: + size = sizeof(struct switchdev_obj_port_fdb); + break; + default: + WARN_ON(!size); + return -EINVAL; + } + + return switchdev_deferred_enqueue(dev, obj, size, switchdev_port_obj_del_deferred); } -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next 3/3] switchdev: split switchdev_attr into individual structs
From: Scott Feldman This was already done for switchdev_objs. Changing switchdev_attrs to new style makes switchdev API consistent for both attrs and objs. No functional changes here. Signed-off-by: Scott Feldman --- .../ethernet/mellanox/mlxsw/spectrum_switchdev.c | 24 -- drivers/net/ethernet/mellanox/mlxsw/switchx2.c |7 +- drivers/net/ethernet/rocker/rocker.c | 23 -- include/net/switchdev.h| 51 +++-- net/bridge/br_stp.c| 24 +++--- net/core/net-sysfs.c | 14 ++-- net/core/rtnetlink.c | 14 ++-- net/dsa/slave.c| 10 ++- net/switchdev/switchdev.c | 77 +--- 9 files changed, 172 insertions(+), 72 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index c39b7a1..efa1aa8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -56,15 +56,19 @@ static int mlxsw_sp_port_attr_get(struct net_device *dev, { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct switchdev_attr_port_parent_id *parent_id; + struct switchdev_attr_port_bridge_flags *brport_flags; switch (attr->id) { case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: - attr->u.ppid.id_len = sizeof(mlxsw_sp->base_mac); - memcpy(&attr->u.ppid.id, &mlxsw_sp->base_mac, - attr->u.ppid.id_len); + parent_id = SWITCHDEV_ATTR_PORT_PARENT_ID(attr); + parent_id->ppid.id_len = sizeof(mlxsw_sp->base_mac); + memcpy(&parent_id->ppid.id, &mlxsw_sp->base_mac, + parent_id->ppid.id_len); break; case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS: - attr->u.brport_flags = + brport_flags = SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS(attr); + brport_flags->brport_flags = (mlxsw_sp_port->learning ? BR_LEARNING : 0) | (mlxsw_sp_port->learning_sync ? BR_LEARNING_SYNC : 0); break; @@ -166,20 +170,26 @@ static int mlxsw_sp_port_attr_set(struct net_device *dev, struct switchdev_trans *trans) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct switchdev_attr_port_stp_state *stp_state; + struct switchdev_attr_port_bridge_flags *brport_flags; + struct switchdev_attr_bridge_ageing_time *ageing_time; int err = 0; switch (attr->id) { case SWITCHDEV_ATTR_ID_PORT_STP_STATE: + stp_state = SWITCHDEV_ATTR_PORT_STP_STATE(attr); err = mlxsw_sp_port_attr_stp_state_set(mlxsw_sp_port, trans, - attr->u.stp_state); + stp_state->state); break; case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS: + brport_flags = SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS(attr); err = mlxsw_sp_port_attr_br_flags_set(mlxsw_sp_port, trans, - attr->u.brport_flags); + brport_flags->brport_flags); break; case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME: + ageing_time = SWITCHDEV_ATTR_BRIDGE_AGEING_TIME(attr); err = mlxsw_sp_port_attr_br_ageing_set(mlxsw_sp_port, trans, - attr->u.ageing_time); + ageing_time->ageing_time); break; default: err = -EOPNOTSUPP; diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c index 2fd2279..edabc82 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c @@ -864,11 +864,14 @@ static int mlxsw_sx_port_attr_get(struct net_device *dev, { struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev); struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx; + struct switchdev_attr_port_parent_id *parent_id; switch (attr->id) { case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: - attr->u.ppid.id_len = sizeof(mlxsw_sx->hw_id); - memcpy(&attr->u.ppid.id, &mlxsw_sx->hw_id, attr->u.ppid.id_len); + parent_id = SWITCHDEV_ATTR_PORT_PARENT_ID(attr); + parent_id->ppid.id_len = sizeof(mlxsw_sx->hw_id); + memcpy(&parent_id->ppid.id, &mlxsw_sx->hw_id, + parent_id->ppid.id_len);
[PATCH net-next 1/3] switchdev: fix: erasing too much of vlan obj when handling multiple vlan specs
From: Scott Feldman When adding vlans with multiple IFLA_BRIDGE_VLAN_INFO attrs set in AFSPEC, we would wipe the vlan obj struct after the first IFLA_BRIDGE_VLAN_INFO. Fix this by only clearing what's necessary on each IFLA_BRIDGE_VLAN_INFO iteration. Fixes: 9e8f4a54 ("switchdev: push object ID back to object structure") Signed-off-by: Scott Feldman --- net/switchdev/switchdev.c |4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 73e3895..56d8479 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -863,7 +863,7 @@ static int switchdev_port_br_afspec(struct net_device *dev, err = f(dev, &vlan.obj); if (err) return err; - memset(&vlan, 0, sizeof(vlan)); + vlan.vid_begin = 0; } else { if (vlan.vid_begin) return -EINVAL; @@ -872,7 +872,7 @@ static int switchdev_port_br_afspec(struct net_device *dev, err = f(dev, &vlan.obj); if (err) return err; - memset(&vlan, 0, sizeof(vlan)); + vlan.vid_begin = 0; } } -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next v3 0/4] switchdev: push bridge ageing_time attribute down
From: Scott Feldman Push bridge-level attributes down to switchdev drivers. This patchset adds the infrastructure and then pushes, as an example, ageing_time attribute down from bridge to switchdev (rocker) driver. Add some range-checking for ageing_time. # ip link set dev br0 type bridge ageing_time 1000 # ip link set dev br0 type bridge ageing_time 999 RTNETLINK answers: Numerical result out of range Up until now, switchdev attrs where port-level attrs, so the netdev used in switchdev_attr_set() would be a switch port or bond of switch ports. With bridge-level attrs, the netdev passed to switchdev_attr_set() is the bridge netdev. The same recusive algo is used to visit the leaves of the stacked drivers to set the attr, it's just in this case we start one layer higher in the stack. One note is not all ports in the bridge may support setting a bridge-level attribute, so rather than failing the entire set, we'll skip over those ports returning -EOPNOTSUPP. v2->v3: Per Jiri review: push only ageing_time attr down at this time, and don't pass raw bridge IFLA_BR_* values; rather use new switchdev attr ID for ageing_time. v1->v2: rebase w/ net-next Scott Feldman (4): switchdev: add bridge ageing_time attribute switchdev: skip over ports returning -EOPNOTSUPP when recursing ports bridge: push bridge setting ageing_time down to switchdev rocker: handle setting bridge ageing_time drivers/net/ethernet/rocker/rocker.c | 16 include/net/switchdev.h |3 +++ net/bridge/br_ioctl.c|3 +-- net/bridge/br_netlink.c |6 +++--- net/bridge/br_private.h |1 + net/bridge/br_stp.c | 23 +++ net/bridge/br_sysfs_br.c |3 +-- net/switchdev/switchdev.c|9 - 8 files changed, 56 insertions(+), 8 deletions(-) -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next v3 3/4] bridge: push bridge setting ageing_time down to switchdev
From: Scott Feldman Use SWITCHDEV_F_SKIP_EOPNOTSUPP to skip over ports in bridge that don't support setting ageing_time (or setting bridge attrs in general). If push fails, don't update ageing_time in bridge and return err to user. If push succeeds, update ageing_time in bridge and run gc_timer now to recalabrate when to run gc_timer next, based on new ageing_time. Signed-off-by: Scott Feldman Signed-off-by: Jiri Pirko --- net/bridge/br_ioctl.c|3 +-- net/bridge/br_netlink.c |6 +++--- net/bridge/br_private.h |1 + net/bridge/br_stp.c | 23 +++ net/bridge/br_sysfs_br.c |3 +-- 5 files changed, 29 insertions(+), 7 deletions(-) diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index 8d423bc..263b4de 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -200,8 +200,7 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN)) return -EPERM; - br->ageing_time = clock_t_to_jiffies(args[1]); - return 0; + return br_set_ageing_time(br, args[1]); case BRCTL_GET_PORT_INFO: { diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index d78b442..544ab96 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -870,9 +870,9 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[], } if (data[IFLA_BR_AGEING_TIME]) { - u32 ageing_time = nla_get_u32(data[IFLA_BR_AGEING_TIME]); - - br->ageing_time = clock_t_to_jiffies(ageing_time); + err = br_set_ageing_time(br, nla_get_u32(data[IFLA_BR_AGEING_TIME])); + if (err) + return err; } if (data[IFLA_BR_STP_STATE]) { diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 09d3ecb..ba0c67b 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -882,6 +882,7 @@ void __br_set_forward_delay(struct net_bridge *br, unsigned long t); int br_set_forward_delay(struct net_bridge *br, unsigned long x); int br_set_hello_time(struct net_bridge *br, unsigned long x); int br_set_max_age(struct net_bridge *br, unsigned long x); +int br_set_ageing_time(struct net_bridge *br, u32 ageing_time); /* br_stp_if.c */ diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index 3a982c0..db6d243de 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -566,6 +566,29 @@ int br_set_max_age(struct net_bridge *br, unsigned long val) } +int br_set_ageing_time(struct net_bridge *br, u32 ageing_time) +{ + struct switchdev_attr attr = { + .id = SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME, + .flags = SWITCHDEV_F_SKIP_EOPNOTSUPP, + .u.ageing_time = ageing_time, + }; + unsigned long t = clock_t_to_jiffies(ageing_time); + int err; + + if (t < BR_MIN_AGEING_TIME || t > BR_MAX_AGEING_TIME) + return -ERANGE; + + err = switchdev_port_attr_set(br->dev, &attr); + if (err) + return err; + + br->ageing_time = t; + mod_timer(&br->gc_timer, jiffies); + + return 0; +} + void __br_set_forward_delay(struct net_bridge *br, unsigned long t) { br->bridge_forward_delay = t; diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 4c97fc5..04ef192 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -102,8 +102,7 @@ static ssize_t ageing_time_show(struct device *d, static int set_ageing_time(struct net_bridge *br, unsigned long val) { - br->ageing_time = clock_t_to_jiffies(val); - return 0; + return br_set_ageing_time(br, val); } static ssize_t ageing_time_store(struct device *d, -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next v3 4/4] rocker: handle setting bridge ageing_time
From: Scott Feldman The FDB cleanup timer will get rescheduled to re-evaluate FDB entries based on new ageing_time. Signed-off-by: Scott Feldman --- drivers/net/ethernet/rocker/rocker.c | 16 1 file changed, 16 insertions(+) diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index cf91ffc..eafa907 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -4361,6 +4361,18 @@ static int rocker_port_brport_flags_set(struct rocker_port *rocker_port, return err; } +static int rocker_port_bridge_ageing_time(struct rocker_port *rocker_port, + struct switchdev_trans *trans, + u32 ageing_time) +{ + if (!switchdev_trans_ph_prepare(trans)) { + rocker_port->ageing_time = clock_t_to_jiffies(ageing_time); + mod_timer(&rocker_port->rocker->fdb_cleanup_timer, jiffies); + } + + return 0; +} + static int rocker_port_attr_set(struct net_device *dev, struct switchdev_attr *attr, struct switchdev_trans *trans) @@ -4378,6 +4390,10 @@ static int rocker_port_attr_set(struct net_device *dev, err = rocker_port_brport_flags_set(rocker_port, trans, attr->u.brport_flags); break; + case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME: + err = rocker_port_bridge_ageing_time(rocker_port, trans, +attr->u.ageing_time); + break; default: err = -EOPNOTSUPP; break; -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next v3 2/4] switchdev: skip over ports returning -EOPNOTSUPP when recursing ports
From: Scott Feldman This allows us to recurse over all the ports, skipping over unsupporting ports. Without the change, the recursion would stop at first unsupported port. Signed-off-by: Scott Feldman --- include/net/switchdev.h |1 + net/switchdev/switchdev.c |9 - 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 61f129b..1ce7083 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -16,6 +16,7 @@ #include #define SWITCHDEV_F_NO_RECURSE BIT(0) +#define SWITCHDEV_F_SKIP_EOPNOTSUPPBIT(1) struct switchdev_trans_item { struct list_head list; diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 6e4a4f9..7a9ab90 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -147,7 +147,7 @@ static int __switchdev_port_attr_set(struct net_device *dev, return ops->switchdev_port_attr_set(dev, attr, trans); if (attr->flags & SWITCHDEV_F_NO_RECURSE) - return err; + goto done; /* Switch device port(s) may be stacked under * bond/team/vlan dev, so recurse down to set attr on @@ -156,10 +156,17 @@ static int __switchdev_port_attr_set(struct net_device *dev, netdev_for_each_lower_dev(dev, lower_dev, iter) { err = __switchdev_port_attr_set(lower_dev, attr, trans); + if (err == -EOPNOTSUPP && + attr->flags & SWITCHDEV_F_SKIP_EOPNOTSUPP) + continue; if (err) break; } +done: + if (err == -EOPNOTSUPP && attr->flags & SWITCHDEV_F_SKIP_EOPNOTSUPP) + err = 0; + return err; } -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next v3 1/4] switchdev: add bridge ageing_time attribute
From: Scott Feldman Setting the stage to push bridge-level attributes down to port driver so hardware can be programmed accordingly. Bridge-level attribute example is ageing_time. This is a per-bridge attribute, not a per-bridge-port attr. Signed-off-by: Scott Feldman --- include/net/switchdev.h |2 ++ 1 file changed, 2 insertions(+) diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 89266a3..61f129b 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -43,6 +43,7 @@ enum switchdev_attr_id { SWITCHDEV_ATTR_ID_PORT_PARENT_ID, SWITCHDEV_ATTR_ID_PORT_STP_STATE, SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS, + SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME, }; struct switchdev_attr { @@ -52,6 +53,7 @@ struct switchdev_attr { struct netdev_phys_item_id ppid;/* PORT_PARENT_ID */ u8 stp_state; /* PORT_STP_STATE */ unsigned long brport_flags; /* PORT_BRIDGE_FLAGS */ + u32 ageing_time;/* BRIDGE_AGEING_TIME */ } u; }; -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next v2 4/4] rocker: handle setting bridge ageing_time
From: Scott Feldman The FDB cleanup timer will get rescheduled to re-evaluate FDB entries based on new ageing_time. Signed-off-by: Scott Feldman --- drivers/net/ethernet/rocker/rocker.c | 22 ++ 1 file changed, 22 insertions(+) diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index cf91ffc..3c7f9ae 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -4361,6 +4361,24 @@ static int rocker_port_brport_flags_set(struct rocker_port *rocker_port, return err; } +static int rocker_port_bridge_set(struct rocker_port *rocker_port, + struct switchdev_trans *trans, + struct switchdev_attr_bridge *bridge) +{ + switch (bridge->attr) { + case IFLA_BR_AGEING_TIME: + if (switchdev_trans_ph_prepare(trans)) + return 0; + rocker_port->ageing_time = clock_t_to_jiffies(bridge->val); + mod_timer(&rocker_port->rocker->fdb_cleanup_timer, jiffies); + break; + default: + return -EOPNOTSUPP; + } + + return 0; +} + static int rocker_port_attr_set(struct net_device *dev, struct switchdev_attr *attr, struct switchdev_trans *trans) @@ -4378,6 +4396,10 @@ static int rocker_port_attr_set(struct net_device *dev, err = rocker_port_brport_flags_set(rocker_port, trans, attr->u.brport_flags); break; + case SWITCHDEV_ATTR_ID_BRIDGE: + err = rocker_port_bridge_set(rocker_port, trans, +&attr->u.bridge); + break; default: err = -EOPNOTSUPP; break; -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next v2 3/4] bridge: push bridge setting ageing_time down to switchdev
From: Scott Feldman Use SWITCHDEV_F_SKIP_EOPNOTSUPP to skip over ports in bridge that don't support setting ageing_time (or setting bridge attrs in general). If push fails, don't update ageing_time in bridge and return err to user. If push succeeds, update ageing_time in bridge and run gc_timer now to recalabrate when to run gc_timer next, based on new ageing_time. Signed-off-by: Scott Feldman Signed-off-by: Jiri Pirko --- net/bridge/br_ioctl.c|3 +-- net/bridge/br_netlink.c |6 +++--- net/bridge/br_private.h |1 + net/bridge/br_stp.c | 24 net/bridge/br_sysfs_br.c |3 +-- 5 files changed, 30 insertions(+), 7 deletions(-) diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index 8d423bc..263b4de 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -200,8 +200,7 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN)) return -EPERM; - br->ageing_time = clock_t_to_jiffies(args[1]); - return 0; + return br_set_ageing_time(br, args[1]); case BRCTL_GET_PORT_INFO: { diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index d78b442..544ab96 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -870,9 +870,9 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[], } if (data[IFLA_BR_AGEING_TIME]) { - u32 ageing_time = nla_get_u32(data[IFLA_BR_AGEING_TIME]); - - br->ageing_time = clock_t_to_jiffies(ageing_time); + err = br_set_ageing_time(br, nla_get_u32(data[IFLA_BR_AGEING_TIME])); + if (err) + return err; } if (data[IFLA_BR_STP_STATE]) { diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 09d3ecb..ba0c67b 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -882,6 +882,7 @@ void __br_set_forward_delay(struct net_bridge *br, unsigned long t); int br_set_forward_delay(struct net_bridge *br, unsigned long x); int br_set_hello_time(struct net_bridge *br, unsigned long x); int br_set_max_age(struct net_bridge *br, unsigned long x); +int br_set_ageing_time(struct net_bridge *br, u32 ageing_time); /* br_stp_if.c */ diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index 3a982c0..ae3286b 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -566,6 +566,30 @@ int br_set_max_age(struct net_bridge *br, unsigned long val) } +int br_set_ageing_time(struct net_bridge *br, u32 ageing_time) +{ + struct switchdev_attr attr = { + .id = SWITCHDEV_ATTR_ID_BRIDGE, + .flags = SWITCHDEV_F_SKIP_EOPNOTSUPP, + .u.bridge.attr = IFLA_BR_AGEING_TIME, + .u.bridge.val = ageing_time, + }; + unsigned long t = clock_t_to_jiffies(ageing_time); + int err; + + if (t < BR_MIN_AGEING_TIME || t > BR_MAX_AGEING_TIME) + return -ERANGE; + + err = switchdev_port_attr_set(br->dev, &attr); + if (err) + return err; + + br->ageing_time = t; + mod_timer(&br->gc_timer, jiffies); + + return 0; +} + void __br_set_forward_delay(struct net_bridge *br, unsigned long t) { br->bridge_forward_delay = t; diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 4c97fc5..04ef192 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -102,8 +102,7 @@ static ssize_t ageing_time_show(struct device *d, static int set_ageing_time(struct net_bridge *br, unsigned long val) { - br->ageing_time = clock_t_to_jiffies(val); - return 0; + return br_set_ageing_time(br, val); } static ssize_t ageing_time_store(struct device *d, -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next v2 1/4] switchdev: add bridge attributes
From: Scott Feldman Setting the stage to push bridge-level attributes down to port driver so hardware can be programmed accordingly. Bridge-level attribute example is ageing_time. This is a per-bridge attribute, not a per-bridge-port attr. Signed-off-by: Scott Feldman --- include/net/switchdev.h |5 + include/uapi/linux/if_link.h |2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 89266a3..8d92cd0 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -43,6 +43,7 @@ enum switchdev_attr_id { SWITCHDEV_ATTR_ID_PORT_PARENT_ID, SWITCHDEV_ATTR_ID_PORT_STP_STATE, SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS, + SWITCHDEV_ATTR_ID_BRIDGE, }; struct switchdev_attr { @@ -52,6 +53,10 @@ struct switchdev_attr { struct netdev_phys_item_id ppid;/* PORT_PARENT_ID */ u8 stp_state; /* PORT_STP_STATE */ unsigned long brport_flags; /* PORT_BRIDGE_FLAGS */ + struct switchdev_attr_bridge { /* BRIDGE */ + enum ifla_br attr; + u32 val; + } bridge; } u; }; diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index e3b6217..30177b3 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -222,7 +222,7 @@ enum in6_addr_gen_mode { /* Bridge section */ -enum { +enum ifla_br { IFLA_BR_UNSPEC, IFLA_BR_FORWARD_DELAY, IFLA_BR_HELLO_TIME, -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next v2 0/4] switchdev: push bridge attributes down
From: Scott Feldman Push bridge-level attributes down to switchdev drivers. This patchset adds the infrastructure and then pushes, as an example, ageing_time attribute down from bridge to switchdev (rocker) driver. Add some range-checking for ageing_time. # ip link set dev br0 type bridge ageing_time 1000 # ip link set dev br0 type bridge ageing_time 999 RTNETLINK answers: Numerical result out of range Up until now, switchdev attrs where port-level attrs, so the netdev used in switchdev_attr_set() would be a switch port or bond of switch ports. With bridge-level attrs, the netdev passed to switchdev_attr_set() is the bridge netdev. The same recusive algo is used to visit the leaves of the stacked drivers to set the attr, it's just in this case we start one layer higher in the stack. One note is not all ports in the bridge may support setting a bridge-level attribute, so rather than failing the entire set, we'll skip over those ports returning -EOPNOTSUPP. v1->v2: rebase w/ net-next Scott Feldman (4): switchdev: add bridge attributes switchdev: skip over ports returning -EOPNOTSUPP when recursing ports bridge: push bridge setting ageing_time down to switchdev rocker: handle setting bridge ageing_time drivers/net/ethernet/rocker/rocker.c | 22 ++ include/net/switchdev.h |6 ++ include/uapi/linux/if_link.h |2 +- net/bridge/br_ioctl.c|3 +-- net/bridge/br_netlink.c |6 +++--- net/bridge/br_private.h |1 + net/bridge/br_stp.c | 24 net/bridge/br_sysfs_br.c |3 +-- net/switchdev/switchdev.c|9 - 9 files changed, 67 insertions(+), 9 deletions(-) -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next v2 2/4] switchdev: skip over ports returning -EOPNOTSUPP when recursing ports
From: Scott Feldman This allows us to recurse over all the ports, skipping over unsupporting ports. Without the change, the recursion would stop at first unsupported port. Signed-off-by: Scott Feldman --- include/net/switchdev.h |1 + net/switchdev/switchdev.c |9 - 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 8d92cd0..f3de6f4 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -16,6 +16,7 @@ #include #define SWITCHDEV_F_NO_RECURSE BIT(0) +#define SWITCHDEV_F_SKIP_EOPNOTSUPPBIT(1) struct switchdev_trans_item { struct list_head list; diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 6e4a4f9..7a9ab90 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -147,7 +147,7 @@ static int __switchdev_port_attr_set(struct net_device *dev, return ops->switchdev_port_attr_set(dev, attr, trans); if (attr->flags & SWITCHDEV_F_NO_RECURSE) - return err; + goto done; /* Switch device port(s) may be stacked under * bond/team/vlan dev, so recurse down to set attr on @@ -156,10 +156,17 @@ static int __switchdev_port_attr_set(struct net_device *dev, netdev_for_each_lower_dev(dev, lower_dev, iter) { err = __switchdev_port_attr_set(lower_dev, attr, trans); + if (err == -EOPNOTSUPP && + attr->flags & SWITCHDEV_F_SKIP_EOPNOTSUPP) + continue; if (err) break; } +done: + if (err == -EOPNOTSUPP && attr->flags & SWITCHDEV_F_SKIP_EOPNOTSUPP) + err = 0; + return err; } -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next 4/4] rocker: handle setting bridge ageing_time
From: Scott Feldman The FDB cleanup timer will get rescheduled to re-evaluate FDB entries based on new ageing_time. Signed-off-by: Scott Feldman --- drivers/net/ethernet/rocker/rocker.c | 22 ++ 1 file changed, 22 insertions(+) diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index 32c5429..99ec715 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -4382,6 +4382,24 @@ static int rocker_port_brport_flags_set(struct rocker_port *rocker_port, return err; } +static int rocker_port_bridge_set(struct rocker_port *rocker_port, + enum switchdev_trans trans, + struct switchdev_attr_bridge *bridge) +{ + switch (bridge->attr) { + case IFLA_BR_AGEING_TIME: + if (trans == SWITCHDEV_TRANS_PREPARE) + return 0; + rocker_port->ageing_time = clock_t_to_jiffies(bridge->val); + mod_timer(&rocker_port->rocker->fdb_cleanup_timer, jiffies); + break; + default: + return -EOPNOTSUPP; + } + + return 0; +} + static int rocker_port_attr_set(struct net_device *dev, struct switchdev_attr *attr) { @@ -4409,6 +4427,10 @@ static int rocker_port_attr_set(struct net_device *dev, err = rocker_port_brport_flags_set(rocker_port, attr->trans, attr->u.brport_flags); break; + case SWITCHDEV_ATTR_BRIDGE: + err = rocker_port_bridge_set(rocker_port, attr->trans, +&attr->u.bridge); + break; default: err = -EOPNOTSUPP; break; -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next 2/4] switchdev: skip over ports returning -EOPNOTSUPP when recursing ports
From: Scott Feldman This allows us to recurse over all the ports, skipping over unsupporting ports. Without the change, the recursion would stop at first unsupported port. Signed-off-by: Scott Feldman --- include/net/switchdev.h |1 + net/switchdev/switchdev.c |9 - 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 54b2faa..22a6dbe 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -15,6 +15,7 @@ #include #define SWITCHDEV_F_NO_RECURSE BIT(0) +#define SWITCHDEV_F_SKIP_EOPNOTSUPPBIT(1) enum switchdev_trans { SWITCHDEV_TRANS_NONE, diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index fda38f8..5c30da0 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -73,7 +73,7 @@ static int __switchdev_port_attr_set(struct net_device *dev, return ops->switchdev_port_attr_set(dev, attr); if (attr->flags & SWITCHDEV_F_NO_RECURSE) - return err; + goto done; /* Switch device port(s) may be stacked under * bond/team/vlan dev, so recurse down to set attr on @@ -82,10 +82,17 @@ static int __switchdev_port_attr_set(struct net_device *dev, netdev_for_each_lower_dev(dev, lower_dev, iter) { err = __switchdev_port_attr_set(lower_dev, attr); + if (err == -EOPNOTSUPP && + attr->flags & SWITCHDEV_F_SKIP_EOPNOTSUPP) + continue; if (err) break; } +done: + if (err == -EOPNOTSUPP && attr->flags & SWITCHDEV_F_SKIP_EOPNOTSUPP) + err = 0; + return err; } -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next 1/4] switchdev: add bridge attributes
From: Scott Feldman Setting the stage to push bridge-level attributes down to port driver so hardware can be programmed accordingly. Bridge-level attribute example is ageing_time. This is a per-bridge attribute, not a per-bridge-port attr. Signed-off-by: Scott Feldman --- include/net/switchdev.h |5 + include/uapi/linux/if_link.h |2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 319baab..54b2faa 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -28,6 +28,7 @@ enum switchdev_attr_id { SWITCHDEV_ATTR_PORT_PARENT_ID, SWITCHDEV_ATTR_PORT_STP_STATE, SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS, + SWITCHDEV_ATTR_BRIDGE, }; struct switchdev_attr { @@ -38,6 +39,10 @@ struct switchdev_attr { struct netdev_phys_item_id ppid;/* PORT_PARENT_ID */ u8 stp_state; /* PORT_STP_STATE */ unsigned long brport_flags; /* PORT_BRIDGE_FLAGS */ + struct switchdev_attr_bridge { /* BRIDGE */ + enum ifla_br attr; + u32 val; + } bridge; } u; }; diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 3a5f263..8d0ef1c 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -222,7 +222,7 @@ enum in6_addr_gen_mode { /* Bridge section */ -enum { +enum ifla_br { IFLA_BR_UNSPEC, IFLA_BR_FORWARD_DELAY, IFLA_BR_HELLO_TIME, -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next 3/4] bridge: push bridge setting ageing_time down to switchdev
From: Scott Feldman Use SWITCHDEV_F_SKIP_EOPNOTSUPP to skip over ports in bridge that don't support setting ageing_time (or setting bridge attrs in general). If push fails, don't update ageing_time in bridge and return err to user. If push succeeds, update ageing_time in bridge and run gc_timer now to recalabrate when to run gc_timer next, based on new ageing_time. Signed-off-by: Scott Feldman Signed-off-by: Jiri Pirko --- net/bridge/br_ioctl.c|3 +-- net/bridge/br_netlink.c |6 +++--- net/bridge/br_private.h |1 + net/bridge/br_stp.c | 24 net/bridge/br_sysfs_br.c |3 +-- 5 files changed, 30 insertions(+), 7 deletions(-) diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index 8d423bc..263b4de 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -200,8 +200,7 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN)) return -EPERM; - br->ageing_time = clock_t_to_jiffies(args[1]); - return 0; + return br_set_ageing_time(br, args[1]); case BRCTL_GET_PORT_INFO: { diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index ea748c9..6dbdd4d 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -774,9 +774,9 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[], } if (data[IFLA_BR_AGEING_TIME]) { - u32 ageing_time = nla_get_u32(data[IFLA_BR_AGEING_TIME]); - - br->ageing_time = clock_t_to_jiffies(ageing_time); + err = br_set_ageing_time(br, nla_get_u32(data[IFLA_BR_AGEING_TIME])); + if (err) + return err; } if (data[IFLA_BR_STP_STATE]) { diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 74e99c7..dc4b390 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -808,6 +808,7 @@ void __br_set_forward_delay(struct net_bridge *br, unsigned long t); int br_set_forward_delay(struct net_bridge *br, unsigned long x); int br_set_hello_time(struct net_bridge *br, unsigned long x); int br_set_max_age(struct net_bridge *br, unsigned long x); +int br_set_ageing_time(struct net_bridge *br, u32 ageing_time); /* br_stp_if.c */ diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index ed74ffa..6241bab 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -566,6 +566,30 @@ int br_set_max_age(struct net_bridge *br, unsigned long val) } +int br_set_ageing_time(struct net_bridge *br, u32 ageing_time) +{ + struct switchdev_attr attr = { + .id = SWITCHDEV_ATTR_BRIDGE, + .flags = SWITCHDEV_F_SKIP_EOPNOTSUPP, + .u.bridge.attr = IFLA_BR_AGEING_TIME, + .u.bridge.val = ageing_time, + }; + unsigned long t = clock_t_to_jiffies(ageing_time); + int err; + + if (t < BR_MIN_AGEING_TIME || t > BR_MAX_AGEING_TIME) + return -ERANGE; + + err = switchdev_port_attr_set(br->dev, &attr); + if (err) + return err; + + br->ageing_time = t; + mod_timer(&br->gc_timer, jiffies); + + return 0; +} + void __br_set_forward_delay(struct net_bridge *br, unsigned long t) { br->bridge_forward_delay = t; diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 4c97fc5..04ef192 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -102,8 +102,7 @@ static ssize_t ageing_time_show(struct device *d, static int set_ageing_time(struct net_bridge *br, unsigned long val) { - br->ageing_time = clock_t_to_jiffies(val); - return 0; + return br_set_ageing_time(br, val); } static ssize_t ageing_time_store(struct device *d, -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next 0/4] switchdev: push bridge attributes down
From: Scott Feldman Push bridge-level attributes down to switchdev drivers. This patchset adds the infrastructure and then pushes, as an example, ageing_time attribute down from bridge to switchdev (rocker) driver. Add some range-checking for ageing_time. # ip link set dev br0 type bridge ageing_time 1000 # ip link set dev br0 type bridge ageing_time 999 RTNETLINK answers: Numerical result out of range Up until now, switchdev attrs where port-level attrs, so the netdev used in switchdev_attr_set() would be a switch port or bond of switch ports. With bridge-level attrs, the netdev passed to switchdev_attr_set() is the bridge netdev. The same recusive algo is used to visit the leaves of the stacked drivers to set the attr, it's just in this case we start one layer higher in the stack. One note is not all ports in the bridge may support setting a bridge-level attribute, so rather than failing the entire set, we'll skip over those ports returning -EOPNOTSUPP. Scott Feldman (4): switchdev: add bridge attributes switchdev: skip over ports returning -EOPNOTSUPP when recursing ports bridge: push bridge setting ageing_time down to switchdev rocker: handle setting bridge ageing_time drivers/net/ethernet/rocker/rocker.c | 22 ++ include/net/switchdev.h |6 ++ include/uapi/linux/if_link.h |2 +- net/bridge/br_ioctl.c|3 +-- net/bridge/br_netlink.c |6 +++--- net/bridge/br_private.h |1 + net/bridge/br_stp.c | 24 net/bridge/br_sysfs_br.c |3 +-- net/switchdev/switchdev.c|9 - 9 files changed, 67 insertions(+), 9 deletions(-) -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next v3 5/7] rocker: add FDB cleanup timer
From: Scott Feldman Add a timer to each rocker switch to do FDB entry cleanup by ageing out expired entries. The timer scheduling algo is copied from the bridge driver, for the most part, to keep the firing of the timer to a minimum. Signed-off-by: Scott Feldman Acked-by: Jiri Pirko --- v3: Per davem review: add del_timer_sync on rocker port remove. drivers/net/ethernet/rocker/rocker.c | 42 ++ 1 file changed, 42 insertions(+) diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index be8bb04..32c5429 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -248,6 +248,7 @@ struct rocker { u64 flow_tbl_next_cookie; DECLARE_HASHTABLE(group_tbl, 16); spinlock_t group_tbl_lock; /* for group tbl accesses */ + struct timer_list fdb_cleanup_timer; DECLARE_HASHTABLE(fdb_tbl, 16); spinlock_t fdb_tbl_lock;/* for fdb tbl accesses */ unsigned long internal_vlan_bitmap[ROCKER_INTERNAL_VLAN_BITMAP_LEN]; @@ -3706,6 +3707,41 @@ err_out: return err; } +static void rocker_fdb_cleanup(unsigned long data) +{ + struct rocker *rocker = (struct rocker *)data; + struct rocker_port *rocker_port; + struct rocker_fdb_tbl_entry *entry; + struct hlist_node *tmp; + unsigned long next_timer = jiffies + BR_MIN_AGEING_TIME; + unsigned long expires; + unsigned long lock_flags; + int flags = ROCKER_OP_FLAG_NOWAIT | ROCKER_OP_FLAG_REMOVE | + ROCKER_OP_FLAG_LEARNED; + int bkt; + + spin_lock_irqsave(&rocker->fdb_tbl_lock, lock_flags); + + hash_for_each_safe(rocker->fdb_tbl, bkt, tmp, entry, entry) { + if (!entry->learned) + continue; + rocker_port = entry->key.rocker_port; + expires = entry->touched + rocker_port->ageing_time; + if (time_before_eq(expires, jiffies)) { + rocker_port_fdb_learn(rocker_port, SWITCHDEV_TRANS_NONE, + flags, entry->key.addr, + entry->key.vlan_id); + hash_del(&entry->entry); + } else if (time_before(expires, next_timer)) { + next_timer = expires; + } + } + + spin_unlock_irqrestore(&rocker->fdb_tbl_lock, lock_flags); + + mod_timer(&rocker->fdb_cleanup_timer, round_jiffies_up(next_timer)); +} + static int rocker_port_router_mac(struct rocker_port *rocker_port, enum switchdev_trans trans, int flags, __be16 vlan_id) @@ -5191,6 +5227,10 @@ static int rocker_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto err_init_tbls; } + setup_timer(&rocker->fdb_cleanup_timer, rocker_fdb_cleanup, + (unsigned long) rocker); + mod_timer(&rocker->fdb_cleanup_timer, jiffies); + err = rocker_probe_ports(rocker); if (err) { dev_err(&pdev->dev, "failed to probe ports\n"); @@ -5203,6 +5243,7 @@ static int rocker_probe(struct pci_dev *pdev, const struct pci_device_id *id) return 0; err_probe_ports: + del_timer_sync(&rocker->fdb_cleanup_timer); rocker_free_tbls(rocker); err_init_tbls: free_irq(rocker_msix_vector(rocker, ROCKER_MSIX_VEC_EVENT), rocker); @@ -5230,6 +5271,7 @@ static void rocker_remove(struct pci_dev *pdev) { struct rocker *rocker = pci_get_drvdata(pdev); + del_timer_sync(&rocker->fdb_cleanup_timer); rocker_free_tbls(rocker); rocker_write32(rocker, CONTROL, ROCKER_CONTROL_RESET); rocker_remove_ports(rocker); -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next v3 7/7] switchdev: update documentation on FDB ageing_time
From: Scott Feldman Signed-off-by: Scott Feldman Reviewed-by: Vivien Didelot Acked-by: Jiri Pirko --- Documentation/networking/switchdev.txt | 24 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/Documentation/networking/switchdev.txt b/Documentation/networking/switchdev.txt index 476df04..67e43ee 100644 --- a/Documentation/networking/switchdev.txt +++ b/Documentation/networking/switchdev.txt @@ -239,20 +239,20 @@ The driver should initialize the attributes to the hardware defaults. FDB Ageing ^^ -There are two FDB ageing models supported: 1) ageing by the device, and 2) -ageing by the kernel. Ageing by the device is preferred if many FDB entries -are supported. The driver calls call_switchdev_notifiers(SWITCHDEV_FDB_DEL, -...) to age out the FDB entry. In this model, ageing by the kernel should be -turned off. XXX: how to turn off ageing in kernel on a per-port basis or -otherwise prevent the kernel from ageing out the FDB entry? - -In the kernel ageing model, the standard bridge ageing mechanism is used to age -out stale FDB entries. To keep an FDB entry "alive", the driver should refresh -the FDB entry by calling call_switchdev_notifiers(SWITCHDEV_FDB_ADD, ...). The +The bridge will skip ageing FDB entries marked with NTF_EXT_LEARNED and it is +the responsibility of the port driver/device to age out these entries. If the +port device supports ageing, when the FDB entry expires, it will notify the +driver which in turn will notify the bridge with SWITCHDEV_FDB_DEL. If the +device does not support ageing, the driver can simulate ageing using a +garbage collection timer to monitor FBD entries. Expired entries will be +notified to the bridge using SWITCHDEV_FDB_DEL. See rocker driver for +example of driver running ageing timer. + +To keep an NTF_EXT_LEARNED entry "alive", the driver should refresh the FDB +entry by calling call_switchdev_notifiers(SWITCHDEV_FDB_ADD, ...). The notification will reset the FDB entry's last-used time to now. The driver should rate limit refresh notifications, for example, no more than once a -second. If the FDB entry expires, fdb_delete is called to remove entry from -the device. +second. (The last-used time is visible using the bridge -s fdb option). STP State Change on Port -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next v3 6/7] bridge: don't age externally added FDB entries
From: Siva Mannem Signed-off-by: Siva Mannem Signed-off-by: Scott Feldman Acked-by: Vivien Didelot Acked-by: Jiri Pirko Acked-by: Premkumar Jonnala --- net/bridge/br_fdb.c |2 ++ 1 file changed, 2 insertions(+) diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 9e9875d..6663cc0 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -299,6 +299,8 @@ void br_fdb_cleanup(unsigned long _data) unsigned long this_timer; if (f->is_static) continue; + if (f->added_by_external_learn) + continue; this_timer = f->updated + delay; if (time_before_eq(this_timer, jiffies)) fdb_delete(br, f); -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next v3 3/7] bridge: define some min/max/default ageing time constants
From: Scott Feldman Signed-off-by: Scott Feldman --- v2: Per Jiri review comment: add BR_DEFAULT_AGEING_TIME to defines include/linux/if_bridge.h |6 ++ net/bridge/br_device.c|2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index dad8b00..a338a68 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -46,6 +46,12 @@ struct br_ip_list { #define BR_LEARNING_SYNC BIT(9) #define BR_PROXYARP_WIFI BIT(10) +/* values as per ieee8021QBridgeFdbAgingTime */ +#define BR_MIN_AGEING_TIME (10 * HZ) +#define BR_MAX_AGEING_TIME (100 * HZ) + +#define BR_DEFAULT_AGEING_TIME (300 * HZ) + extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *)); typedef int br_should_route_hook_t(struct sk_buff *skb); diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 6ed2feb..2f81624 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -391,7 +391,7 @@ void br_dev_setup(struct net_device *dev) br->bridge_max_age = br->max_age = 20 * HZ; br->bridge_hello_time = br->hello_time = 2 * HZ; br->bridge_forward_delay = br->forward_delay = 15 * HZ; - br->ageing_time = 300 * HZ; + br->ageing_time = BR_DEFAULT_AGEING_TIME; br_netfilter_rtable_init(br); br_stp_timer_init(br); -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next v3 1/7] rocker: track when FDB entry is touched.
From: Scott Feldman The entry is touched once when created, and touched again for each update. The touched time is used to calculate FDB entry age. Signed-off-by: Scott Feldman Acked-by: Jiri Pirko --- drivers/net/ethernet/rocker/rocker.c | 18 -- 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index 34ac41a..e517e9c 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -152,6 +152,7 @@ struct rocker_fdb_tbl_entry { struct hlist_node entry; u32 key_crc32; /* key */ bool learned; + unsigned long touched; struct rocker_fdb_tbl_key { u32 pport; u8 addr[ETH_ALEN]; @@ -3629,6 +3630,7 @@ static int rocker_port_fdb(struct rocker_port *rocker_port, return -ENOMEM; fdb->learned = (flags & ROCKER_OP_FLAG_LEARNED); + fdb->touched = jiffies; fdb->key.pport = rocker_port->pport; ether_addr_copy(fdb->key.addr, addr); fdb->key.vlan_id = vlan_id; @@ -3638,13 +3640,17 @@ static int rocker_port_fdb(struct rocker_port *rocker_port, found = rocker_fdb_tbl_find(rocker, fdb); - if (removing && found) { - rocker_port_kfree(trans, fdb); - if (trans != SWITCHDEV_TRANS_PREPARE) - hash_del(&found->entry); - } else if (!removing && !found) { + if (found) { + found->touched = jiffies; + if (removing) { + rocker_port_kfree(trans, fdb); + if (trans != SWITCHDEV_TRANS_PREPARE) + hash_del(&found->entry); + } + } else if (!removing) { if (trans != SWITCHDEV_TRANS_PREPARE) - hash_add(rocker->fdb_tbl, &fdb->entry, fdb->key_crc32); + hash_add(rocker->fdb_tbl, &fdb->entry, +fdb->key_crc32); } spin_unlock_irqrestore(&rocker->fdb_tbl_lock, lock_flags); -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next v3 0/7] bridge: don't age out externally added FDB entries
From: Scott Feldman v3: Per davem review: add del_timer_sync on rocker port remove. v2: Per Jiri review comment: add BR_DEFAULT_AGEING_TIME to defines Siva originally proposed skipping externally added FDB entries in the bridge's FDB garbage collection func, and moving the ageing of externally added entries to the port driver/device. This broke rocker, since rocker didn't have a hardware (or software) mechanism for ageing out its learned FDB entries. This patchset reintroduces Siva's bridge driver patch to skip externally added entries and adds support in rocker so rocker can age out its own entries. Rocker does this using a software timer similar to the bridge's FDB garbage collection timer. Other switchdev devices/drivers can use this software timer method or program the device to nofity aged-out entries to the driver. Updated switchdev.txt documentation to reflect current state-of-the-art. This removes one more XXX todo comment in switchdev.txt. Scott Feldman (6): rocker: track when FDB entry is touched. rocker: store rocker_port in fdb key rather than pport bridge: define some min/max/default ageing time constants rocker: adding port ageing_time for ageing out FDB entries rocker: add FDB cleanup timer switchdev: update documentation on FDB ageing_time Siva Mannem (1): bridge: don't age externally added FDB entries Documentation/networking/switchdev.txt | 24 +-- drivers/net/ethernet/rocker/rocker.c | 70 +++- include/linux/if_bridge.h |6 +++ net/bridge/br_device.c |2 +- net/bridge/br_fdb.c|2 + 5 files changed, 81 insertions(+), 23 deletions(-) -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next v3 2/7] rocker: store rocker_port in fdb key rather than pport
From: Scott Feldman We'll need more info from rocker_port than just pport when we age out fdb entries, so store rocker_port rather than pport in each fdb entry. Signed-off-by: Scott Feldman Acked-by: Jiri Pirko --- drivers/net/ethernet/rocker/rocker.c |8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index e517e9c..f55ed2c 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -154,7 +154,7 @@ struct rocker_fdb_tbl_entry { bool learned; unsigned long touched; struct rocker_fdb_tbl_key { - u32 pport; + struct rocker_port *rocker_port; u8 addr[ETH_ALEN]; __be16 vlan_id; } key; @@ -3631,7 +3631,7 @@ static int rocker_port_fdb(struct rocker_port *rocker_port, fdb->learned = (flags & ROCKER_OP_FLAG_LEARNED); fdb->touched = jiffies; - fdb->key.pport = rocker_port->pport; + fdb->key.rocker_port = rocker_port; ether_addr_copy(fdb->key.addr, addr); fdb->key.vlan_id = vlan_id; fdb->key_crc32 = crc32(~0, &fdb->key, sizeof(fdb->key)); @@ -3686,7 +3686,7 @@ static int rocker_port_fdb_flush(struct rocker_port *rocker_port, spin_lock_irqsave(&rocker->fdb_tbl_lock, lock_flags); hash_for_each_safe(rocker->fdb_tbl, bkt, tmp, found, entry) { - if (found->key.pport != rocker_port->pport) + if (found->key.rocker_port != rocker_port) continue; if (!found->learned) continue; @@ -4553,7 +4553,7 @@ static int rocker_port_fdb_dump(const struct rocker_port *rocker_port, spin_lock_irqsave(&rocker->fdb_tbl_lock, lock_flags); hash_for_each_safe(rocker->fdb_tbl, bkt, tmp, found, entry) { - if (found->key.pport != rocker_port->pport) + if (found->key.rocker_port != rocker_port) continue; fdb->addr = found->key.addr; fdb->ndm_state = NUD_REACHABLE; -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next v3 4/7] rocker: adding port ageing_time for ageing out FDB entries
From: Scott Feldman Follow-up patcheset will allow user to change ageing_time, but for now just hard-code it to a fixed value (the same value used as the default for the bridge driver). Signed-off-by: Scott Feldman --- drivers/net/ethernet/rocker/rocker.c |2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index f55ed2c..be8bb04 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -221,6 +221,7 @@ struct rocker_port { __be16 internal_vlan_id; int stp_state; u32 brport_flags; + unsigned long ageing_time; bool ctrls[ROCKER_CTRL_MAX]; unsigned long vlan_bitmap[ROCKER_VLAN_BITMAP_LEN]; struct napi_struct napi_tx; @@ -4975,6 +4976,7 @@ static int rocker_probe_port(struct rocker *rocker, unsigned int port_number) rocker_port->port_number = port_number; rocker_port->pport = port_number + 1; rocker_port->brport_flags = BR_LEARNING | BR_LEARNING_SYNC; + rocker_port->ageing_time = BR_DEFAULT_AGEING_TIME; INIT_LIST_HEAD(&rocker_port->trans_mem); rocker_port_dev_addr_init(rocker_port); -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next v2 6/7] bridge: don't age externally added FDB entries
From: Siva Mannem Signed-off-by: Siva Mannem Signed-off-by: Scott Feldman Acked-by: Vivien Didelot Acked-by: Jiri Pirko --- net/bridge/br_fdb.c |2 ++ 1 file changed, 2 insertions(+) diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 9e9875d..6663cc0 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -299,6 +299,8 @@ void br_fdb_cleanup(unsigned long _data) unsigned long this_timer; if (f->is_static) continue; + if (f->added_by_external_learn) + continue; this_timer = f->updated + delay; if (time_before_eq(this_timer, jiffies)) fdb_delete(br, f); -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next v2 4/7] rocker: adding port ageing_time for ageing out FDB entries
From: Scott Feldman Follow-up patcheset will allow user to change ageing_time, but for now just hard-code it to a fixed value (the same value used as the default for the bridge driver). Signed-off-by: Scott Feldman --- drivers/net/ethernet/rocker/rocker.c |2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index f55ed2c..be8bb04 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -221,6 +221,7 @@ struct rocker_port { __be16 internal_vlan_id; int stp_state; u32 brport_flags; + unsigned long ageing_time; bool ctrls[ROCKER_CTRL_MAX]; unsigned long vlan_bitmap[ROCKER_VLAN_BITMAP_LEN]; struct napi_struct napi_tx; @@ -4975,6 +4976,7 @@ static int rocker_probe_port(struct rocker *rocker, unsigned int port_number) rocker_port->port_number = port_number; rocker_port->pport = port_number + 1; rocker_port->brport_flags = BR_LEARNING | BR_LEARNING_SYNC; + rocker_port->ageing_time = BR_DEFAULT_AGEING_TIME; INIT_LIST_HEAD(&rocker_port->trans_mem); rocker_port_dev_addr_init(rocker_port); -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next v2 3/7] bridge: define some min/max/default ageing time constants
From: Scott Feldman Signed-off-by: Scott Feldman --- v2: Per Jiri review comment: add BR_DEFAULT_AGEING_TIME to defines include/linux/if_bridge.h |6 ++ net/bridge/br_device.c|2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index dad8b00..a338a68 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -46,6 +46,12 @@ struct br_ip_list { #define BR_LEARNING_SYNC BIT(9) #define BR_PROXYARP_WIFI BIT(10) +/* values as per ieee8021QBridgeFdbAgingTime */ +#define BR_MIN_AGEING_TIME (10 * HZ) +#define BR_MAX_AGEING_TIME (100 * HZ) + +#define BR_DEFAULT_AGEING_TIME (300 * HZ) + extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *)); typedef int br_should_route_hook_t(struct sk_buff *skb); diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 6ed2feb..2f81624 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -391,7 +391,7 @@ void br_dev_setup(struct net_device *dev) br->bridge_max_age = br->max_age = 20 * HZ; br->bridge_hello_time = br->hello_time = 2 * HZ; br->bridge_forward_delay = br->forward_delay = 15 * HZ; - br->ageing_time = 300 * HZ; + br->ageing_time = BR_DEFAULT_AGEING_TIME; br_netfilter_rtable_init(br); br_stp_timer_init(br); -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next v2 7/7] switchdev: update documentation on FDB ageing_time
From: Scott Feldman Signed-off-by: Scott Feldman Reviewed-by: Vivien Didelot Acked-by: Jiri Pirko --- Documentation/networking/switchdev.txt | 24 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/Documentation/networking/switchdev.txt b/Documentation/networking/switchdev.txt index 476df04..67e43ee 100644 --- a/Documentation/networking/switchdev.txt +++ b/Documentation/networking/switchdev.txt @@ -239,20 +239,20 @@ The driver should initialize the attributes to the hardware defaults. FDB Ageing ^^ -There are two FDB ageing models supported: 1) ageing by the device, and 2) -ageing by the kernel. Ageing by the device is preferred if many FDB entries -are supported. The driver calls call_switchdev_notifiers(SWITCHDEV_FDB_DEL, -...) to age out the FDB entry. In this model, ageing by the kernel should be -turned off. XXX: how to turn off ageing in kernel on a per-port basis or -otherwise prevent the kernel from ageing out the FDB entry? - -In the kernel ageing model, the standard bridge ageing mechanism is used to age -out stale FDB entries. To keep an FDB entry "alive", the driver should refresh -the FDB entry by calling call_switchdev_notifiers(SWITCHDEV_FDB_ADD, ...). The +The bridge will skip ageing FDB entries marked with NTF_EXT_LEARNED and it is +the responsibility of the port driver/device to age out these entries. If the +port device supports ageing, when the FDB entry expires, it will notify the +driver which in turn will notify the bridge with SWITCHDEV_FDB_DEL. If the +device does not support ageing, the driver can simulate ageing using a +garbage collection timer to monitor FBD entries. Expired entries will be +notified to the bridge using SWITCHDEV_FDB_DEL. See rocker driver for +example of driver running ageing timer. + +To keep an NTF_EXT_LEARNED entry "alive", the driver should refresh the FDB +entry by calling call_switchdev_notifiers(SWITCHDEV_FDB_ADD, ...). The notification will reset the FDB entry's last-used time to now. The driver should rate limit refresh notifications, for example, no more than once a -second. If the FDB entry expires, fdb_delete is called to remove entry from -the device. +second. (The last-used time is visible using the bridge -s fdb option). STP State Change on Port -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next v2 0/7] bridge: don't age out externally added FDB entries
From: Scott Feldman v2: Per Jiri review comment: add BR_DEFAULT_AGEING_TIME to defines Siva originally proposed skipping externally added FDB entries in the bridge's FDB garbage collection func, and moving the ageing of externally added entries to the port driver/device. This broke rocker, since rocker didn't have a hardware (or software) mechanism for ageing out its learned FDB entries. This patchset reintroduces Siva's bridge driver patch to skip externally added entries and adds support in rocker so rocker can age out its own entries. Rocker does this using a software timer similar to the bridge's FDB garbage collection timer. Other switchdev devices/drivers can use this software timer method or program the device to nofity aged-out entries to the driver. Updated switchdev.txt documentation to reflect current state-of-the-art. This removes one more XXX todo comment in switchdev.txt. Scott Feldman (6): rocker: track when FDB entry is touched. rocker: store rocker_port in fdb key rather than pport bridge: define some min/max/default ageing time constants rocker: adding port ageing_time for ageing out FDB entries rocker: add FDB cleanup timer switchdev: update documentation on FDB ageing_time Siva Mannem (1): bridge: don't age externally added FDB entries Documentation/networking/switchdev.txt | 24 +-- drivers/net/ethernet/rocker/rocker.c | 69 +++- include/linux/if_bridge.h |6 +++ net/bridge/br_device.c |2 +- net/bridge/br_fdb.c|2 + 5 files changed, 80 insertions(+), 23 deletions(-) -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next v2 5/7] rocker: add FDB cleanup timer
From: Scott Feldman Add a timer to each rocker switch to do FDB entry cleanup by ageing out expired entries. The timer scheduling algo is copied from the bridge driver, for the most part, to keep the firing of the timer to a minimum. Signed-off-by: Scott Feldman Acked-by: Jiri Pirko --- drivers/net/ethernet/rocker/rocker.c | 41 ++ 1 file changed, 41 insertions(+) diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index be8bb04..e4e0278 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -248,6 +248,7 @@ struct rocker { u64 flow_tbl_next_cookie; DECLARE_HASHTABLE(group_tbl, 16); spinlock_t group_tbl_lock; /* for group tbl accesses */ + struct timer_list fdb_cleanup_timer; DECLARE_HASHTABLE(fdb_tbl, 16); spinlock_t fdb_tbl_lock;/* for fdb tbl accesses */ unsigned long internal_vlan_bitmap[ROCKER_INTERNAL_VLAN_BITMAP_LEN]; @@ -3706,6 +3707,41 @@ err_out: return err; } +static void rocker_fdb_cleanup(unsigned long data) +{ + struct rocker *rocker = (struct rocker *)data; + struct rocker_port *rocker_port; + struct rocker_fdb_tbl_entry *entry; + struct hlist_node *tmp; + unsigned long next_timer = jiffies + BR_MIN_AGEING_TIME; + unsigned long expires; + unsigned long lock_flags; + int flags = ROCKER_OP_FLAG_NOWAIT | ROCKER_OP_FLAG_REMOVE | + ROCKER_OP_FLAG_LEARNED; + int bkt; + + spin_lock_irqsave(&rocker->fdb_tbl_lock, lock_flags); + + hash_for_each_safe(rocker->fdb_tbl, bkt, tmp, entry, entry) { + if (!entry->learned) + continue; + rocker_port = entry->key.rocker_port; + expires = entry->touched + rocker_port->ageing_time; + if (time_before_eq(expires, jiffies)) { + rocker_port_fdb_learn(rocker_port, SWITCHDEV_TRANS_NONE, + flags, entry->key.addr, + entry->key.vlan_id); + hash_del(&entry->entry); + } else if (time_before(expires, next_timer)) { + next_timer = expires; + } + } + + spin_unlock_irqrestore(&rocker->fdb_tbl_lock, lock_flags); + + mod_timer(&rocker->fdb_cleanup_timer, round_jiffies_up(next_timer)); +} + static int rocker_port_router_mac(struct rocker_port *rocker_port, enum switchdev_trans trans, int flags, __be16 vlan_id) @@ -5191,6 +5227,10 @@ static int rocker_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto err_init_tbls; } + setup_timer(&rocker->fdb_cleanup_timer, rocker_fdb_cleanup, + (unsigned long) rocker); + mod_timer(&rocker->fdb_cleanup_timer, jiffies); + err = rocker_probe_ports(rocker); if (err) { dev_err(&pdev->dev, "failed to probe ports\n"); @@ -5203,6 +5243,7 @@ static int rocker_probe(struct pci_dev *pdev, const struct pci_device_id *id) return 0; err_probe_ports: + del_timer(&rocker->fdb_cleanup_timer); rocker_free_tbls(rocker); err_init_tbls: free_irq(rocker_msix_vector(rocker, ROCKER_MSIX_VEC_EVENT), rocker); -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next v2 1/7] rocker: track when FDB entry is touched.
From: Scott Feldman The entry is touched once when created, and touched again for each update. The touched time is used to calculate FDB entry age. Signed-off-by: Scott Feldman Acked-by: Jiri Pirko --- drivers/net/ethernet/rocker/rocker.c | 18 -- 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index 34ac41a..e517e9c 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -152,6 +152,7 @@ struct rocker_fdb_tbl_entry { struct hlist_node entry; u32 key_crc32; /* key */ bool learned; + unsigned long touched; struct rocker_fdb_tbl_key { u32 pport; u8 addr[ETH_ALEN]; @@ -3629,6 +3630,7 @@ static int rocker_port_fdb(struct rocker_port *rocker_port, return -ENOMEM; fdb->learned = (flags & ROCKER_OP_FLAG_LEARNED); + fdb->touched = jiffies; fdb->key.pport = rocker_port->pport; ether_addr_copy(fdb->key.addr, addr); fdb->key.vlan_id = vlan_id; @@ -3638,13 +3640,17 @@ static int rocker_port_fdb(struct rocker_port *rocker_port, found = rocker_fdb_tbl_find(rocker, fdb); - if (removing && found) { - rocker_port_kfree(trans, fdb); - if (trans != SWITCHDEV_TRANS_PREPARE) - hash_del(&found->entry); - } else if (!removing && !found) { + if (found) { + found->touched = jiffies; + if (removing) { + rocker_port_kfree(trans, fdb); + if (trans != SWITCHDEV_TRANS_PREPARE) + hash_del(&found->entry); + } + } else if (!removing) { if (trans != SWITCHDEV_TRANS_PREPARE) - hash_add(rocker->fdb_tbl, &fdb->entry, fdb->key_crc32); + hash_add(rocker->fdb_tbl, &fdb->entry, +fdb->key_crc32); } spin_unlock_irqrestore(&rocker->fdb_tbl_lock, lock_flags); -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next v2 2/7] rocker: store rocker_port in fdb key rather than pport
From: Scott Feldman We'll need more info from rocker_port than just pport when we age out fdb entries, so store rocker_port rather than pport in each fdb entry. Signed-off-by: Scott Feldman Acked-by: Jiri Pirko --- drivers/net/ethernet/rocker/rocker.c |8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index e517e9c..f55ed2c 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -154,7 +154,7 @@ struct rocker_fdb_tbl_entry { bool learned; unsigned long touched; struct rocker_fdb_tbl_key { - u32 pport; + struct rocker_port *rocker_port; u8 addr[ETH_ALEN]; __be16 vlan_id; } key; @@ -3631,7 +3631,7 @@ static int rocker_port_fdb(struct rocker_port *rocker_port, fdb->learned = (flags & ROCKER_OP_FLAG_LEARNED); fdb->touched = jiffies; - fdb->key.pport = rocker_port->pport; + fdb->key.rocker_port = rocker_port; ether_addr_copy(fdb->key.addr, addr); fdb->key.vlan_id = vlan_id; fdb->key_crc32 = crc32(~0, &fdb->key, sizeof(fdb->key)); @@ -3686,7 +3686,7 @@ static int rocker_port_fdb_flush(struct rocker_port *rocker_port, spin_lock_irqsave(&rocker->fdb_tbl_lock, lock_flags); hash_for_each_safe(rocker->fdb_tbl, bkt, tmp, found, entry) { - if (found->key.pport != rocker_port->pport) + if (found->key.rocker_port != rocker_port) continue; if (!found->learned) continue; @@ -4553,7 +4553,7 @@ static int rocker_port_fdb_dump(const struct rocker_port *rocker_port, spin_lock_irqsave(&rocker->fdb_tbl_lock, lock_flags); hash_for_each_safe(rocker->fdb_tbl, bkt, tmp, found, entry) { - if (found->key.pport != rocker_port->pport) + if (found->key.rocker_port != rocker_port) continue; fdb->addr = found->key.addr; fdb->ndm_state = NUD_REACHABLE; -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next 7/7] switchdev: update documentation on FDB ageing_time
From: Scott Feldman Signed-off-by: Scott Feldman --- Documentation/networking/switchdev.txt | 24 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/Documentation/networking/switchdev.txt b/Documentation/networking/switchdev.txt index 476df04..67e43ee 100644 --- a/Documentation/networking/switchdev.txt +++ b/Documentation/networking/switchdev.txt @@ -239,20 +239,20 @@ The driver should initialize the attributes to the hardware defaults. FDB Ageing ^^ -There are two FDB ageing models supported: 1) ageing by the device, and 2) -ageing by the kernel. Ageing by the device is preferred if many FDB entries -are supported. The driver calls call_switchdev_notifiers(SWITCHDEV_FDB_DEL, -...) to age out the FDB entry. In this model, ageing by the kernel should be -turned off. XXX: how to turn off ageing in kernel on a per-port basis or -otherwise prevent the kernel from ageing out the FDB entry? - -In the kernel ageing model, the standard bridge ageing mechanism is used to age -out stale FDB entries. To keep an FDB entry "alive", the driver should refresh -the FDB entry by calling call_switchdev_notifiers(SWITCHDEV_FDB_ADD, ...). The +The bridge will skip ageing FDB entries marked with NTF_EXT_LEARNED and it is +the responsibility of the port driver/device to age out these entries. If the +port device supports ageing, when the FDB entry expires, it will notify the +driver which in turn will notify the bridge with SWITCHDEV_FDB_DEL. If the +device does not support ageing, the driver can simulate ageing using a +garbage collection timer to monitor FBD entries. Expired entries will be +notified to the bridge using SWITCHDEV_FDB_DEL. See rocker driver for +example of driver running ageing timer. + +To keep an NTF_EXT_LEARNED entry "alive", the driver should refresh the FDB +entry by calling call_switchdev_notifiers(SWITCHDEV_FDB_ADD, ...). The notification will reset the FDB entry's last-used time to now. The driver should rate limit refresh notifications, for example, no more than once a -second. If the FDB entry expires, fdb_delete is called to remove entry from -the device. +second. (The last-used time is visible using the bridge -s fdb option). STP State Change on Port -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next 3/7] rocker: adding port ageing_time for ageing out FDB entries
From: Scott Feldman Follow-up patcheset will allow user to change ageing_time, but for now just hard-code it to a fixed value (the same value used as the default for the bridge driver). Signed-off-by: Scott Feldman --- drivers/net/ethernet/rocker/rocker.c |2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index f55ed2c..eba22f5 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -221,6 +221,7 @@ struct rocker_port { __be16 internal_vlan_id; int stp_state; u32 brport_flags; + unsigned long ageing_time; bool ctrls[ROCKER_CTRL_MAX]; unsigned long vlan_bitmap[ROCKER_VLAN_BITMAP_LEN]; struct napi_struct napi_tx; @@ -4975,6 +4976,7 @@ static int rocker_probe_port(struct rocker *rocker, unsigned int port_number) rocker_port->port_number = port_number; rocker_port->pport = port_number + 1; rocker_port->brport_flags = BR_LEARNING | BR_LEARNING_SYNC; + rocker_port->ageing_time = 300 * HZ; INIT_LIST_HEAD(&rocker_port->trans_mem); rocker_port_dev_addr_init(rocker_port); -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next 5/7] rocker: add FDB cleanup timer
From: Scott Feldman Add a timer to each rocker switch to do FDB entry cleanup by ageing out expired entries. The timer scheduling algo is copied from the bridge driver, for the most part, to keep the firing of the timer to a minimum. Signed-off-by: Scott Feldman --- drivers/net/ethernet/rocker/rocker.c | 41 ++ 1 file changed, 41 insertions(+) diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index eba22f5..232df69 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -248,6 +248,7 @@ struct rocker { u64 flow_tbl_next_cookie; DECLARE_HASHTABLE(group_tbl, 16); spinlock_t group_tbl_lock; /* for group tbl accesses */ + struct timer_list fdb_cleanup_timer; DECLARE_HASHTABLE(fdb_tbl, 16); spinlock_t fdb_tbl_lock;/* for fdb tbl accesses */ unsigned long internal_vlan_bitmap[ROCKER_INTERNAL_VLAN_BITMAP_LEN]; @@ -3706,6 +3707,41 @@ err_out: return err; } +static void rocker_fdb_cleanup(unsigned long data) +{ + struct rocker *rocker = (struct rocker *)data; + struct rocker_port *rocker_port; + struct rocker_fdb_tbl_entry *entry; + struct hlist_node *tmp; + unsigned long next_timer = jiffies + BR_MIN_AGEING_TIME; + unsigned long expires; + unsigned long lock_flags; + int flags = ROCKER_OP_FLAG_NOWAIT | ROCKER_OP_FLAG_REMOVE | + ROCKER_OP_FLAG_LEARNED; + int bkt; + + spin_lock_irqsave(&rocker->fdb_tbl_lock, lock_flags); + + hash_for_each_safe(rocker->fdb_tbl, bkt, tmp, entry, entry) { + if (!entry->learned) + continue; + rocker_port = entry->key.rocker_port; + expires = entry->touched + rocker_port->ageing_time; + if (time_before_eq(expires, jiffies)) { + rocker_port_fdb_learn(rocker_port, SWITCHDEV_TRANS_NONE, + flags, entry->key.addr, + entry->key.vlan_id); + hash_del(&entry->entry); + } else if (time_before(expires, next_timer)) { + next_timer = expires; + } + } + + spin_unlock_irqrestore(&rocker->fdb_tbl_lock, lock_flags); + + mod_timer(&rocker->fdb_cleanup_timer, round_jiffies_up(next_timer)); +} + static int rocker_port_router_mac(struct rocker_port *rocker_port, enum switchdev_trans trans, int flags, __be16 vlan_id) @@ -5191,6 +5227,10 @@ static int rocker_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto err_init_tbls; } + setup_timer(&rocker->fdb_cleanup_timer, rocker_fdb_cleanup, + (unsigned long) rocker); + mod_timer(&rocker->fdb_cleanup_timer, jiffies); + err = rocker_probe_ports(rocker); if (err) { dev_err(&pdev->dev, "failed to probe ports\n"); @@ -5203,6 +5243,7 @@ static int rocker_probe(struct pci_dev *pdev, const struct pci_device_id *id) return 0; err_probe_ports: + del_timer(&rocker->fdb_cleanup_timer); rocker_free_tbls(rocker); err_init_tbls: free_irq(rocker_msix_vector(rocker, ROCKER_MSIX_VEC_EVENT), rocker); -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next 0/7] bridge: don't age out externally added FDB entries
From: Scott Feldman Siva originally proposed skipping externally added FDB entries in the bridge's FDB garbage collection func, and moving the ageing of externally added entries to the port driver/device. This broke rocker, since rocker didn't have a hardware (or software) mechanism for ageing out its learned FDB entries. This patchset reintroduces Siva's bridge driver patch to skip externally added entries and adds support in rocker so rocker can age out its own entries. Rocker does this using a software timer similar to the bridge's FDB garbage collection timer. Other switchdev devices/drivers can use this software timer method or program the device to nofity aged-out entries to the driver. Updated switchdev.txt documentation to reflect current state-of-the-art. This removes one more XXX todo comment in switchdev.txt. Scott Feldman (6): rocker: track when FDB entry is touched. rocker: store rocker_port in fdb key rather than pport rocker: adding port ageing_time for ageing out FDB entries bridge: define some min/max ageing time constants we'll use next rocker: add FDB cleanup timer switchdev: update documentation on FDB ageing_time Siva Mannem (1): bridge: don't age externally added FDB entries Documentation/networking/switchdev.txt | 24 +-- drivers/net/ethernet/rocker/rocker.c | 69 +++- include/linux/if_bridge.h |4 ++ net/bridge/br_fdb.c|2 + 4 files changed, 77 insertions(+), 22 deletions(-) -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next 6/7] bridge: don't age externally added FDB entries
From: Siva Mannem Signed-off-by: Siva Mannem Signed-off-by: Scott Feldman --- net/bridge/br_fdb.c |2 ++ 1 file changed, 2 insertions(+) diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 9e9875d..6663cc0 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -299,6 +299,8 @@ void br_fdb_cleanup(unsigned long _data) unsigned long this_timer; if (f->is_static) continue; + if (f->added_by_external_learn) + continue; this_timer = f->updated + delay; if (time_before_eq(this_timer, jiffies)) fdb_delete(br, f); -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next 1/7] rocker: track when FDB entry is touched.
From: Scott Feldman The entry is touched once when created, and touched again for each update. The touched time is used to calculate FDB entry age. Signed-off-by: Scott Feldman --- drivers/net/ethernet/rocker/rocker.c | 18 -- 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index 34ac41a..e517e9c 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -152,6 +152,7 @@ struct rocker_fdb_tbl_entry { struct hlist_node entry; u32 key_crc32; /* key */ bool learned; + unsigned long touched; struct rocker_fdb_tbl_key { u32 pport; u8 addr[ETH_ALEN]; @@ -3629,6 +3630,7 @@ static int rocker_port_fdb(struct rocker_port *rocker_port, return -ENOMEM; fdb->learned = (flags & ROCKER_OP_FLAG_LEARNED); + fdb->touched = jiffies; fdb->key.pport = rocker_port->pport; ether_addr_copy(fdb->key.addr, addr); fdb->key.vlan_id = vlan_id; @@ -3638,13 +3640,17 @@ static int rocker_port_fdb(struct rocker_port *rocker_port, found = rocker_fdb_tbl_find(rocker, fdb); - if (removing && found) { - rocker_port_kfree(trans, fdb); - if (trans != SWITCHDEV_TRANS_PREPARE) - hash_del(&found->entry); - } else if (!removing && !found) { + if (found) { + found->touched = jiffies; + if (removing) { + rocker_port_kfree(trans, fdb); + if (trans != SWITCHDEV_TRANS_PREPARE) + hash_del(&found->entry); + } + } else if (!removing) { if (trans != SWITCHDEV_TRANS_PREPARE) - hash_add(rocker->fdb_tbl, &fdb->entry, fdb->key_crc32); + hash_add(rocker->fdb_tbl, &fdb->entry, +fdb->key_crc32); } spin_unlock_irqrestore(&rocker->fdb_tbl_lock, lock_flags); -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next 2/7] rocker: store rocker_port in fdb key rather than pport
From: Scott Feldman We'll need more info from rocker_port than just pport when we age out fdb entries, so store rocker_port rather than pport in each fdb entry. Signed-off-by: Scott Feldman --- drivers/net/ethernet/rocker/rocker.c |8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index e517e9c..f55ed2c 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -154,7 +154,7 @@ struct rocker_fdb_tbl_entry { bool learned; unsigned long touched; struct rocker_fdb_tbl_key { - u32 pport; + struct rocker_port *rocker_port; u8 addr[ETH_ALEN]; __be16 vlan_id; } key; @@ -3631,7 +3631,7 @@ static int rocker_port_fdb(struct rocker_port *rocker_port, fdb->learned = (flags & ROCKER_OP_FLAG_LEARNED); fdb->touched = jiffies; - fdb->key.pport = rocker_port->pport; + fdb->key.rocker_port = rocker_port; ether_addr_copy(fdb->key.addr, addr); fdb->key.vlan_id = vlan_id; fdb->key_crc32 = crc32(~0, &fdb->key, sizeof(fdb->key)); @@ -3686,7 +3686,7 @@ static int rocker_port_fdb_flush(struct rocker_port *rocker_port, spin_lock_irqsave(&rocker->fdb_tbl_lock, lock_flags); hash_for_each_safe(rocker->fdb_tbl, bkt, tmp, found, entry) { - if (found->key.pport != rocker_port->pport) + if (found->key.rocker_port != rocker_port) continue; if (!found->learned) continue; @@ -4553,7 +4553,7 @@ static int rocker_port_fdb_dump(const struct rocker_port *rocker_port, spin_lock_irqsave(&rocker->fdb_tbl_lock, lock_flags); hash_for_each_safe(rocker->fdb_tbl, bkt, tmp, found, entry) { - if (found->key.pport != rocker_port->pport) + if (found->key.rocker_port != rocker_port) continue; fdb->addr = found->key.addr; fdb->ndm_state = NUD_REACHABLE; -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next 4/7] bridge: define some min/max ageing time constants we'll use next
From: Scott Feldman Signed-off-by: Scott Feldman --- include/linux/if_bridge.h |4 1 file changed, 4 insertions(+) diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index dad8b00..6cc6dbc 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -46,6 +46,10 @@ struct br_ip_list { #define BR_LEARNING_SYNC BIT(9) #define BR_PROXYARP_WIFI BIT(10) +/* values as per ieee8021QBridgeFdbAgingTime */ +#define BR_MIN_AGEING_TIME (10 * HZ) +#define BR_MAX_AGEING_TIME (100 * HZ) + extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *)); typedef int br_should_route_hook_t(struct sk_buff *skb); -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[RFC PATCH net-next 2/2] rocker: register each switch as a switchdev
From: Scott Feldman Signed-off-by: Scott Feldman --- drivers/net/ethernet/rocker/rocker.c | 23 +++ 1 file changed, 23 insertions(+) diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index a7cb74a..9555ae4 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -233,6 +233,7 @@ struct rocker { struct pci_dev *pdev; u8 __iomem *hw_addr; struct msix_entry *msix_entries; + struct switchdev switchdev; unsigned int port_count; struct rocker_port **ports; struct { @@ -5090,6 +5091,19 @@ static void rocker_msix_fini(const struct rocker *rocker) kfree(rocker->msix_entries); } +static int rocker_probe_register_switchdev(struct rocker *rocker) +{ + char name[sizeof(rocker->hw.id) * 2 + 1]; + + sprintf(name, "%*phN", (int)sizeof(rocker->hw.id), &rocker->hw.id); + return register_switchdev(&rocker->switchdev, name); +} + +static void rocker_probe_unregister_switchdev(struct rocker *rocker) +{ + unregister_switchdev(&rocker->switchdev); +} + static int rocker_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct rocker *rocker; @@ -5194,11 +5208,19 @@ static int rocker_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto err_probe_ports; } + err = rocker_probe_register_switchdev(rocker); + if (err) { + dev_err(&pdev->dev, "cannot register switchdev\n"); + goto err_register_switchdev; + } + dev_info(&pdev->dev, "Rocker switch with id %*phN\n", (int)sizeof(rocker->hw.id), &rocker->hw.id); return 0; +err_register_switchdev: + rocker_remove_ports(rocker); err_probe_ports: rocker_free_tbls(rocker); err_init_tbls: @@ -5227,6 +5249,7 @@ static void rocker_remove(struct pci_dev *pdev) { struct rocker *rocker = pci_get_drvdata(pdev); + rocker_probe_unregister_switchdev(rocker); rocker_free_tbls(rocker); rocker_write32(rocker, CONTROL, ROCKER_CONTROL_RESET); rocker_remove_ports(rocker); -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[RFC PATCH net-next 0/2] Add new switchdev device class
From: Scott Feldman In the switchdev model, we use netdevs to represent switchdev ports, but we have no representation for the switch itself. So, introduce a new switchdev device class so we can define semantics and programming interfaces for the switch itself. Switchdev device class isn't tied to any particular bus. This patch set is just the skeleton to get us started. It adds the sysfs object registration for the new class and defines a class-level attr "foo". With the new class, we could hook PM functions, for example, to handle power transitions at the switch level. I registered rocker and get: $ ls /sys/class/switchdev/525400123501/ foo power subsystem uevent So what next? I'd rather not build APIs around sysfs, so we need a netlink API we can build on top of this. It's not really rtnl. Maybe genl would work? What ever it is, we'd need to teach iproute2 about a new 'switch' command. Netlink API would allow us to represent switch-wide objects such as registers, tables, stats, firmware, and maybe even control. I think with with netlink TLVs, we can create a framework for these objects but still allow the switch driver provide switch-specific info. For example, a table object: [TABLES] [TABLE] [FIELDS] [FIELD] [ID, TYPE] [DATA] [ID, VALUE] Maybe iproute2 has pretty-printers for specific switches like ethtool has for reg dumps. I don't know about how this overlaps with DSA platform_class. Florian? Comments? Scott Feldman (2): switchdev: create new switchdev device class rocker: register each switch as a switchdev drivers/net/ethernet/rocker/rocker.c | 23 ++ include/net/switchdev.h | 16 +++ net/switchdev/switchdev.c| 76 ++ 3 files changed, 115 insertions(+) -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[RFC PATCH net-next 1/2] switchdev: create new switchdev device class
From: Scott Feldman Signed-off-by: Scott Feldman --- include/net/switchdev.h | 16 ++ net/switchdev/switchdev.c | 76 + 2 files changed, 92 insertions(+) diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 319baab..d61e73c 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -16,6 +16,11 @@ #define SWITCHDEV_F_NO_RECURSE BIT(0) +struct switchdev { + struct device dev; + atomic_t foo; +}; + enum switchdev_trans { SWITCHDEV_TRANS_NONE, SWITCHDEV_TRANS_PREPARE, @@ -126,6 +131,8 @@ switchdev_notifier_info_to_dev(const struct switchdev_notifier_info *info) #ifdef CONFIG_NET_SWITCHDEV +int register_switchdev(struct switchdev *sdev, const char *name); +void unregister_switchdev(struct switchdev *sdev); int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr); int switchdev_port_attr_set(struct net_device *dev, @@ -164,6 +171,15 @@ void switchdev_port_fwd_mark_set(struct net_device *dev, #else +static inline int register_switchdev(struct switchdev *sdev, const char *name) +{ + return -EOPNOTSUPP; +} + +static inline void unregister_switchdev(struct switchdev *sdev) +{ +} + static inline int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr) { diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 16c1c43..f705202 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -10,6 +10,7 @@ */ #include +#include #include #include #include @@ -19,6 +20,63 @@ #include #include +#define to_switchdev(d) container_of(d, struct switchdev, dev) + +static void switchdev_release(struct device *dev) +{ +} + +static ssize_t foo_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct switchdev *switchdev = to_switchdev(dev); + + return sprintf(buf, "%d\n", atomic_read(&switchdev->foo)); +} + +static DEVICE_ATTR_RO(foo); + +static struct attribute *switchdev_attrs[] = { + &dev_attr_foo.attr +}; + +ATTRIBUTE_GROUPS(switchdev); + +static int switchdev_uevent(struct device *d, struct kobj_uevent_env *env) +{ + return 0; +} + +static struct class switchdev_class = { + .name = "switchdev", + .dev_release = switchdev_release, + .dev_groups = switchdev_groups, + .dev_uevent = switchdev_uevent, +}; + +int register_switchdev(struct switchdev *sdev, const char *name) +{ + struct device *dev = &sdev->dev; + int err; + + device_initialize(dev); + + dev->class = &switchdev_class; + + err = dev_set_name(dev, "%s", name); + if (err) + return err; + + return device_add(dev); +} +EXPORT_SYMBOL_GPL(register_switchdev); + +void unregister_switchdev(struct switchdev *sdev) +{ + put_device(&sdev->dev); +} +EXPORT_SYMBOL_GPL(unregister_switchdev); + /** * switchdev_port_attr_get - Get port attribute * @@ -1142,3 +1200,21 @@ void switchdev_port_fwd_mark_set(struct net_device *dev, dev->offload_fwd_mark = mark; } EXPORT_SYMBOL_GPL(switchdev_port_fwd_mark_set); + +static int __init switchdev_module_init(void) +{ + return class_register(&switchdev_class); +} + +static void __exit switchdev_module_exit(void) +{ + class_unregister(&switchdev_class); +} + +module_init(switchdev_module_init); +module_exit(switchdev_module_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Jiri Pirko "); +MODULE_AUTHOR("Scott Feldman "); +MODULE_DESCRIPTION("Ethernet switch device model"); -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next] bridge: fix netlink max attr size
From: Scott Feldman .maxtype should match .policy. Probably just been getting lucky here because IFLA_BRPORT_MAX > IFLA_BR_MAX. Fixes: 13323516 ("bridge: implement rtnl_link_ops->changelink") Signed-off-by: Scott Feldman --- net/bridge/br_netlink.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 01401ea..d2c4d66 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -849,7 +849,7 @@ struct rtnl_link_ops br_link_ops __read_mostly = { .kind = "bridge", .priv_size = sizeof(struct net_bridge), .setup = br_dev_setup, - .maxtype= IFLA_BRPORT_MAX, + .maxtype= IFLA_BR_MAX, .policy = br_policy, .validate = br_validate, .newlink= br_dev_newlink, -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next v3] rocker: add debugfs support to dump internal tables
From: Scott Feldman > tree /sys/kernel/debug/rocker /sys/kernel/debug/rocker └── 525400123501 ├── fdb_tbl ├── internal_vlan_tbl ├── neigh_tbl ├── of_dpa_flow_tbl └── of_dpa_group_tbl 1 directory, 5 files > cat /sys/kernel/debug/rocker/525400123501/internal_vlan_tbl ifindex 5 ref_count 1 vlan 3843 ifindex 7 ref_count 2 vlan 3840 ifindex 4 ref_count 1 vlan 3842 > cat /sys/kernel/debug/rocker/525400123501/fdb_tbl learned 1 pport 1 addr 00:02:00:00:02:00 vlan 3840 learned 1 pport 2 addr 00:02:00:00:03:00 vlan 3840 > cat /sys/kernel/debug/rocker/525400123501/neigh_tbl 11.0.0.9 dev sw1p2 ref_count 3 index 1 dst 00:02:00:00:01:00 ttl_check 1 11.0.0.1 dev sw1p1 ref_count 3 index 0 dst 00:02:00:00:00:00 ttl_check 1 > cat /sys/kernel/debug/rocker/525400123501/of_dpa_flow_tbl cmd 3 cookie 15 priority 3 tbl aclin_pport 2 01:80:c2:00:00:00/ff:ff:ff:ff:ff:f0 eth_type 0x vlan_id 3841 ip proto 0/0 ip tos 0/0 group_id 0x0f01 cmd 3 cookie 2 priority 0 tbl term_mac in_pport 1 eth_type 0x0800 52:54:00:12:35:01 vlan_id 3840 goto_tbl ucast_routing copy_to_cpu 0 cmd 3 cookie 1f priority 3 tbl bridge 00:02:00:00:00:00 vlan_id 3840 tunnel_id 0 goto_tbl acl group_id 0x copy_to_cpu 0 cmd 3 cookie 4 priority 1 tbl vlan in_pport 2 vlan_id 0 goto_tbl term_mac untagged 1 new_vlan_id 3841 cmd 3 cookie 20 priority 0 tbl ucast_routing eth_type 0x0800 11.0.0.1 goto_tbl acl group_id 0x2000 cmd 3 cookie 21 priority 3 tbl bridge 00:02:00:00:01:00 vlan_id 3841 tunnel_id 0 goto_tbl acl group_id 0x copy_to_cpu 0 cmd 3 cookie 16 priority 2 tbl aclin_pport 2 eth_type 0x0806 vlan_id 3841 ip proto 0/0 ip tos 0/0 group_id 0x0f01 cmd 3 cookie 12 priority 0 tbl ucast_routing eth_type 0x0800 11.0.0.0 goto_tbl acl group_id 0x0f00 cmd 3 cookie 9 priority 3 tbl aclin_pport 1 01:80:c2:00:00:00/ff:ff:ff:ff:ff:f0 eth_type 0x vlan_id 3840 ip proto 0/0 ip tos 0/0 group_id 0x0f00 cmd 3 cookie 6 priority 0 tbl term_mac in_pport 2 eth_type 0x86dd 52:54:00:12:35:02 vlan_id 3841 goto_tbl ucast_routing copy_to_cpu 0 cmd 4 cookie 0 priority 1 tbl ig_portin_pport 0/0x goto_tbl vlan cmd 4 cookie e priority 0 tbl ucast_routing eth_type 0x0800 11.0.0.3 goto_tbl acl group_id 0x0f00 cmd 3 cookie 1 priority 1 tbl vlan in_pport 1 vlan_id 0 goto_tbl term_mac untagged 1 new_vlan_id 3840 cmd 3 cookie 24 priority 20 tbl ucast_routing eth_type 0x0800 11.0.0.4/255.255.255.252 goto_tbl acl group_id 0x2000 cmd 4 cookie 14 priority 0 tbl ucast_routing eth_type 0x0800 11.0.0.10 goto_tbl acl group_id 0x0f01 cmd 3 cookie 2c priority 20 tbl ucast_routing eth_type 0x0800 12.0.0.4 goto_tbl acl group_id 0x2001 cmd 3 cookie 17 priority 1 tbl term_mac in_pport 2 eth_type 0x0800 01:00:5e:00:00:00/ff:ff:ff:80:00:00 vlan_id 3841 goto_tbl mcast_routing copy_to_cpu 1 cmd 3 cookie 26 priority 20 tbl ucast_routing eth_type 0x0800 12.0.0.3 goto_tbl acl group_id 0x2000 cmd 3 cookie 2e priority 30 tbl ucast_routing eth_type 0x0800 12.0.0.2 goto_tbl acl group_id 0x0f01 cmd 3 cookie 22 priority 0 tbl ucast_routing eth_type 0x0800 11.0.0.9 goto_tbl acl group_id 0x2001 cmd 3 cookie 1c priority 0 tbl ucast_routing eth_type 0x0800 11.0.0.8/255.255.255.252 goto_tbl acl group_id 0x0f01 cmd 3 cookie 18 priority 1 tbl term_mac in_pport 2 eth_type 0x86dd 33:33:00:00:00:00/ff:ff:00:00:00:00 vlan_id 3841 goto_tbl mcast_routing copy_to_cpu 1 cmd 3 cookie 5 priority 0 tbl term_mac in_pport 2 eth_type 0x0800 52:54:00:12:35:02 vlan_id 3841 goto_tbl ucast_routing copy_to_cpu 0 cmd 3 cookie a priority 2 tbl aclin_pport 1 eth_type 0x0806 vlan_id 3840 ip proto 0/0 ip tos 0/0 group_id 0x0f00 cmd 4 cookie 1a priority 0 tbl ucast_routing eth_type 0x0800 11.0.0.11 goto_tbl acl group_id 0x0f01 cmd 3 cookie 1e priority 0 tbl ucast_routing eth_type 0x0800 11.0.0.8 goto_tbl acl group_id 0x0f01 cmd 3 cookie 3 priority 0 tbl term_mac in_pport 1 eth_type 0x86dd 52:54:00:12:35:01 vlan_id 3840 goto_tbl ucast_routing copy_to_cpu 0 cmd 3 cookie b priority 1 tbl term_mac in_pport 1 eth_type 0x0800 01:00:5e:00:00:00/ff:ff:ff:80:00:00 vlan_id 3840 goto_tbl mcast_routing copy_to_cpu 1 cmd 4 cookie 8 priority 0 tbl ucast_routing eth_type 0x0800 11.0.0.2 goto_tbl acl group_id 0x0f00 cmd 3 cookie 10 priority 0 tbl ucast_routing eth_type 0x0800 11.0.0.0/255.255.255.252 goto_tbl acl group_id 0x0f00 cmd 3 cookie 28 priority 20 tbl ucast_routing eth_type 0x0800 11.0.0.12/255.255.255.252 goto_tbl acl group_id 0x2001 cmd 3 cookie c priority 1 tbl term_mac in_pport 1 eth_type 0x86dd 33:33:00:00:00:00/ff:ff:00:00:00:00 vlan_id 3840 goto_tbl mcast_routing copy_to_cpu 1 > cat /sys/kernel/debug/rocker/525400123501/of_dpa_group_tbl cmd 7 group_id 0x0f00 (L2 interface v
[PATCH net-next v2] rocker: add debugfs support to dump internal tables
From: Scott Feldman > tree /sys/kernel/debug/rocker /sys/kernel/debug/rocker └── 525400123501 ├── fdb_tbl ├── internal_vlan_tbl ├── neigh_tbl ├── of_dpa_flow_tbl └── of_dpa_group_tbl 1 directory, 5 files > cat /sys/kernel/debug/rocker/525400123501/internal_vlan_tbl ifindex 5 ref_count 1 vlan 3843 ifindex 7 ref_count 2 vlan 3840 ifindex 4 ref_count 1 vlan 3842 > cat /sys/kernel/debug/rocker/525400123501/fdb_tbl learned 1 pport 1 addr 00:02:00:00:02:00 vlan 3840 learned 1 pport 2 addr 00:02:00:00:03:00 vlan 3840 > cat /sys/kernel/debug/rocker/525400123501/neigh_tbl 11.0.0.9 dev sw1p2 ref_count 3 index 1 dst 00:02:00:00:01:00 ttl_check 1 11.0.0.1 dev sw1p1 ref_count 3 index 0 dst 00:02:00:00:00:00 ttl_check 1 > cat /sys/kernel/debug/rocker/525400123501/of_dpa_flow_tbl cmd 3 cookie 15 priority 3 tbl aclin_pport 2 01:80:c2:00:00:00/ff:ff:ff:ff:ff:f0 eth_type 0x vlan_id 3841 ip proto 0/0 ip tos 0/0 group_id 0x0f01 cmd 3 cookie 2 priority 0 tbl term_mac in_pport 1 eth_type 0x0800 52:54:00:12:35:01 vlan_id 3840 goto_tbl ucast_routing copy_to_cpu 0 cmd 3 cookie 1f priority 3 tbl bridge 00:02:00:00:00:00 vlan_id 3840 tunnel_id 0 goto_tbl acl group_id 0x copy_to_cpu 0 cmd 3 cookie 4 priority 1 tbl vlan in_pport 2 vlan_id 0 goto_tbl term_mac untagged 1 new_vlan_id 3841 cmd 3 cookie 20 priority 0 tbl ucast_routing eth_type 0x0800 11.0.0.1 goto_tbl acl group_id 0x2000 cmd 3 cookie 21 priority 3 tbl bridge 00:02:00:00:01:00 vlan_id 3841 tunnel_id 0 goto_tbl acl group_id 0x copy_to_cpu 0 cmd 3 cookie 16 priority 2 tbl aclin_pport 2 eth_type 0x0806 vlan_id 3841 ip proto 0/0 ip tos 0/0 group_id 0x0f01 cmd 3 cookie 12 priority 0 tbl ucast_routing eth_type 0x0800 11.0.0.0 goto_tbl acl group_id 0x0f00 cmd 3 cookie 9 priority 3 tbl aclin_pport 1 01:80:c2:00:00:00/ff:ff:ff:ff:ff:f0 eth_type 0x vlan_id 3840 ip proto 0/0 ip tos 0/0 group_id 0x0f00 cmd 3 cookie 6 priority 0 tbl term_mac in_pport 2 eth_type 0x86dd 52:54:00:12:35:02 vlan_id 3841 goto_tbl ucast_routing copy_to_cpu 0 cmd 4 cookie 0 priority 1 tbl ig_portin_pport 0/0x goto_tbl vlan cmd 4 cookie e priority 0 tbl ucast_routing eth_type 0x0800 11.0.0.3 goto_tbl acl group_id 0x0f00 cmd 3 cookie 1 priority 1 tbl vlan in_pport 1 vlan_id 0 goto_tbl term_mac untagged 1 new_vlan_id 3840 cmd 3 cookie 24 priority 20 tbl ucast_routing eth_type 0x0800 11.0.0.4/255.255.255.252 goto_tbl acl group_id 0x2000 cmd 4 cookie 14 priority 0 tbl ucast_routing eth_type 0x0800 11.0.0.10 goto_tbl acl group_id 0x0f01 cmd 3 cookie 2c priority 20 tbl ucast_routing eth_type 0x0800 12.0.0.4 goto_tbl acl group_id 0x2001 cmd 3 cookie 17 priority 1 tbl term_mac in_pport 2 eth_type 0x0800 01:00:5e:00:00:00/ff:ff:ff:80:00:00 vlan_id 3841 goto_tbl mcast_routing copy_to_cpu 1 cmd 3 cookie 26 priority 20 tbl ucast_routing eth_type 0x0800 12.0.0.3 goto_tbl acl group_id 0x2000 cmd 3 cookie 2e priority 30 tbl ucast_routing eth_type 0x0800 12.0.0.2 goto_tbl acl group_id 0x0f01 cmd 3 cookie 22 priority 0 tbl ucast_routing eth_type 0x0800 11.0.0.9 goto_tbl acl group_id 0x2001 cmd 3 cookie 1c priority 0 tbl ucast_routing eth_type 0x0800 11.0.0.8/255.255.255.252 goto_tbl acl group_id 0x0f01 cmd 3 cookie 18 priority 1 tbl term_mac in_pport 2 eth_type 0x86dd 33:33:00:00:00:00/ff:ff:00:00:00:00 vlan_id 3841 goto_tbl mcast_routing copy_to_cpu 1 cmd 3 cookie 5 priority 0 tbl term_mac in_pport 2 eth_type 0x0800 52:54:00:12:35:02 vlan_id 3841 goto_tbl ucast_routing copy_to_cpu 0 cmd 3 cookie a priority 2 tbl aclin_pport 1 eth_type 0x0806 vlan_id 3840 ip proto 0/0 ip tos 0/0 group_id 0x0f00 cmd 4 cookie 1a priority 0 tbl ucast_routing eth_type 0x0800 11.0.0.11 goto_tbl acl group_id 0x0f01 cmd 3 cookie 1e priority 0 tbl ucast_routing eth_type 0x0800 11.0.0.8 goto_tbl acl group_id 0x0f01 cmd 3 cookie 3 priority 0 tbl term_mac in_pport 1 eth_type 0x86dd 52:54:00:12:35:01 vlan_id 3840 goto_tbl ucast_routing copy_to_cpu 0 cmd 3 cookie b priority 1 tbl term_mac in_pport 1 eth_type 0x0800 01:00:5e:00:00:00/ff:ff:ff:80:00:00 vlan_id 3840 goto_tbl mcast_routing copy_to_cpu 1 cmd 4 cookie 8 priority 0 tbl ucast_routing eth_type 0x0800 11.0.0.2 goto_tbl acl group_id 0x0f00 cmd 3 cookie 10 priority 0 tbl ucast_routing eth_type 0x0800 11.0.0.0/255.255.255.252 goto_tbl acl group_id 0x0f00 cmd 3 cookie 28 priority 20 tbl ucast_routing eth_type 0x0800 11.0.0.12/255.255.255.252 goto_tbl acl group_id 0x2001 cmd 3 cookie c priority 1 tbl term_mac in_pport 1 eth_type 0x86dd 33:33:00:00:00:00/ff:ff:00:00:00:00 vlan_id 3840 goto_tbl mcast_routing copy_to_cpu 1 > cat /sys/kernel/debug/rocker/525400123501/of_dpa_group_tbl cmd 7 group_id 0x0f00 (L2 interface v
[PATCH net-next] rocker: add debugfs support to dump internal tables
From: Scott Feldman > tree /sys/kernel/debug/rocker /sys/kernel/debug/rocker └── 525400123501 ├── fdb_tbl ├── internal_vlan_tbl ├── neigh_tbl ├── of_dpa_flow_tbl └── of_dpa_group_tbl 1 directory, 5 files > cat /sys/kernel/debug/rocker/525400123501/internal_vlan_tbl ifindex 5 ref_count 1 vlan 3843 ifindex 7 ref_count 2 vlan 3840 ifindex 4 ref_count 1 vlan 3842 > cat /sys/kernel/debug/rocker/525400123501/fdb_tbl learned 1 pport 1 addr 00:02:00:00:02:00 vlan 3840 learned 1 pport 2 addr 00:02:00:00:03:00 vlan 3840 > cat /sys/kernel/debug/rocker/525400123501/neigh_tbl 11.0.0.9 dev sw1p2 ref_count 3 index 1 dst 00:02:00:00:01:00 ttl_check 1 11.0.0.1 dev sw1p1 ref_count 3 index 0 dst 00:02:00:00:00:00 ttl_check 1 > cat /sys/kernel/debug/rocker/525400123501/of_dpa_flow_tbl cmd 3 cookie 15 priority 3 tbl aclin_pport 2 01:80:c2:00:00:00/ff:ff:ff:ff:ff:f0 eth_type 0x vlan_id 3841 ip proto 0/0 ip tos 0/0 group_id 0x0f01 cmd 3 cookie 2 priority 0 tbl term_mac in_pport 1 eth_type 0x0800 52:54:00:12:35:01 vlan_id 3840 goto_tbl ucast_routing copy_to_cpu 0 cmd 3 cookie 1f priority 3 tbl bridge 00:02:00:00:00:00 vlan_id 3840 tunnel_id 0 goto_tbl acl group_id 0x copy_to_cpu 0 cmd 3 cookie 4 priority 1 tbl vlan in_pport 2 vlan_id 0 goto_tbl term_mac untagged 1 new_vlan_id 3841 cmd 3 cookie 20 priority 0 tbl ucast_routing eth_type 0x0800 11.0.0.1 goto_tbl acl group_id 0x2000 cmd 3 cookie 21 priority 3 tbl bridge 00:02:00:00:01:00 vlan_id 3841 tunnel_id 0 goto_tbl acl group_id 0x copy_to_cpu 0 cmd 3 cookie 16 priority 2 tbl aclin_pport 2 eth_type 0x0806 vlan_id 3841 ip proto 0/0 ip tos 0/0 group_id 0x0f01 cmd 3 cookie 12 priority 0 tbl ucast_routing eth_type 0x0800 11.0.0.0 goto_tbl acl group_id 0x0f00 cmd 3 cookie 9 priority 3 tbl aclin_pport 1 01:80:c2:00:00:00/ff:ff:ff:ff:ff:f0 eth_type 0x vlan_id 3840 ip proto 0/0 ip tos 0/0 group_id 0x0f00 cmd 3 cookie 6 priority 0 tbl term_mac in_pport 2 eth_type 0x86dd 52:54:00:12:35:02 vlan_id 3841 goto_tbl ucast_routing copy_to_cpu 0 cmd 4 cookie 0 priority 1 tbl ig_portin_pport 0/0x goto_tbl vlan cmd 4 cookie e priority 0 tbl ucast_routing eth_type 0x0800 11.0.0.3 goto_tbl acl group_id 0x0f00 cmd 3 cookie 1 priority 1 tbl vlan in_pport 1 vlan_id 0 goto_tbl term_mac untagged 1 new_vlan_id 3840 cmd 3 cookie 24 priority 20 tbl ucast_routing eth_type 0x0800 11.0.0.4/255.255.255.252 goto_tbl acl group_id 0x2000 cmd 4 cookie 14 priority 0 tbl ucast_routing eth_type 0x0800 11.0.0.10 goto_tbl acl group_id 0x0f01 cmd 3 cookie 2c priority 20 tbl ucast_routing eth_type 0x0800 12.0.0.4 goto_tbl acl group_id 0x2001 cmd 3 cookie 17 priority 1 tbl term_mac in_pport 2 eth_type 0x0800 01:00:5e:00:00:00/ff:ff:ff:80:00:00 vlan_id 3841 goto_tbl mcast_routing copy_to_cpu 1 cmd 3 cookie 26 priority 20 tbl ucast_routing eth_type 0x0800 12.0.0.3 goto_tbl acl group_id 0x2000 cmd 3 cookie 2e priority 30 tbl ucast_routing eth_type 0x0800 12.0.0.2 goto_tbl acl group_id 0x0f01 cmd 3 cookie 22 priority 0 tbl ucast_routing eth_type 0x0800 11.0.0.9 goto_tbl acl group_id 0x2001 cmd 3 cookie 1c priority 0 tbl ucast_routing eth_type 0x0800 11.0.0.8/255.255.255.252 goto_tbl acl group_id 0x0f01 cmd 3 cookie 18 priority 1 tbl term_mac in_pport 2 eth_type 0x86dd 33:33:00:00:00:00/ff:ff:00:00:00:00 vlan_id 3841 goto_tbl mcast_routing copy_to_cpu 1 cmd 3 cookie 5 priority 0 tbl term_mac in_pport 2 eth_type 0x0800 52:54:00:12:35:02 vlan_id 3841 goto_tbl ucast_routing copy_to_cpu 0 cmd 3 cookie a priority 2 tbl aclin_pport 1 eth_type 0x0806 vlan_id 3840 ip proto 0/0 ip tos 0/0 group_id 0x0f00 cmd 4 cookie 1a priority 0 tbl ucast_routing eth_type 0x0800 11.0.0.11 goto_tbl acl group_id 0x0f01 cmd 3 cookie 1e priority 0 tbl ucast_routing eth_type 0x0800 11.0.0.8 goto_tbl acl group_id 0x0f01 cmd 3 cookie 3 priority 0 tbl term_mac in_pport 1 eth_type 0x86dd 52:54:00:12:35:01 vlan_id 3840 goto_tbl ucast_routing copy_to_cpu 0 cmd 3 cookie b priority 1 tbl term_mac in_pport 1 eth_type 0x0800 01:00:5e:00:00:00/ff:ff:ff:80:00:00 vlan_id 3840 goto_tbl mcast_routing copy_to_cpu 1 cmd 4 cookie 8 priority 0 tbl ucast_routing eth_type 0x0800 11.0.0.2 goto_tbl acl group_id 0x0f00 cmd 3 cookie 10 priority 0 tbl ucast_routing eth_type 0x0800 11.0.0.0/255.255.255.252 goto_tbl acl group_id 0x0f00 cmd 3 cookie 28 priority 20 tbl ucast_routing eth_type 0x0800 11.0.0.12/255.255.255.252 goto_tbl acl group_id 0x2001 cmd 3 cookie c priority 1 tbl term_mac in_pport 1 eth_type 0x86dd 33:33:00:00:00:00/ff:ff:00:00:00:00 vlan_id 3840 goto_tbl mcast_routing copy_to_cpu 1 > cat /sys/kernel/debug/rocker/525400123501/of_dpa_group_tbl cmd 7 group_id 0x0f00 (L2 interface v
[PATCH net-next] rocker: hook ndo_neigh_destroy to cleanup neigh refs in driver
From: Scott Feldman Rocker driver tracks arp_tbl neighs to resolve IPv4 route nexthops. The driver uses NETEVENT_NEIGH_UPDATE for neigh adds and updates, but there is no event when the neigh is removed from the device (such as when the device goes admin down). This patches hooks ndo_neigh_destroy so the driver can know when a neigh is removed from the device. In response, the driver will purge the neigh entry from its internal tbl. I didn't find an in-tree users of ndo_neigh_destroy, so I'm not sure if this ndo is vestigial or if there are out-of-tree users. In any case, it does what I need here. An alternative design would be to generate NETEVENT_NEIGH_UPDATE event when neigh is being destroyed, setting state to NUD_NONE so driver knows neigh entry is dead. Signed-off-by: Scott Feldman --- Documentation/networking/switchdev.txt |3 ++- drivers/net/ethernet/rocker/rocker.c | 11 +++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/Documentation/networking/switchdev.txt b/Documentation/networking/switchdev.txt index 9825f32..476df04 100644 --- a/Documentation/networking/switchdev.txt +++ b/Documentation/networking/switchdev.txt @@ -367,4 +367,5 @@ driver's rocker_port_ipv4_resolve() for an example. The driver can monitor for updates to arp_tbl using the netevent notifier NETEVENT_NEIGH_UPDATE. The device can be programmed with resolved nexthops -for the routes as arp_tbl updates. +for the routes as arp_tbl updates. The driver implements ndo_neigh_destroy +to know when arp_tbl neighbor entries are purged from the port. diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index af05075..619b2e2 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -4264,6 +4264,16 @@ static int rocker_port_change_proto_down(struct net_device *dev, return 0; } +static void rocker_port_neigh_destroy(struct neighbour *n) +{ + struct rocker_port *rocker_port = netdev_priv(n->dev); + int flags = ROCKER_OP_FLAG_REMOVE | ROCKER_OP_FLAG_NOWAIT; + __be32 ip_addr = *(__be32 *)n->primary_key; + + rocker_port_ipv4_neigh(rocker_port, SWITCHDEV_TRANS_NONE, + flags, ip_addr, n->ha); +} + static const struct net_device_ops rocker_port_netdev_ops = { .ndo_open = rocker_port_open, .ndo_stop = rocker_port_stop, @@ -4278,6 +4288,7 @@ static const struct net_device_ops rocker_port_netdev_ops = { .ndo_fdb_dump = switchdev_port_fdb_dump, .ndo_get_phys_port_name = rocker_port_get_phys_port_name, .ndo_change_proto_down = rocker_port_change_proto_down, + .ndo_neigh_destroy = rocker_port_neigh_destroy, }; / -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next] rocker: print switch ID consistent with phys_switch_id sysfs node
From: Scott Feldman On sucessful probe, driver prints the switch ID. This patch changes the format of the printed ID to match what's used in sysfs phys_switch_id node. Signed-off-by: Scott Feldman --- drivers/net/ethernet/rocker/rocker.c |3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index 619b2e2..d963cdc 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -5193,7 +5193,8 @@ static int rocker_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto err_probe_ports; } - dev_info(&pdev->dev, "Rocker switch with id %016llx\n", rocker->hw.id); + dev_info(&pdev->dev, "Rocker switch with id %*phN\n", +(int)sizeof(rocker->hw.id), &rocker->hw.id); return 0; -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next 2/2] rocker: use netdev_err after register_netdev
From: Scott Feldman After successful register_netdev, we can use netdev_err rather the more generic dev_err. Signed-off-by: Scott Feldman --- drivers/net/ethernet/rocker/rocker.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index 0ab3a3b..4e8cad0 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -4985,7 +4985,7 @@ static int rocker_probe_port(struct rocker *rocker, unsigned int port_number) err = rocker_port_ig_tbl(rocker_port, SWITCHDEV_TRANS_NONE, 0); if (err) { - dev_err(&pdev->dev, "install ig port table failed\n"); + netdev_err(rocker_port->dev, "install ig port table failed\n"); goto err_port_ig_tbl; } -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next 1/2] rocker: NULL port if port probe fails
From: Scott Feldman Set port to NULL if port probe fails so we don't try to remove partially initialized port on port probe err cleanup path. Signed-off-by: Scott Feldman --- drivers/net/ethernet/rocker/rocker.c |1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index 7b4c347..0ab3a3b 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -5005,6 +5005,7 @@ err_untagged_vlan: rocker_port_ig_tbl(rocker_port, SWITCHDEV_TRANS_NONE, ROCKER_OP_FLAG_REMOVE); err_port_ig_tbl: + rocker->ports[port_number] = NULL; unregister_netdev(dev); err_register_netdev: free_netdev(dev); -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next v3 4/5] rocker: add offload_fwd_mark support
From: Scott Feldman If device flags ingress packet as "fwd offload", mark the skb->offlaod_fwd_mark using the ingress port's dev->offlaod_fwd_mark. This will be the hint to the kernel that this packet has already been forwarded by device to egress ports matching skb->offlaod_fwd_mark. For rocker, derive port dev->offlaod_fwd_mark based on device switch ID and port ifindex. If port is bridged, use the bridge ifindex rather than the port ifindex. Signed-off-by: Scott Feldman Acked-by: Jiri Pirko --- drivers/net/ethernet/rocker/rocker.c | 11 +++ drivers/net/ethernet/rocker/rocker.h |1 + 2 files changed, 12 insertions(+) diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index 9324283..0fdfa47 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -4800,6 +4800,7 @@ static int rocker_port_rx_proc(const struct rocker *rocker, const struct rocker_tlv *attrs[ROCKER_TLV_RX_MAX + 1]; struct sk_buff *skb = rocker_desc_cookie_ptr_get(desc_info); size_t rx_len; + u16 rx_flags = 0; if (!skb) return -ENOENT; @@ -4807,6 +4808,8 @@ static int rocker_port_rx_proc(const struct rocker *rocker, rocker_tlv_parse_desc(attrs, ROCKER_TLV_RX_MAX, desc_info); if (!attrs[ROCKER_TLV_RX_FRAG_LEN]) return -EINVAL; + if (attrs[ROCKER_TLV_RX_FLAGS]) + rx_flags = rocker_tlv_get_u16(attrs[ROCKER_TLV_RX_FLAGS]); rocker_dma_rx_ring_skb_unmap(rocker, attrs); @@ -4814,6 +4817,9 @@ static int rocker_port_rx_proc(const struct rocker *rocker, skb_put(skb, rx_len); skb->protocol = eth_type_trans(skb, rocker_port->dev); + if (rx_flags & ROCKER_RX_FLAGS_FWD_OFFLOAD) + skb->offload_fwd_mark = rocker_port->dev->offload_fwd_mark; + rocker_port->dev->stats.rx_packets++; rocker_port->dev->stats.rx_bytes += skb->len; @@ -4951,6 +4957,8 @@ static int rocker_probe_port(struct rocker *rocker, unsigned int port_number) } rocker->ports[port_number] = rocker_port; + switchdev_port_fwd_mark_set(rocker_port->dev, NULL, false); + rocker_port_set_learning(rocker_port, SWITCHDEV_TRANS_NONE); err = rocker_port_ig_tbl(rocker_port, SWITCHDEV_TRANS_NONE, 0); @@ -5230,6 +5238,7 @@ static int rocker_port_bridge_join(struct rocker_port *rocker_port, rocker_port_internal_vlan_id_get(rocker_port, bridge->ifindex); rocker_port->bridge_dev = bridge; + switchdev_port_fwd_mark_set(rocker_port->dev, bridge, true); return rocker_port_vlan_add(rocker_port, SWITCHDEV_TRANS_NONE, untagged_vid, 0); @@ -5250,6 +5259,8 @@ static int rocker_port_bridge_leave(struct rocker_port *rocker_port) rocker_port_internal_vlan_id_get(rocker_port, rocker_port->dev->ifindex); + switchdev_port_fwd_mark_set(rocker_port->dev, rocker_port->bridge_dev, + false); rocker_port->bridge_dev = NULL; err = rocker_port_vlan_add(rocker_port, SWITCHDEV_TRANS_NONE, diff --git a/drivers/net/ethernet/rocker/rocker.h b/drivers/net/ethernet/rocker/rocker.h index 08b2c3d..12490b2 100644 --- a/drivers/net/ethernet/rocker/rocker.h +++ b/drivers/net/ethernet/rocker/rocker.h @@ -246,6 +246,7 @@ enum { #define ROCKER_RX_FLAGS_TCPBIT(5) #define ROCKER_RX_FLAGS_UDPBIT(6) #define ROCKER_RX_FLAGS_TCP_UDP_CSUM_GOOD BIT(7) +#define ROCKER_RX_FLAGS_FWD_OFFLOADBIT(8) enum { ROCKER_TLV_TX_UNSPEC, -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next v3 5/5] switchdev: update documentation for offload_fwd_mark
From: Scott Feldman Signed-off-by: Scott Feldman Acked-by: Jiri Pirko --- Documentation/networking/switchdev.txt | 14 -- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/Documentation/networking/switchdev.txt b/Documentation/networking/switchdev.txt index c5d7ade..9825f32 100644 --- a/Documentation/networking/switchdev.txt +++ b/Documentation/networking/switchdev.txt @@ -279,8 +279,18 @@ and unknown unicast packets to all ports in domain, if allowed by port's current STP state. The switch driver, knowing which ports are within which vlan L2 domain, can program the switch device for flooding. The packet should also be sent to the port netdev for processing by the bridge driver. The -bridge should not reflood the packet to the same ports the device flooded. -XXX: the mechanism to avoid duplicate flood packets is being discuseed. +bridge should not reflood the packet to the same ports the device flooded, +otherwise there will be duplicate packets on the wire. + +To avoid duplicate packets, the device/driver should mark a packet as already +forwarded using skb->offload_fwd_mark. The same mark is set on the device +ports in the domain using dev->offload_fwd_mark. If the skb->offload_fwd_mark +is non-zero and matches the forwarding egress port's dev->skb_mark, the kernel +will drop the skb right before transmit on the egress port, with the +understanding that the device already forwarded the packet on same egress port. +The driver can use switchdev_port_fwd_mark_set() to set a globally unique mark +for port's dev->offload_fwd_mark, based on the port's parent ID (switch ID) and +a group ifindex. It is possible for the switch device to not handle flooding and push the packets up to the bridge driver for flooding. This is not ideal as the number -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next v3 2/5] net: add phys ID compare helper to test if two IDs are the same
From: Scott Feldman Signed-off-by: Scott Feldman Acked-by: Jiri Pirko --- include/linux/netdevice.h |7 +++ net/switchdev/switchdev.c |8 ++-- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 8364f29..607b5f4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -766,6 +766,13 @@ struct netdev_phys_item_id { unsigned char id_len; }; +static inline bool netdev_phys_item_id_same(struct netdev_phys_item_id *a, + struct netdev_phys_item_id *b) +{ + return a->id_len == b->id_len && + memcmp(a->id, b->id, a->id_len) == 0; +} + typedef u16 (*select_queue_fallback_t)(struct net_device *dev, struct sk_buff *skb); diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 9f2add3..4e5bba5 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -910,13 +910,9 @@ static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi) if (switchdev_port_attr_get(dev, &attr)) return NULL; - if (nhsel > 0) { - if (prev_attr.u.ppid.id_len != attr.u.ppid.id_len) + if (nhsel > 0 && + !netdev_phys_item_id_same(&prev_attr.u.ppid, &attr.u.ppid)) return NULL; - if (memcmp(prev_attr.u.ppid.id, attr.u.ppid.id, - attr.u.ppid.id_len)) - return NULL; - } prev_attr = attr; } -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next v3 0/5] switchdev: avoid duplicate packet forwarding
From: Scott Feldman v3: - Per Nicolas Dichtel review: remove errant empty union. v2: - Per davem review: in sk_buff, union fwd_mark with secmark to save space since features appear to be mutually exclusive. - Per Simon Horman review: - fix grammar in switchdev.txt wrt fwd_mark - remove some unrelated changes that snuck in v1: This patchset was previously submitted as RFC. No changes from the last version (v2) sent under RFC. Including RFC version history here for reference. RFC v2: - s/fwd_mark/offload_fwd_mark - use consume_skb rather than kfree_skb when dropping pkt on egress. - Use Jiri's suggestion to use ifindex of one of the ports in a group as the mark for all the ports in the group. This can be done with no additional storage (no hashtable from v1). To pull it off, we need some simple recursive routines to walk the netdev tree ensuring all leaves in the tree (ports) in the same group (e.g. bridge) belonging to the same switch device will have the same offload fwd mark. Maybe someone sees a better design for the recusive routines? They're not too bad, and should cover the stacked driver cases. RFC v1: With switchdev support for offloading L2/L3 forwarding data path to a switch device, we have a general problem where both the device and the kernel may forward the packet, resulting in duplicate packets on the wire. Anytime a packet is forwarded by the device and a copy is sent to the CPU, there is potential for duplicate forwarding, as the kernel may also do a forwarding lookup and send the packet on the wire. The specific problem this patch series is interested in solving is avoiding duplicate packets on bridged ports. There was a previous RFC from Roopa (http://marc.info/?l=linux-netdev&m=142687073314252&w=2) to address this problem, but didn't solve the problem of mixed ports in the bridge from different devices; there was no way to exclude some ports from forwarding and include others. This RFC solves that problem by tagging the ingressing packet with a unique mark, and then comparing the packet mark with the egress port mark, and skip forwarding when there is a match. For the mixed ports bridge case, only those ports with matching marks are skipped. The switchdev port driver must do two things: 1) Generate a fwd_mark for each switch port, using some unique key of the switch device (and optionally port). This is done when the port netdev is registered or if the port's group membership changes (joins/leaves a bridge, for example). 2) On packet ingress from port, mark the skb with the ingress port's fwd_mark. If the device supports it, it's useful to only mark skbs which were already forwarded by the device. If the device does not support such indication, all skbs can be marked, even if they're local dst. Two new 32-bit fields are added to struct sk_buff and struct netdevice to hold the fwd_mark. I've wrapped these with CONFIG_NET_SWITCHDEV for now. I tried using skb->mark for this purpose, but ebtables can overwrite the skb->mark before the bridge gets it, so that will not work. In general, this fwd_mark can be used for any case where a packet is forwarded by the device and a copy is sent to the CPU, to avoid the kernel re-forwarding the packet. sFlow is another use-case that comes to mind, but I haven't explored the details. Scott Feldman (5): net: don't reforward packets already forwarded by offload device net: add phys ID compare helper to test if two IDs are the same switchdev: add offload_fwd_mark generator helper rocker: add offload_fwd_mark support switchdev: update documentation for offload_fwd_mark Documentation/networking/switchdev.txt | 14 +++- drivers/net/ethernet/rocker/rocker.c | 11 drivers/net/ethernet/rocker/rocker.h |1 + include/linux/netdevice.h | 13 include/linux/skbuff.h |9 ++- include/net/switchdev.h|9 +++ net/core/dev.c | 10 +++ net/switchdev/switchdev.c | 111 ++-- 8 files changed, 169 insertions(+), 9 deletions(-) -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next v3 1/5] net: don't reforward packets already forwarded by offload device
From: Scott Feldman Just before queuing skb for xmit on port, check if skb has been marked by switchdev port driver as already fordwarded by device. If so, drop skb. A non-zero skb->offload_fwd_mark field is set by the switchdev port driver/device on ingress to indicate the skb has already been forwarded by the device to egress ports with matching dev->skb_mark. The switchdev port driver would assign a non-zero dev->offload_skb_mark for each device port netdev during registration, for example. Signed-off-by: Scott Feldman Acked-by: Jiri Pirko Acked-by: Roopa Prabhu --- include/linux/netdevice.h |6 ++ include/linux/skbuff.h|9 - net/core/dev.c| 10 ++ 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 45cfd79..8364f29 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1456,6 +1456,8 @@ enum netdev_priv_flags { * * @xps_maps: XXX: need comments on this one * + * @offload_fwd_mark: Offload device fwding mark + * * @trans_start: Time (in jiffies) of last Tx * @watchdog_timeo:Represents the timeout that is used by * the watchdog ( see dev_watchdog() ) @@ -1697,6 +1699,10 @@ struct net_device { struct xps_dev_maps __rcu *xps_maps; #endif +#ifdef CONFIG_NET_SWITCHDEV + u32 offload_fwd_mark; +#endif + /* These may be needed for future network-power-down code. */ /* diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d6cdd6e..af7a096 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -506,6 +506,7 @@ static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1, * @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS *@napi_id: id of the NAPI struct this skb came from * @secmark: security marking + * @offload_fwd_mark: fwding offload mark * @mark: Generic packet mark * @vlan_proto: vlan encapsulation protocol * @vlan_tci: vlan tag control information @@ -650,9 +651,15 @@ struct sk_buff { unsigned intsender_cpu; }; #endif + union { #ifdef CONFIG_NETWORK_SECMARK - __u32 secmark; + __u32 secmark; +#endif +#ifdef CONFIG_NET_SWITCHDEV + __u32 offload_fwd_mark; #endif + }; + union { __u32 mark; __u32 reserved_tailroom; diff --git a/net/core/dev.c b/net/core/dev.c index 8810b6b..2ee15af 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3061,6 +3061,16 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) else skb_dst_force(skb); +#ifdef CONFIG_NET_SWITCHDEV + /* Don't forward if offload device already forwarded */ + if (skb->offload_fwd_mark && + skb->offload_fwd_mark == dev->offload_fwd_mark) { + consume_skb(skb); + rc = NET_XMIT_SUCCESS; + goto out; + } +#endif + txq = netdev_pick_tx(dev, skb, accel_priv); q = rcu_dereference_bh(txq->qdisc); -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next v3 3/5] switchdev: add offload_fwd_mark generator helper
From: Scott Feldman skb->offload_fwd_mark and dev->offload_fwd_mark are 32-bit and should be unique for device and may even be unique for a sub-set of ports within device, so add switchdev helper function to generate unique marks based on port's switch ID and group_ifindex. group_ifindex would typically be the container dev's ifindex, such as the bridge's ifindex. The generator uses a global hash table to store offload_fwd_marks hashed by {switch ID, group_ifindex} key. Signed-off-by: Scott Feldman Acked-by: Jiri Pirko --- include/net/switchdev.h |9 net/switchdev/switchdev.c | 103 + 2 files changed, 112 insertions(+) diff --git a/include/net/switchdev.h b/include/net/switchdev.h index d5671f1..89da893 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -157,6 +157,9 @@ int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, struct net_device *filter_dev, int idx); +void switchdev_port_fwd_mark_set(struct net_device *dev, +struct net_device *group_dev, +bool joining); #else @@ -271,6 +274,12 @@ static inline int switchdev_port_fdb_dump(struct sk_buff *skb, return -EOPNOTSUPP; } +static inline void switchdev_port_fwd_mark_set(struct net_device *dev, + struct net_device *group_dev, + bool joining) +{ +} + #endif #endif /* _LINUX_SWITCHDEV_H_ */ diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 4e5bba5..33bafa2 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -1039,3 +1039,106 @@ void switchdev_fib_ipv4_abort(struct fib_info *fi) fi->fib_net->ipv4.fib_offload_disabled = true; } EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_abort); + +static bool switchdev_port_same_parent_id(struct net_device *a, + struct net_device *b) +{ + struct switchdev_attr a_attr = { + .id = SWITCHDEV_ATTR_PORT_PARENT_ID, + .flags = SWITCHDEV_F_NO_RECURSE, + }; + struct switchdev_attr b_attr = { + .id = SWITCHDEV_ATTR_PORT_PARENT_ID, + .flags = SWITCHDEV_F_NO_RECURSE, + }; + + if (switchdev_port_attr_get(a, &a_attr) || + switchdev_port_attr_get(b, &b_attr)) + return false; + + return netdev_phys_item_id_same(&a_attr.u.ppid, &b_attr.u.ppid); +} + +static u32 switchdev_port_fwd_mark_get(struct net_device *dev, + struct net_device *group_dev) +{ + struct net_device *lower_dev; + struct list_head *iter; + + netdev_for_each_lower_dev(group_dev, lower_dev, iter) { + if (lower_dev == dev) + continue; + if (switchdev_port_same_parent_id(dev, lower_dev)) + return lower_dev->offload_fwd_mark; + return switchdev_port_fwd_mark_get(dev, lower_dev); + } + + return dev->ifindex; +} + +static void switchdev_port_fwd_mark_reset(struct net_device *group_dev, + u32 old_mark, u32 *reset_mark) +{ + struct net_device *lower_dev; + struct list_head *iter; + + netdev_for_each_lower_dev(group_dev, lower_dev, iter) { + if (lower_dev->offload_fwd_mark == old_mark) { + if (!*reset_mark) + *reset_mark = lower_dev->ifindex; + lower_dev->offload_fwd_mark = *reset_mark; + } + switchdev_port_fwd_mark_reset(lower_dev, old_mark, reset_mark); + } +} + +/** + * switchdev_port_fwd_mark_set - Set port offload forwarding mark + * + * @dev: port device + * @group_dev: containing device + * @joining: true if dev is joining group; false if leaving group + * + * An ungrouped port's offload mark is just its ifindex. A grouped + * port's (member of a bridge, for example) offload mark is the ifindex + * of one of the ports in the group with the same parent (switch) ID. + * Ports on the same device in the same group will have the same mark. + * + * Example: + * + * br0 ifindex=9 + * sw1p1 ifindex=2 mark=2 + * sw1p2 ifindex=3 mark=2 + * sw2p1 ifindex=4 mark=5 + * sw2p2 ifindex=5 mark=5 + * + * If sw2p2 leaves the bridge, we'll have: + * + * br0 ifindex=9 + * sw1p1 ifindex=2 mark=2 + * sw1p2 ifindex=3 mark=2 + * sw2p1 ifindex=4 mark=4 + * sw2p2
[PATCH net-next v2 5/5] switchdev: update documentation for offload_fwd_mark
From: Scott Feldman Signed-off-by: Scott Feldman --- Documentation/networking/switchdev.txt | 14 -- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/Documentation/networking/switchdev.txt b/Documentation/networking/switchdev.txt index c5d7ade..9825f32 100644 --- a/Documentation/networking/switchdev.txt +++ b/Documentation/networking/switchdev.txt @@ -279,8 +279,18 @@ and unknown unicast packets to all ports in domain, if allowed by port's current STP state. The switch driver, knowing which ports are within which vlan L2 domain, can program the switch device for flooding. The packet should also be sent to the port netdev for processing by the bridge driver. The -bridge should not reflood the packet to the same ports the device flooded. -XXX: the mechanism to avoid duplicate flood packets is being discuseed. +bridge should not reflood the packet to the same ports the device flooded, +otherwise there will be duplicate packets on the wire. + +To avoid duplicate packets, the device/driver should mark a packet as already +forwarded using skb->offload_fwd_mark. The same mark is set on the device +ports in the domain using dev->offload_fwd_mark. If the skb->offload_fwd_mark +is non-zero and matches the forwarding egress port's dev->skb_mark, the kernel +will drop the skb right before transmit on the egress port, with the +understanding that the device already forwarded the packet on same egress port. +The driver can use switchdev_port_fwd_mark_set() to set a globally unique mark +for port's dev->offload_fwd_mark, based on the port's parent ID (switch ID) and +a group ifindex. It is possible for the switch device to not handle flooding and push the packets up to the bridge driver for flooding. This is not ideal as the number -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next v2 1/5] net: don't reforward packets already forwarded by offload device
From: Scott Feldman Just before queuing skb for xmit on port, check if skb has been marked by switchdev port driver as already fordwarded by device. If so, drop skb. A non-zero skb->offload_fwd_mark field is set by the switchdev port driver/device on ingress to indicate the skb has already been forwarded by the device to egress ports with matching dev->skb_mark. The switchdev port driver would assign a non-zero dev->skb_mark for each device port netdev during registration, for example. Signed-off-by: Scott Feldman --- include/linux/netdevice.h |6 ++ include/linux/skbuff.h| 11 ++- net/core/dev.c| 10 ++ 3 files changed, 26 insertions(+), 1 deletion(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 45cfd79..8364f29 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1456,6 +1456,8 @@ enum netdev_priv_flags { * * @xps_maps: XXX: need comments on this one * + * @offload_fwd_mark: Offload device fwding mark + * * @trans_start: Time (in jiffies) of last Tx * @watchdog_timeo:Represents the timeout that is used by * the watchdog ( see dev_watchdog() ) @@ -1697,6 +1699,10 @@ struct net_device { struct xps_dev_maps __rcu *xps_maps; #endif +#ifdef CONFIG_NET_SWITCHDEV + u32 offload_fwd_mark; +#endif + /* These may be needed for future network-power-down code. */ /* diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d6cdd6e..2edcf50 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -506,6 +506,7 @@ static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1, * @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS *@napi_id: id of the NAPI struct this skb came from * @secmark: security marking + * @offload_fwd_mark: fwding offload mark * @mark: Generic packet mark * @vlan_proto: vlan encapsulation protocol * @vlan_tci: vlan tag control information @@ -650,9 +651,17 @@ struct sk_buff { unsigned intsender_cpu; }; #endif + union { #ifdef CONFIG_NETWORK_SECMARK - __u32 secmark; + __u32 secmark; +#endif +#ifdef CONFIG_NET_SWITCHDEV + __u32 offload_fwd_mark; #endif + }; + + union {}; + union { __u32 mark; __u32 reserved_tailroom; diff --git a/net/core/dev.c b/net/core/dev.c index 8810b6b..2ee15af 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3061,6 +3061,16 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) else skb_dst_force(skb); +#ifdef CONFIG_NET_SWITCHDEV + /* Don't forward if offload device already forwarded */ + if (skb->offload_fwd_mark && + skb->offload_fwd_mark == dev->offload_fwd_mark) { + consume_skb(skb); + rc = NET_XMIT_SUCCESS; + goto out; + } +#endif + txq = netdev_pick_tx(dev, skb, accel_priv); q = rcu_dereference_bh(txq->qdisc); -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next v2 3/5] switchdev: add offload_fwd_mark generator helper
From: Scott Feldman skb->offload_fwd_mark and dev->offload_fwd_mark are 32-bit and should be unique for device and may even be unique for a sub-set of ports within device, so add switchdev helper function to generate unique marks based on port's switch ID and group_ifindex. group_ifindex would typically be the container dev's ifindex, such as the bridge's ifindex. The generator uses a global hash table to store offload_fwd_marks hashed by {switch ID, group_ifindex} key. Signed-off-by: Scott Feldman --- include/net/switchdev.h |9 net/switchdev/switchdev.c | 103 + 2 files changed, 112 insertions(+) diff --git a/include/net/switchdev.h b/include/net/switchdev.h index d5671f1..89da893 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -157,6 +157,9 @@ int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, struct net_device *filter_dev, int idx); +void switchdev_port_fwd_mark_set(struct net_device *dev, +struct net_device *group_dev, +bool joining); #else @@ -271,6 +274,12 @@ static inline int switchdev_port_fdb_dump(struct sk_buff *skb, return -EOPNOTSUPP; } +static inline void switchdev_port_fwd_mark_set(struct net_device *dev, + struct net_device *group_dev, + bool joining) +{ +} + #endif #endif /* _LINUX_SWITCHDEV_H_ */ diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 4e5bba5..33bafa2 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -1039,3 +1039,106 @@ void switchdev_fib_ipv4_abort(struct fib_info *fi) fi->fib_net->ipv4.fib_offload_disabled = true; } EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_abort); + +static bool switchdev_port_same_parent_id(struct net_device *a, + struct net_device *b) +{ + struct switchdev_attr a_attr = { + .id = SWITCHDEV_ATTR_PORT_PARENT_ID, + .flags = SWITCHDEV_F_NO_RECURSE, + }; + struct switchdev_attr b_attr = { + .id = SWITCHDEV_ATTR_PORT_PARENT_ID, + .flags = SWITCHDEV_F_NO_RECURSE, + }; + + if (switchdev_port_attr_get(a, &a_attr) || + switchdev_port_attr_get(b, &b_attr)) + return false; + + return netdev_phys_item_id_same(&a_attr.u.ppid, &b_attr.u.ppid); +} + +static u32 switchdev_port_fwd_mark_get(struct net_device *dev, + struct net_device *group_dev) +{ + struct net_device *lower_dev; + struct list_head *iter; + + netdev_for_each_lower_dev(group_dev, lower_dev, iter) { + if (lower_dev == dev) + continue; + if (switchdev_port_same_parent_id(dev, lower_dev)) + return lower_dev->offload_fwd_mark; + return switchdev_port_fwd_mark_get(dev, lower_dev); + } + + return dev->ifindex; +} + +static void switchdev_port_fwd_mark_reset(struct net_device *group_dev, + u32 old_mark, u32 *reset_mark) +{ + struct net_device *lower_dev; + struct list_head *iter; + + netdev_for_each_lower_dev(group_dev, lower_dev, iter) { + if (lower_dev->offload_fwd_mark == old_mark) { + if (!*reset_mark) + *reset_mark = lower_dev->ifindex; + lower_dev->offload_fwd_mark = *reset_mark; + } + switchdev_port_fwd_mark_reset(lower_dev, old_mark, reset_mark); + } +} + +/** + * switchdev_port_fwd_mark_set - Set port offload forwarding mark + * + * @dev: port device + * @group_dev: containing device + * @joining: true if dev is joining group; false if leaving group + * + * An ungrouped port's offload mark is just its ifindex. A grouped + * port's (member of a bridge, for example) offload mark is the ifindex + * of one of the ports in the group with the same parent (switch) ID. + * Ports on the same device in the same group will have the same mark. + * + * Example: + * + * br0 ifindex=9 + * sw1p1 ifindex=2 mark=2 + * sw1p2 ifindex=3 mark=2 + * sw2p1 ifindex=4 mark=5 + * sw2p2 ifindex=5 mark=5 + * + * If sw2p2 leaves the bridge, we'll have: + * + * br0 ifindex=9 + * sw1p1 ifindex=2 mark=2 + * sw1p2 ifindex=3 mark=2 + * sw2p1 ifindex=4 mark=4 + * sw2p2 ifindex=5
[PATCH net-next v2 2/5] net: add phys ID compare helper to test if two IDs are the same
From: Scott Feldman Signed-off-by: Scott Feldman --- include/linux/netdevice.h |7 +++ net/switchdev/switchdev.c |8 ++-- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 8364f29..607b5f4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -766,6 +766,13 @@ struct netdev_phys_item_id { unsigned char id_len; }; +static inline bool netdev_phys_item_id_same(struct netdev_phys_item_id *a, + struct netdev_phys_item_id *b) +{ + return a->id_len == b->id_len && + memcmp(a->id, b->id, a->id_len) == 0; +} + typedef u16 (*select_queue_fallback_t)(struct net_device *dev, struct sk_buff *skb); diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 9f2add3..4e5bba5 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -910,13 +910,9 @@ static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi) if (switchdev_port_attr_get(dev, &attr)) return NULL; - if (nhsel > 0) { - if (prev_attr.u.ppid.id_len != attr.u.ppid.id_len) + if (nhsel > 0 && + !netdev_phys_item_id_same(&prev_attr.u.ppid, &attr.u.ppid)) return NULL; - if (memcmp(prev_attr.u.ppid.id, attr.u.ppid.id, - attr.u.ppid.id_len)) - return NULL; - } prev_attr = attr; } -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next v2 0/5] switchdev: avoid duplicate packet forwarding
From: Scott Feldman v2: - Per davem review: in sk_buff, union fwd_mark with secmark to save space since features appear to be mutually exclusive. - Per Simon Horman review: - fix grammar in switchdev.txt wrt fwd_mark - remove some unrelated changes that snuck in v1: This patchset was previously submitted as RFC. No changes from the last version (v2) sent under RFC. Including RFC version history here for reference. RFC v2: - s/fwd_mark/offload_fwd_mark - use consume_skb rather than kfree_skb when dropping pkt on egress. - Use Jiri's suggestion to use ifindex of one of the ports in a group as the mark for all the ports in the group. This can be done with no additional storage (no hashtable from v1). To pull it off, we need some simple recursive routines to walk the netdev tree ensuring all leaves in the tree (ports) in the same group (e.g. bridge) belonging to the same switch device will have the same offload fwd mark. Maybe someone sees a better design for the recusive routines? They're not too bad, and should cover the stacked driver cases. RFC v1: With switchdev support for offloading L2/L3 forwarding data path to a switch device, we have a general problem where both the device and the kernel may forward the packet, resulting in duplicate packets on the wire. Anytime a packet is forwarded by the device and a copy is sent to the CPU, there is potential for duplicate forwarding, as the kernel may also do a forwarding lookup and send the packet on the wire. The specific problem this patch series is interested in solving is avoiding duplicate packets on bridged ports. There was a previous RFC from Roopa (http://marc.info/?l=linux-netdev&m=142687073314252&w=2) to address this problem, but didn't solve the problem of mixed ports in the bridge from different devices; there was no way to exclude some ports from forwarding and include others. This RFC solves that problem by tagging the ingressing packet with a unique mark, and then comparing the packet mark with the egress port mark, and skip forwarding when there is a match. For the mixed ports bridge case, only those ports with matching marks are skipped. The switchdev port driver must do two things: 1) Generate a fwd_mark for each switch port, using some unique key of the switch device (and optionally port). This is done when the port netdev is registered or if the port's group membership changes (joins/leaves a bridge, for example). 2) On packet ingress from port, mark the skb with the ingress port's fwd_mark. If the device supports it, it's useful to only mark skbs which were already forwarded by the device. If the device does not support such indication, all skbs can be marked, even if they're local dst. Two new 32-bit fields are added to struct sk_buff and struct netdevice to hold the fwd_mark. I've wrapped these with CONFIG_NET_SWITCHDEV for now. I tried using skb->mark for this purpose, but ebtables can overwrite the skb->mark before the bridge gets it, so that will not work. In general, this fwd_mark can be used for any case where a packet is forwarded by the device and a copy is sent to the CPU, to avoid the kernel re-forwarding the packet. sFlow is another use-case that comes to mind, but I haven't explored the details. Scott Feldman (5): net: don't reforward packets already forwarded by offload device net: add phys ID compare helper to test if two IDs are the same switchdev: add offload_fwd_mark generator helper rocker: add offload_fwd_mark support switchdev: update documentation for offload_fwd_mark Documentation/networking/switchdev.txt | 14 +++- drivers/net/ethernet/rocker/rocker.c | 11 drivers/net/ethernet/rocker/rocker.h |1 + include/linux/netdevice.h | 13 include/linux/skbuff.h | 11 +++- include/net/switchdev.h|9 +++ net/core/dev.c | 10 +++ net/switchdev/switchdev.c | 111 ++-- 8 files changed, 171 insertions(+), 9 deletions(-) -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next v2 4/5] rocker: add offload_fwd_mark support
From: Scott Feldman If device flags ingress packet as "fwd offload", mark the skb->offlaod_fwd_mark using the ingress port's dev->offlaod_fwd_mark. This will be the hint to the kernel that this packet has already been forwarded by device to egress ports matching skb->offlaod_fwd_mark. For rocker, derive port dev->offlaod_fwd_mark based on device switch ID and port ifindex. If port is bridged, use the bridge ifindex rather than the port ifindex. Signed-off-by: Scott Feldman --- drivers/net/ethernet/rocker/rocker.c | 11 +++ drivers/net/ethernet/rocker/rocker.h |1 + 2 files changed, 12 insertions(+) diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index 9324283..0fdfa47 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -4800,6 +4800,7 @@ static int rocker_port_rx_proc(const struct rocker *rocker, const struct rocker_tlv *attrs[ROCKER_TLV_RX_MAX + 1]; struct sk_buff *skb = rocker_desc_cookie_ptr_get(desc_info); size_t rx_len; + u16 rx_flags = 0; if (!skb) return -ENOENT; @@ -4807,6 +4808,8 @@ static int rocker_port_rx_proc(const struct rocker *rocker, rocker_tlv_parse_desc(attrs, ROCKER_TLV_RX_MAX, desc_info); if (!attrs[ROCKER_TLV_RX_FRAG_LEN]) return -EINVAL; + if (attrs[ROCKER_TLV_RX_FLAGS]) + rx_flags = rocker_tlv_get_u16(attrs[ROCKER_TLV_RX_FLAGS]); rocker_dma_rx_ring_skb_unmap(rocker, attrs); @@ -4814,6 +4817,9 @@ static int rocker_port_rx_proc(const struct rocker *rocker, skb_put(skb, rx_len); skb->protocol = eth_type_trans(skb, rocker_port->dev); + if (rx_flags & ROCKER_RX_FLAGS_FWD_OFFLOAD) + skb->offload_fwd_mark = rocker_port->dev->offload_fwd_mark; + rocker_port->dev->stats.rx_packets++; rocker_port->dev->stats.rx_bytes += skb->len; @@ -4951,6 +4957,8 @@ static int rocker_probe_port(struct rocker *rocker, unsigned int port_number) } rocker->ports[port_number] = rocker_port; + switchdev_port_fwd_mark_set(rocker_port->dev, NULL, false); + rocker_port_set_learning(rocker_port, SWITCHDEV_TRANS_NONE); err = rocker_port_ig_tbl(rocker_port, SWITCHDEV_TRANS_NONE, 0); @@ -5230,6 +5238,7 @@ static int rocker_port_bridge_join(struct rocker_port *rocker_port, rocker_port_internal_vlan_id_get(rocker_port, bridge->ifindex); rocker_port->bridge_dev = bridge; + switchdev_port_fwd_mark_set(rocker_port->dev, bridge, true); return rocker_port_vlan_add(rocker_port, SWITCHDEV_TRANS_NONE, untagged_vid, 0); @@ -5250,6 +5259,8 @@ static int rocker_port_bridge_leave(struct rocker_port *rocker_port) rocker_port_internal_vlan_id_get(rocker_port, rocker_port->dev->ifindex); + switchdev_port_fwd_mark_set(rocker_port->dev, rocker_port->bridge_dev, + false); rocker_port->bridge_dev = NULL; err = rocker_port_vlan_add(rocker_port, SWITCHDEV_TRANS_NONE, diff --git a/drivers/net/ethernet/rocker/rocker.h b/drivers/net/ethernet/rocker/rocker.h index 08b2c3d..12490b2 100644 --- a/drivers/net/ethernet/rocker/rocker.h +++ b/drivers/net/ethernet/rocker/rocker.h @@ -246,6 +246,7 @@ enum { #define ROCKER_RX_FLAGS_TCPBIT(5) #define ROCKER_RX_FLAGS_UDPBIT(6) #define ROCKER_RX_FLAGS_TCP_UDP_CSUM_GOOD BIT(7) +#define ROCKER_RX_FLAGS_FWD_OFFLOADBIT(8) enum { ROCKER_TLV_TX_UNSPEC, -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next 3/5] switchdev: add offload_fwd_mark generator helper
From: Scott Feldman skb->offload_fwd_mark and dev->offload_fwd_mark are 32-bit and should be unique for device and may even be unique for a sub-set of ports within device, so add switchdev helper function to generate unique marks based on port's switch ID and group_ifindex. group_ifindex would typically be the container dev's ifindex, such as the bridge's ifindex. The generator uses a global hash table to store offload_fwd_marks hashed by {switch ID, group_ifindex} key. Signed-off-by: Scott Feldman --- include/net/switchdev.h |9 net/switchdev/switchdev.c | 103 + 2 files changed, 112 insertions(+) diff --git a/include/net/switchdev.h b/include/net/switchdev.h index d5671f1..89da893 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -157,6 +157,9 @@ int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, struct net_device *filter_dev, int idx); +void switchdev_port_fwd_mark_set(struct net_device *dev, +struct net_device *group_dev, +bool joining); #else @@ -271,6 +274,12 @@ static inline int switchdev_port_fdb_dump(struct sk_buff *skb, return -EOPNOTSUPP; } +static inline void switchdev_port_fwd_mark_set(struct net_device *dev, + struct net_device *group_dev, + bool joining) +{ +} + #endif #endif /* _LINUX_SWITCHDEV_H_ */ diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index e16586f..6de147d 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -1035,3 +1035,106 @@ void switchdev_fib_ipv4_abort(struct fib_info *fi) fi->fib_net->ipv4.fib_offload_disabled = true; } EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_abort); + +static bool switchdev_port_same_parent_id(struct net_device *a, + struct net_device *b) +{ + struct switchdev_attr a_attr = { + .id = SWITCHDEV_ATTR_PORT_PARENT_ID, + .flags = SWITCHDEV_F_NO_RECURSE, + }; + struct switchdev_attr b_attr = { + .id = SWITCHDEV_ATTR_PORT_PARENT_ID, + .flags = SWITCHDEV_F_NO_RECURSE, + }; + + if (switchdev_port_attr_get(a, &a_attr) || + switchdev_port_attr_get(b, &b_attr)) + return false; + + return netdev_phys_item_id_same(&a_attr.u.ppid, &b_attr.u.ppid); +} + +static u32 switchdev_port_fwd_mark_get(struct net_device *dev, + struct net_device *group_dev) +{ + struct net_device *lower_dev; + struct list_head *iter; + + netdev_for_each_lower_dev(group_dev, lower_dev, iter) { + if (lower_dev == dev) + continue; + if (switchdev_port_same_parent_id(dev, lower_dev)) + return lower_dev->offload_fwd_mark; + return switchdev_port_fwd_mark_get(dev, lower_dev); + } + + return dev->ifindex; +} + +static void switchdev_port_fwd_mark_reset(struct net_device *group_dev, + u32 old_mark, u32 *reset_mark) +{ + struct net_device *lower_dev; + struct list_head *iter; + + netdev_for_each_lower_dev(group_dev, lower_dev, iter) { + if (lower_dev->offload_fwd_mark == old_mark) { + if (!*reset_mark) + *reset_mark = lower_dev->ifindex; + lower_dev->offload_fwd_mark = *reset_mark; + } + switchdev_port_fwd_mark_reset(lower_dev, old_mark, reset_mark); + } +} + +/** + * switchdev_port_fwd_mark_set - Set port offload forwarding mark + * + * @dev: port device + * @group_dev: containing device + * @joining: true if dev is joining group; false if leaving group + * + * An ungrouped port's offload mark is just its ifindex. A grouped + * port's (member of a bridge, for example) offload mark is the ifindex + * of one of the ports in the group with the same parent (switch) ID. + * Ports on the same device in the same group will have the same mark. + * + * Example: + * + * br0 ifindex=9 + * sw1p1 ifindex=2 mark=2 + * sw1p2 ifindex=3 mark=2 + * sw2p1 ifindex=4 mark=5 + * sw2p2 ifindex=5 mark=5 + * + * If sw2p2 leaves the bridge, we'll have: + * + * br0 ifindex=9 + * sw1p1 ifindex=2 mark=2 + * sw1p2 ifindex=3 mark=2 + * sw2p1 ifindex=4 mark=4 + * sw2p2 ifindex=5
[PATCH net-next 5/5] switchdev: update documentation for offload_fwd_mark
From: Scott Feldman Signed-off-by: Scott Feldman --- Documentation/networking/switchdev.txt | 14 -- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/Documentation/networking/switchdev.txt b/Documentation/networking/switchdev.txt index c5d7ade..b864e47 100644 --- a/Documentation/networking/switchdev.txt +++ b/Documentation/networking/switchdev.txt @@ -279,8 +279,18 @@ and unknown unicast packets to all ports in domain, if allowed by port's current STP state. The switch driver, knowing which ports are within which vlan L2 domain, can program the switch device for flooding. The packet should also be sent to the port netdev for processing by the bridge driver. The -bridge should not reflood the packet to the same ports the device flooded. -XXX: the mechanism to avoid duplicate flood packets is being discuseed. +bridge should not reflood the packet to the same ports the device flooded, +otherwise there will be duplicate packets on the wire. + +To avoid duplicate packets, the device/driver can mark a packet as already +forwarded using skb->offload_fwd_mark. The same mark is set on the device +ports in the domain using dev->offload_fwd_mark. If the skb->offload_fwd_mark +is non-zero and matches the forwarding egress port's dev->skb_mark, the kernel +will drop the skb right before transmit on the egress port, with the +understanding that the device already forwarded the packet on same egress port. +The driver can use switchdev_port_fwd_mark_set() to set a globally unique mark +for port's dev->offload_fwd_mark, based on the port's parent ID (switch ID) and +a group ifindex. It is possible for the switch device to not handle flooding and push the packets up to the bridge driver for flooding. This is not ideal as the number -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next 0/5] switchdev: avoid duplicate packet forwarding
From: Scott Feldman This patchset was previously submitted as RFC. No changes from the last version (v2) sent under RFC. Including RFC version history here for reference. RFC v2: - s/fwd_mark/offload_fwd_mark - use consume_skb rather than kfree_skb when dropping pkt on egress. - Use Jiri's suggestion to use ifindex of one of the ports in a group as the mark for all the ports in the group. This can be done with no additional storage (no hashtable from v1). To pull it off, we need some simple recursive routines to walk the netdev tree ensuring all leaves in the tree (ports) in the same group (e.g. bridge) belonging to the same switch device will have the same offload fwd mark. Maybe someone sees a better design for the recusive routines? They're not too bad, and should cover the stacked driver cases. RFC v1: With switchdev support for offloading L2/L3 forwarding data path to a switch device, we have a general problem where both the device and the kernel may forward the packet, resulting in duplicate packets on the wire. Anytime a packet is forwarded by the device and a copy is sent to the CPU, there is potential for duplicate forwarding, as the kernel may also do a forwarding lookup and send the packet on the wire. The specific problem this patch series is interested in solving is avoiding duplicate packets on bridged ports. There was a previous RFC from Roopa (http://marc.info/?l=linux-netdev&m=142687073314252&w=2) to address this problem, but didn't solve the problem of mixed ports in the bridge from different devices; there was no way to exclude some ports from forwarding and include others. This RFC solves that problem by tagging the ingressing packet with a unique mark, and then comparing the packet mark with the egress port mark, and skip forwarding when there is a match. For the mixed ports bridge case, only those ports with matching marks are skipped. The switchdev port driver must do two things: 1) Generate a fwd_mark for each switch port, using some unique key of the switch device (and optionally port). This is done when the port netdev is registered or if the port's group membership changes (joins/leaves a bridge, for example). 2) On packet ingress from port, mark the skb with the ingress port's fwd_mark. If the device supports it, it's useful to only mark skbs which were already forwarded by the device. If the device does not support such indication, all skbs can be marked, even if they're local dst. Two new 32-bit fields are added to struct sk_buff and struct netdevice to hold the fwd_mark. I've wrapped these with CONFIG_NET_SWITCHDEV for now. I tried using skb->mark for this purpose, but ebtables can overwrite the skb->mark before the bridge gets it, so that will not work. In general, this fwd_mark can be used for any case where a packet is forwarded by the device and a copy is sent to the CPU, to avoid the kernel re-forwarding the packet. sFlow is another use-case that comes to mind, but I haven't explored the details. Scott Feldman (5): net: don't reforward packets already forwarded by offload device net: add phys ID compare helper to test if two IDs are the same switchdev: add offload_fwd_mark generator helper rocker: add offload_fwd_mark support switchdev: update documentation for offload_fwd_mark Documentation/networking/switchdev.txt | 14 +++- drivers/net/ethernet/rocker/rocker.c | 14 +++- drivers/net/ethernet/rocker/rocker.h |1 + include/linux/netdevice.h | 13 include/linux/skbuff.h |4 ++ include/net/switchdev.h|9 +++ net/core/dev.c | 10 +++ net/switchdev/switchdev.c | 111 ++-- 8 files changed, 167 insertions(+), 9 deletions(-) -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next 2/5] net: add phys ID compare helper to test if two IDs are the same
From: Scott Feldman Signed-off-by: Scott Feldman --- include/linux/netdevice.h |7 +++ net/switchdev/switchdev.c |8 ++-- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7be616e1..89db412 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -766,6 +766,13 @@ struct netdev_phys_item_id { unsigned char id_len; }; +static inline bool netdev_phys_item_id_same(struct netdev_phys_item_id *a, + struct netdev_phys_item_id *b) +{ + return a->id_len == b->id_len && + memcmp(a->id, b->id, a->id_len) == 0; +} + typedef u16 (*select_queue_fallback_t)(struct net_device *dev, struct sk_buff *skb); diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 84f77a0..e16586f 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -906,13 +906,9 @@ static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi) if (switchdev_port_attr_get(dev, &attr)) return NULL; - if (nhsel > 0) { - if (prev_attr.u.ppid.id_len != attr.u.ppid.id_len) + if (nhsel > 0 && + !netdev_phys_item_id_same(&prev_attr.u.ppid, &attr.u.ppid)) return NULL; - if (memcmp(prev_attr.u.ppid.id, attr.u.ppid.id, - attr.u.ppid.id_len)) - return NULL; - } prev_attr = attr; } -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next 1/5] net: don't reforward packets already forwarded by offload device
From: Scott Feldman Just before queuing skb for xmit on port, check if skb has been marked by switchdev port driver as already fordwarded by device. If so, drop skb. A non-zero skb->offload_fwd_mark field is set by the switchdev port driver/device on ingress to indicate the skb has already been forwarded by the device to egress ports with matching dev->skb_mark. The switchdev port driver would assign a non-zero dev->skb_mark for each device port netdev during registration, for example. Signed-off-by: Scott Feldman --- include/linux/netdevice.h |6 ++ include/linux/skbuff.h|4 net/core/dev.c| 10 ++ 3 files changed, 20 insertions(+) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e20979d..7be616e1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1448,6 +1448,8 @@ enum netdev_priv_flags { * * @xps_maps: XXX: need comments on this one * + * @offload_fwd_mark: Offload device fwding mark + * * @trans_start: Time (in jiffies) of last Tx * @watchdog_timeo:Represents the timeout that is used by * the watchdog ( see dev_watchdog() ) @@ -1685,6 +1687,10 @@ struct net_device { struct xps_dev_maps __rcu *xps_maps; #endif +#ifdef CONFIG_NET_SWITCHDEV + u32 offload_fwd_mark; +#endif + /* These may be needed for future network-power-down code. */ /* diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d6cdd6e..1533c4f 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -506,6 +506,7 @@ static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1, * @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS *@napi_id: id of the NAPI struct this skb came from * @secmark: security marking + * @offload_fwd_mark: fwding offload mark * @mark: Generic packet mark * @vlan_proto: vlan encapsulation protocol * @vlan_tci: vlan tag control information @@ -653,6 +654,9 @@ struct sk_buff { #ifdef CONFIG_NETWORK_SECMARK __u32 secmark; #endif +#ifdef CONFIG_NET_SWITCHDEV + __u32 offload_fwd_mark; +#endif union { __u32 mark; __u32 reserved_tailroom; diff --git a/net/core/dev.c b/net/core/dev.c index e0d2701..71919cc 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3065,6 +3065,16 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) else skb_dst_force(skb); +#ifdef CONFIG_NET_SWITCHDEV + /* Don't forward if offload device already forwarded */ + if (skb->offload_fwd_mark && + skb->offload_fwd_mark == dev->offload_fwd_mark) { + consume_skb(skb); + rc = NET_XMIT_SUCCESS; + goto out; + } +#endif + txq = netdev_pick_tx(dev, skb, accel_priv); q = rcu_dereference_bh(txq->qdisc); -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next 4/5] rocker: add offload_fwd_mark support
From: Scott Feldman If device flags ingress packet as "fwd offload", mark the skb->offlaod_fwd_mark using the ingress port's dev->offlaod_fwd_mark. This will be the hint to the kernel that this packet has already been forwarded by device to egress ports matching skb->offlaod_fwd_mark. For rocker, derive port dev->offlaod_fwd_mark based on device switch ID and port ifindex. If port is bridged, use the bridge ifindex rather than the port ifindex. Signed-off-by: Scott Feldman --- drivers/net/ethernet/rocker/rocker.c | 14 +- drivers/net/ethernet/rocker/rocker.h |1 + 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index c005167..a4ced91 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -4787,6 +4787,7 @@ static int rocker_port_rx_proc(const struct rocker *rocker, const struct rocker_tlv *attrs[ROCKER_TLV_RX_MAX + 1]; struct sk_buff *skb = rocker_desc_cookie_ptr_get(desc_info); size_t rx_len; + u16 rx_flags = 0; if (!skb) return -ENOENT; @@ -4794,6 +4795,8 @@ static int rocker_port_rx_proc(const struct rocker *rocker, rocker_tlv_parse_desc(attrs, ROCKER_TLV_RX_MAX, desc_info); if (!attrs[ROCKER_TLV_RX_FRAG_LEN]) return -EINVAL; + if (attrs[ROCKER_TLV_RX_FLAGS]) + rx_flags = rocker_tlv_get_u16(attrs[ROCKER_TLV_RX_FLAGS]); rocker_dma_rx_ring_skb_unmap(rocker, attrs); @@ -4801,6 +4804,9 @@ static int rocker_port_rx_proc(const struct rocker *rocker, skb_put(skb, rx_len); skb->protocol = eth_type_trans(skb, rocker_port->dev); + if (rx_flags & ROCKER_RX_FLAGS_FWD_OFFLOAD) + skb->offload_fwd_mark = rocker_port->dev->offload_fwd_mark; + rocker_port->dev->stats.rx_packets++; rocker_port->dev->stats.rx_bytes += skb->len; @@ -4938,11 +4944,13 @@ static int rocker_probe_port(struct rocker *rocker, unsigned int port_number) } rocker->ports[port_number] = rocker_port; + switchdev_port_fwd_mark_set(rocker_port->dev, NULL, false); + rocker_port_set_learning(rocker_port, SWITCHDEV_TRANS_NONE); err = rocker_port_ig_tbl(rocker_port, SWITCHDEV_TRANS_NONE, 0); if (err) { - dev_err(&pdev->dev, "install ig port table failed\n"); + netdev_err(rocker_port->dev, "install ig port table failed\n"); goto err_port_ig_tbl; } @@ -4962,6 +4970,7 @@ err_untagged_vlan: rocker_port_ig_tbl(rocker_port, SWITCHDEV_TRANS_NONE, ROCKER_OP_FLAG_REMOVE); err_port_ig_tbl: + rocker->ports[port_number] = NULL; unregister_netdev(dev); err_register_netdev: free_netdev(dev); @@ -5217,6 +5226,7 @@ static int rocker_port_bridge_join(struct rocker_port *rocker_port, rocker_port_internal_vlan_id_get(rocker_port, bridge->ifindex); rocker_port->bridge_dev = bridge; + switchdev_port_fwd_mark_set(rocker_port->dev, bridge, true); return rocker_port_vlan_add(rocker_port, SWITCHDEV_TRANS_NONE, untagged_vid, 0); @@ -5237,6 +5247,8 @@ static int rocker_port_bridge_leave(struct rocker_port *rocker_port) rocker_port_internal_vlan_id_get(rocker_port, rocker_port->dev->ifindex); + switchdev_port_fwd_mark_set(rocker_port->dev, rocker_port->bridge_dev, + false); rocker_port->bridge_dev = NULL; err = rocker_port_vlan_add(rocker_port, SWITCHDEV_TRANS_NONE, diff --git a/drivers/net/ethernet/rocker/rocker.h b/drivers/net/ethernet/rocker/rocker.h index 08b2c3d..12490b2 100644 --- a/drivers/net/ethernet/rocker/rocker.h +++ b/drivers/net/ethernet/rocker/rocker.h @@ -246,6 +246,7 @@ enum { #define ROCKER_RX_FLAGS_TCPBIT(5) #define ROCKER_RX_FLAGS_UDPBIT(6) #define ROCKER_RX_FLAGS_TCP_UDP_CSUM_GOOD BIT(7) +#define ROCKER_RX_FLAGS_FWD_OFFLOADBIT(8) enum { ROCKER_TLV_TX_UNSPEC, -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next] rocker: add change MTU support
From: Scott Feldman Implement ndo_change_mtu: on MTU change, reallocate Rx ring bufs and signal HW of new port MTU value. Signed-off-by: Scott Feldman --- drivers/net/ethernet/rocker/rocker.c | 61 ++ drivers/net/ethernet/rocker/rocker.h |1 + 2 files changed, 62 insertions(+) diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index 2d8578cade..c005167 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -1818,6 +1818,30 @@ rocker_cmd_set_port_settings_macaddr_prep(const struct rocker_port *rocker_port, } static int +rocker_cmd_set_port_settings_mtu_prep(const struct rocker_port *rocker_port, + struct rocker_desc_info *desc_info, + void *priv) +{ + int mtu = *(int *)priv; + struct rocker_tlv *cmd_info; + + if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_CMD_TYPE, + ROCKER_TLV_CMD_TYPE_SET_PORT_SETTINGS)) + return -EMSGSIZE; + cmd_info = rocker_tlv_nest_start(desc_info, ROCKER_TLV_CMD_INFO); + if (!cmd_info) + return -EMSGSIZE; + if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_CMD_PORT_SETTINGS_PPORT, + rocker_port->pport)) + return -EMSGSIZE; + if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_CMD_PORT_SETTINGS_MTU, + mtu)) + return -EMSGSIZE; + rocker_tlv_nest_end(desc_info, cmd_info); + return 0; +} + +static int rocker_cmd_set_port_learning_prep(const struct rocker_port *rocker_port, struct rocker_desc_info *desc_info, void *priv) @@ -1874,6 +1898,14 @@ static int rocker_cmd_set_port_settings_macaddr(struct rocker_port *rocker_port, macaddr, NULL, NULL); } +static int rocker_cmd_set_port_settings_mtu(struct rocker_port *rocker_port, + int mtu) +{ + return rocker_cmd_exec(rocker_port, SWITCHDEV_TRANS_NONE, 0, + rocker_cmd_set_port_settings_mtu_prep, + &mtu, NULL, NULL); +} + static int rocker_port_set_learning(struct rocker_port *rocker_port, enum switchdev_trans trans) { @@ -4152,6 +4184,34 @@ static int rocker_port_set_mac_address(struct net_device *dev, void *p) return 0; } +static int rocker_port_change_mtu(struct net_device *dev, int new_mtu) +{ + struct rocker_port *rocker_port = netdev_priv(dev); + int running = netif_running(dev); + int err; + +#define ROCKER_PORT_MIN_MTU68 +#define ROCKER_PORT_MAX_MTU9000 + + if (new_mtu < ROCKER_PORT_MIN_MTU || new_mtu > ROCKER_PORT_MAX_MTU) + return -EINVAL; + + if (running) + rocker_port_stop(dev); + + netdev_info(dev, "MTU change from %d to %d\n", dev->mtu, new_mtu); + dev->mtu = new_mtu; + + err = rocker_cmd_set_port_settings_mtu(rocker_port, new_mtu); + if (err) + return err; + + if (running) + err = rocker_port_open(dev); + + return err; +} + static int rocker_port_get_phys_port_name(struct net_device *dev, char *buf, size_t len) { @@ -4172,6 +4232,7 @@ static const struct net_device_ops rocker_port_netdev_ops = { .ndo_stop = rocker_port_stop, .ndo_start_xmit = rocker_port_xmit, .ndo_set_mac_address= rocker_port_set_mac_address, + .ndo_change_mtu = rocker_port_change_mtu, .ndo_bridge_getlink = switchdev_port_bridge_getlink, .ndo_bridge_setlink = switchdev_port_bridge_setlink, .ndo_bridge_dellink = switchdev_port_bridge_dellink, diff --git a/drivers/net/ethernet/rocker/rocker.h b/drivers/net/ethernet/rocker/rocker.h index c61fbf9..08b2c3d 100644 --- a/drivers/net/ethernet/rocker/rocker.h +++ b/drivers/net/ethernet/rocker/rocker.h @@ -159,6 +159,7 @@ enum { ROCKER_TLV_CMD_PORT_SETTINGS_MODE, /* u8 */ ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING, /* u8 */ ROCKER_TLV_CMD_PORT_SETTINGS_PHYS_NAME, /* binary */ + ROCKER_TLV_CMD_PORT_SETTINGS_MTU, /* u16 */ __ROCKER_TLV_CMD_PORT_SETTINGS_MAX, ROCKER_TLV_CMD_PORT_SETTINGS_MAX = -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next 1/2] switchdev: rename vlan vid_start to vid_begin
From: Scott Feldman Use vid_begin/end to be consistent with BRIDGE_VLAN_INFO_RANGE_BEGIN/END. Signed-off-by: Scott Feldman --- drivers/net/ethernet/rocker/rocker.c |4 ++-- include/net/switchdev.h |2 +- net/bridge/br_vlan.c |4 ++-- net/switchdev/switchdev.c| 12 ++-- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index b72674c..76c3086 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -4295,7 +4295,7 @@ static int rocker_port_vlans_add(struct rocker_port *rocker_port, u16 vid; int err; - for (vid = vlan->vid_start; vid <= vlan->vid_end; vid++) { + for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) { err = rocker_port_vlan_add(rocker_port, trans, vid, vlan->flags); if (err) @@ -4378,7 +4378,7 @@ static int rocker_port_vlans_del(struct rocker_port *rocker_port, u16 vid; int err; - for (vid = vlan->vid_start; vid <= vlan->vid_end; vid++) { + for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) { err = rocker_port_vlan_del(rocker_port, vid, vlan->flags); if (err) return err; diff --git a/include/net/switchdev.h b/include/net/switchdev.h index d882902..89da893 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -57,7 +57,7 @@ struct switchdev_obj { union { struct switchdev_obj_vlan { /* PORT_VLAN */ u16 flags; - u16 vid_start; + u16 vid_begin; u16 vid_end; } vlan; struct switchdev_obj_ipv4_fib { /* IPV4_FIB */ diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 17fc358..574feea 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -54,7 +54,7 @@ static int __vlan_vid_add(struct net_device *dev, struct net_bridge *br, .id = SWITCHDEV_OBJ_PORT_VLAN, .u.vlan = { .flags = flags, - .vid_start = vid, + .vid_begin = vid, .vid_end = vid, }, }; @@ -132,7 +132,7 @@ static void __vlan_vid_del(struct net_device *dev, struct net_bridge *br, struct switchdev_obj vlan_obj = { .id = SWITCHDEV_OBJ_PORT_VLAN, .u.vlan = { - .vid_start = vid, + .vid_begin = vid, .vid_end = vid, }, }; diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index afce9cc..70764c5 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -511,23 +511,23 @@ static int switchdev_port_br_afspec(struct net_device *dev, vinfo = nla_data(attr); vlan->flags = vinfo->flags; if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) { - if (vlan->vid_start) + if (vlan->vid_begin) return -EINVAL; - vlan->vid_start = vinfo->vid; + vlan->vid_begin = vinfo->vid; } else if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_END) { - if (!vlan->vid_start) + if (!vlan->vid_begin) return -EINVAL; vlan->vid_end = vinfo->vid; - if (vlan->vid_end <= vlan->vid_start) + if (vlan->vid_end <= vlan->vid_begin) return -EINVAL; err = f(dev, &obj); if (err) return err; memset(vlan, 0, sizeof(*vlan)); } else { - if (vlan->vid_start) + if (vlan->vid_begin) return -EINVAL; - vlan->vid_start = vinfo->vid; + vlan->vid_begin = vinfo->vid; vlan->vid_end = vinfo->vid; err = f(dev, &obj); if (err) -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in
[PATCH net-next 2/2] switchdev; add VLAN support for port's bridge_getlink
From: Scott Feldman One more missing piece of the puzzle. Add vlan dump support to switchdev port's bridge_getlink. iproute2 "bridge vlan show" cmd already knows how to show the vlans installed on the bridge and the device , but (until now) no one implemented the port vlan part of the netlink PF_BRIDGE:RTM_GETLINK msg. Before this patch, "bridge vlan show": $ bridge -c vlan show portvlan ids sw1p130-34 << bridge side vlans 57 sw1p1 << device side vlans (missing) sw1p257 sw1p2 sw1p3 sw1p4 br0 None (When the port is bridged, the output repeats the vlan list for the vlans on the bridge side of the port and the vlans on the device side of the port. The listing above show no vlans for the device side even though they are installed). After this patch: $ bridge -c vlan show portvlan ids sw1p130-34 << bridge side vlan 57 sw1p130-34 << device side vlans 57 3840 PVID sw1p257 sw1p257 3840 PVID sw1p33842 PVID sw1p43843 PVID br0 None I re-used ndo_dflt_bridge_getlink to add vlan fill call-back func. switchdev support adds an obj dump for VLAN objects, using the same call-back scheme as FDB dump. Support included for both compressed and un-compressed vlan dumps. Signed-off-by: Scott Feldman --- drivers/net/ethernet/emulex/benet/be_main.c |2 +- drivers/net/ethernet/intel/i40e/i40e_main.c |4 +- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |3 +- drivers/net/ethernet/rocker/rocker.c | 25 + include/linux/rtnetlink.h |6 +- net/core/rtnetlink.c | 18 +++- net/switchdev/switchdev.c | 123 - 7 files changed, 172 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index c0f3484..6f64242 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -5096,7 +5096,7 @@ static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, return ndo_dflt_bridge_getlink(skb, pid, seq, dev, hsw_mode == PORT_FWD_TYPE_VEPA ? BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB, - 0, 0, nlflags); + 0, 0, nlflags, filter_mask, NULL); } #ifdef CONFIG_BE2NET_VXLAN diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 52d7d8b..48a52b3 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -8069,7 +8069,7 @@ static int i40e_ndo_bridge_setlink(struct net_device *dev, #ifdef HAVE_BRIDGE_FILTER static int i40e_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, - u32 __always_unused filter_mask, int nlflags) + u32 filter_mask, int nlflags) #else static int i40e_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, int nlflags) @@ -8095,7 +8095,7 @@ static int i40e_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, return 0; return ndo_dflt_bridge_getlink(skb, pid, seq, dev, veb->bridge_mode, - nlflags); + nlflags, 0, 0, filter_mask, NULL); } #endif /* HAVE_BRIDGE_ATTRIBS */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 3bf2f3c..9aa6104 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -8095,7 +8095,8 @@ static int ixgbe_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, return 0; return ndo_dflt_bridge_getlink(skb, pid, seq, dev, - adapter->bridge_mode, 0, 0, nlflags); + adapter->bridge_mode, 0, 0, nlflags, + filter_mask, NULL); } static void *ixgbe_fwd_add(struct net_device *pdev, struct net_device *vdev) diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index 76c3086..e6ae0a1 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -4456,6 +4456,28 @@ static int rocker_port_fdb_dump(const struct rocker_port *rocker_port, return err; } +static int rocker_port_vlan_dump(const struct rocker_
[PATCH net-next 0/2] switchdev; add VLAN support for port's bridge_getlink
From: Scott Feldman One more missing piece of the puzzle. Add vlan dump support to switchdev port's bridge_getlink. iproute2 "bridge vlan show" cmd already knows how to show the vlans installed on the bridge and the device , but (until now) no one implemented the port vlan part of the netlink PF_BRIDGE:RTM_GETLINK msg. Before this patch, "bridge vlan show": $ bridge -c vlan show portvlan ids sw1p130-34 << bridge side vlans 57 sw1p1 << device side vlans (missing) sw1p257 sw1p2 sw1p3 sw1p4 br0 None (When the port is bridged, the output repeats the vlan list for the vlans on the bridge side of the port and the vlans on the device side of the port. The listing above show no vlans for the device side even though they are installed). After this patch: $ bridge -c vlan show portvlan ids sw1p130-34 << bridge side vlan 57 sw1p130-34 << device side vlans 57 3840 PVID sw1p257 sw1p257 3840 PVID sw1p33842 PVID sw1p43843 PVID br0 None Scott Feldman (2): switchdev: rename vlan vid_start to vid_begin switchdev; add VLAN support for port's bridge_getlink drivers/net/ethernet/emulex/benet/be_main.c |2 +- drivers/net/ethernet/intel/i40e/i40e_main.c |4 +- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |3 +- drivers/net/ethernet/rocker/rocker.c | 29 +- include/linux/rtnetlink.h |6 +- include/net/switchdev.h |2 +- net/bridge/br_vlan.c |4 +- net/core/rtnetlink.c | 18 +++- net/switchdev/switchdev.c | 135 +++-- 9 files changed, 183 insertions(+), 20 deletions(-) -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in
[PATCH net-next] switchdev: fdb filter_dev is always NULL for self (device), so remove check
From: Scott Feldman Remove the filter_dev check when dumping fdb entries, otherwise dump returns empty list. filter_dev is always passed as NULL when dumping fdbs on SELF. We want the fdbs installed on the device to be listed in the dump. Signed-off-by: Scott Feldman Fixes: 45d4122c ("switchdev: add support for fdb add/del/dump via switchdev_port_obj ops") --- net/switchdev/switchdev.c |6 -- 1 file changed, 6 deletions(-) diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index a5d0f8e..7dda437 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -657,7 +657,6 @@ struct switchdev_fdb_dump { struct switchdev_obj obj; struct sk_buff *skb; struct netlink_callback *cb; - struct net_device *filter_dev; int idx; }; @@ -670,14 +669,10 @@ static int switchdev_port_fdb_dump_cb(struct net_device *dev, u32 seq = dump->cb->nlh->nlmsg_seq; struct nlmsghdr *nlh; struct ndmsg *ndm; - struct net_device *master = netdev_master_upper_dev_get(dev); if (dump->idx < dump->cb->args[0]) goto skip; - if (master && dump->filter_dev != master) - goto skip; - nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH, sizeof(*ndm), NLM_F_MULTI); if (!nlh) @@ -731,7 +726,6 @@ int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, }, .skb = skb, .cb = cb, - .filter_dev = filter_dev, .idx = idx, }; int err; -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[RFC PATCH net-next v2 1/5] net: don't reforward packets already forwarded by offload device
From: Scott Feldman Just before queuing skb for xmit on port, check if skb has been marked by switchdev port driver as already fordwarded by device. If so, drop skb. A non-zero skb->offload_fwd_mark field is set by the switchdev port driver/device on ingress to indicate the skb has already been forwarded by the device to egress ports with matching dev->skb_mark. The switchdev port driver would assign a non-zero dev->skb_mark for each device port netdev during registration, for example. Signed-off-by: Scott Feldman --- include/linux/netdevice.h |6 ++ include/linux/skbuff.h|4 net/core/dev.c| 10 ++ 3 files changed, 20 insertions(+) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e20979d..7be616e1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1448,6 +1448,8 @@ enum netdev_priv_flags { * * @xps_maps: XXX: need comments on this one * + * @offload_fwd_mark: Offload device fwding mark + * * @trans_start: Time (in jiffies) of last Tx * @watchdog_timeo:Represents the timeout that is used by * the watchdog ( see dev_watchdog() ) @@ -1685,6 +1687,10 @@ struct net_device { struct xps_dev_maps __rcu *xps_maps; #endif +#ifdef CONFIG_NET_SWITCHDEV + u32 offload_fwd_mark; +#endif + /* These may be needed for future network-power-down code. */ /* diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d6cdd6e..1533c4f 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -506,6 +506,7 @@ static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1, * @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS *@napi_id: id of the NAPI struct this skb came from * @secmark: security marking + * @offload_fwd_mark: fwding offload mark * @mark: Generic packet mark * @vlan_proto: vlan encapsulation protocol * @vlan_tci: vlan tag control information @@ -653,6 +654,9 @@ struct sk_buff { #ifdef CONFIG_NETWORK_SECMARK __u32 secmark; #endif +#ifdef CONFIG_NET_SWITCHDEV + __u32 offload_fwd_mark; +#endif union { __u32 mark; __u32 reserved_tailroom; diff --git a/net/core/dev.c b/net/core/dev.c index 6778a99..9eb517e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3065,6 +3065,16 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) else skb_dst_force(skb); +#ifdef CONFIG_NET_SWITCHDEV + /* Don't forward if offload device already forwarded */ + if (skb->offload_fwd_mark && + skb->offload_fwd_mark == dev->offload_fwd_mark) { + consume_skb(skb); + rc = NET_XMIT_SUCCESS; + goto out; + } +#endif + txq = netdev_pick_tx(dev, skb, accel_priv); q = rcu_dereference_bh(txq->qdisc); -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[RFC PATCH net-next v2 3/5] switchdev: add offload_fwd_mark generator helper
From: Scott Feldman skb->offload_fwd_mark and dev->offload_fwd_mark are 32-bit and should be unique for device and may even be unique for a sub-set of ports within device, so add switchdev helper function to generate unique marks based on port's switch ID and group_ifindex. group_ifindex would typically be the container dev's ifindex, such as the bridge's ifindex. The generator uses a global hash table to store offload_fwd_marks hashed by {switch ID, group_ifindex} key. Signed-off-by: Scott Feldman --- include/net/switchdev.h |9 net/switchdev/switchdev.c | 103 + 2 files changed, 112 insertions(+) diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 437f8fe..d882902 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -157,6 +157,9 @@ int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, struct net_device *filter_dev, int idx); +void switchdev_port_fwd_mark_set(struct net_device *dev, +struct net_device *group_dev, +bool joining); #else @@ -271,6 +274,12 @@ static inline int switchdev_port_fdb_dump(struct sk_buff *skb, return -EOPNOTSUPP; } +static inline void switchdev_port_fwd_mark_set(struct net_device *dev, + struct net_device *group_dev, + bool joining) +{ +} + #endif #endif /* _LINUX_SWITCHDEV_H_ */ diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 00c67a5..6cb30bf 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -920,3 +920,106 @@ void switchdev_fib_ipv4_abort(struct fib_info *fi) fi->fib_net->ipv4.fib_offload_disabled = true; } EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_abort); + +static bool switchdev_port_same_parent_id(struct net_device *a, + struct net_device *b) +{ + struct switchdev_attr a_attr = { + .id = SWITCHDEV_ATTR_PORT_PARENT_ID, + .flags = SWITCHDEV_F_NO_RECURSE, + }; + struct switchdev_attr b_attr = { + .id = SWITCHDEV_ATTR_PORT_PARENT_ID, + .flags = SWITCHDEV_F_NO_RECURSE, + }; + + if (switchdev_port_attr_get(a, &a_attr) || + switchdev_port_attr_get(b, &b_attr)) + return false; + + return netdev_phys_item_id_same(&a_attr.u.ppid, &b_attr.u.ppid); +} + +static u32 switchdev_port_fwd_mark_get(struct net_device *dev, + struct net_device *group_dev) +{ + struct net_device *lower_dev; + struct list_head *iter; + + netdev_for_each_lower_dev(group_dev, lower_dev, iter) { + if (lower_dev == dev) + continue; + if (switchdev_port_same_parent_id(dev, lower_dev)) + return lower_dev->offload_fwd_mark; + return switchdev_port_fwd_mark_get(dev, lower_dev); + } + + return dev->ifindex; +} + +static void switchdev_port_fwd_mark_reset(struct net_device *group_dev, + u32 old_mark, u32 *reset_mark) +{ + struct net_device *lower_dev; + struct list_head *iter; + + netdev_for_each_lower_dev(group_dev, lower_dev, iter) { + if (lower_dev->offload_fwd_mark == old_mark) { + if (!*reset_mark) + *reset_mark = lower_dev->ifindex; + lower_dev->offload_fwd_mark = *reset_mark; + } + switchdev_port_fwd_mark_reset(lower_dev, old_mark, reset_mark); + } +} + +/** + * switchdev_port_fwd_mark_set - Set port offload forwarding mark + * + * @dev: port device + * @group_dev: containing device + * @joining: true if dev is joining group; false if leaving group + * + * An ungrouped port's offload mark is just its ifindex. A grouped + * port's (member of a bridge, for example) offload mark is the ifindex + * of one of the ports in the group with the same parent (switch) ID. + * Ports on the same device in the same group will have the same mark. + * + * Example: + * + * br0 ifindex=9 + * sw1p1 ifindex=2 mark=2 + * sw1p2 ifindex=3 mark=2 + * sw2p1 ifindex=4 mark=5 + * sw2p2 ifindex=5 mark=5 + * + * If sw2p2 leaves the bridge, we'll have: + * + * br0 ifindex=9 + * sw1p1 ifindex=2 mark=2 + * sw1p2 ifindex=3 mark=2 + * sw2p1 ifindex=4 mark=4 + * sw2p2 ifindex=5
[RFC PATCH net-next v2 2/5] net: add phys ID compare helper to test if two IDs are the same
From: Scott Feldman Signed-off-by: Scott Feldman --- include/linux/netdevice.h |7 +++ net/switchdev/switchdev.c |8 ++-- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7be616e1..63090ce 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -766,6 +766,13 @@ struct netdev_phys_item_id { unsigned char id_len; }; +static inline bool netdev_phys_item_id_same(struct netdev_phys_item_id *a, + struct netdev_phys_item_id *b) +{ + return ((a->id_len == b->id_len) && + (memcmp(a->id, b->id, a->id_len) == 0)); +} + typedef u16 (*select_queue_fallback_t)(struct net_device *dev, struct sk_buff *skb); diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index a5d0f8e..00c67a5 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -791,13 +791,9 @@ static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi) if (switchdev_port_attr_get(dev, &attr)) return NULL; - if (nhsel > 0) { - if (prev_attr.u.ppid.id_len != attr.u.ppid.id_len) + if (nhsel > 0 && + !netdev_phys_item_id_same(&prev_attr.u.ppid, &attr.u.ppid)) return NULL; - if (memcmp(prev_attr.u.ppid.id, attr.u.ppid.id, - attr.u.ppid.id_len)) - return NULL; - } prev_attr = attr; } -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[RFC PATCH net-next v2 5/5] switchdev: update documentation for offload_fwd_mark
From: Scott Feldman Signed-off-by: Scott Feldman --- Documentation/networking/switchdev.txt | 14 -- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/Documentation/networking/switchdev.txt b/Documentation/networking/switchdev.txt index c5d7ade..b864e47 100644 --- a/Documentation/networking/switchdev.txt +++ b/Documentation/networking/switchdev.txt @@ -279,8 +279,18 @@ and unknown unicast packets to all ports in domain, if allowed by port's current STP state. The switch driver, knowing which ports are within which vlan L2 domain, can program the switch device for flooding. The packet should also be sent to the port netdev for processing by the bridge driver. The -bridge should not reflood the packet to the same ports the device flooded. -XXX: the mechanism to avoid duplicate flood packets is being discuseed. +bridge should not reflood the packet to the same ports the device flooded, +otherwise there will be duplicate packets on the wire. + +To avoid duplicate packets, the device/driver can mark a packet as already +forwarded using skb->offload_fwd_mark. The same mark is set on the device +ports in the domain using dev->offload_fwd_mark. If the skb->offload_fwd_mark +is non-zero and matches the forwarding egress port's dev->skb_mark, the kernel +will drop the skb right before transmit on the egress port, with the +understanding that the device already forwarded the packet on same egress port. +The driver can use switchdev_port_fwd_mark_set() to set a globally unique mark +for port's dev->offload_fwd_mark, based on the port's parent ID (switch ID) and +a group ifindex. It is possible for the switch device to not handle flooding and push the packets up to the bridge driver for flooding. This is not ideal as the number -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[RFC PATCH net-next v2 4/5] rocker: add offload_fwd_mark support
From: Scott Feldman If device flags ingress packet as "fwd offload", mark the skb->offlaod_fwd_mark using the ingress port's dev->offlaod_fwd_mark. This will be the hint to the kernel that this packet has already been forwarded by device to egress ports matching skb->offlaod_fwd_mark. For rocker, derive port dev->offlaod_fwd_mark based on device switch ID and port ifindex. If port is bridged, use the bridge ifindex rather than the port ifindex. Signed-off-by: Scott Feldman --- drivers/net/ethernet/rocker/rocker.c | 14 +- drivers/net/ethernet/rocker/rocker.h |1 + 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index d4ec660..b72674c 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -4701,6 +4701,7 @@ static int rocker_port_rx_proc(const struct rocker *rocker, const struct rocker_tlv *attrs[ROCKER_TLV_RX_MAX + 1]; struct sk_buff *skb = rocker_desc_cookie_ptr_get(desc_info); size_t rx_len; + u16 rx_flags = 0; if (!skb) return -ENOENT; @@ -4708,6 +4709,8 @@ static int rocker_port_rx_proc(const struct rocker *rocker, rocker_tlv_parse_desc(attrs, ROCKER_TLV_RX_MAX, desc_info); if (!attrs[ROCKER_TLV_RX_FRAG_LEN]) return -EINVAL; + if (attrs[ROCKER_TLV_RX_FLAGS]) + rx_flags = rocker_tlv_get_u16(attrs[ROCKER_TLV_RX_FLAGS]); rocker_dma_rx_ring_skb_unmap(rocker, attrs); @@ -4715,6 +4718,9 @@ static int rocker_port_rx_proc(const struct rocker *rocker, skb_put(skb, rx_len); skb->protocol = eth_type_trans(skb, rocker_port->dev); + if (rx_flags & ROCKER_RX_FLAGS_FWD_OFFLOAD) + skb->offload_fwd_mark = rocker_port->dev->offload_fwd_mark; + rocker_port->dev->stats.rx_packets++; rocker_port->dev->stats.rx_bytes += skb->len; @@ -4852,11 +4858,13 @@ static int rocker_probe_port(struct rocker *rocker, unsigned int port_number) } rocker->ports[port_number] = rocker_port; + switchdev_port_fwd_mark_set(rocker_port->dev, NULL, false); + rocker_port_set_learning(rocker_port, SWITCHDEV_TRANS_NONE); err = rocker_port_ig_tbl(rocker_port, SWITCHDEV_TRANS_NONE, 0); if (err) { - dev_err(&pdev->dev, "install ig port table failed\n"); + netdev_err(rocker_port->dev, "install ig port table failed\n"); goto err_port_ig_tbl; } @@ -4876,6 +4884,7 @@ err_untagged_vlan: rocker_port_ig_tbl(rocker_port, SWITCHDEV_TRANS_NONE, ROCKER_OP_FLAG_REMOVE); err_port_ig_tbl: + rocker->ports[port_number] = NULL; unregister_netdev(dev); err_register_netdev: free_netdev(dev); @@ -5131,6 +5140,7 @@ static int rocker_port_bridge_join(struct rocker_port *rocker_port, rocker_port_internal_vlan_id_get(rocker_port, bridge->ifindex); rocker_port->bridge_dev = bridge; + switchdev_port_fwd_mark_set(rocker_port->dev, bridge, true); return rocker_port_vlan_add(rocker_port, SWITCHDEV_TRANS_NONE, untagged_vid, 0); @@ -5151,6 +5161,8 @@ static int rocker_port_bridge_leave(struct rocker_port *rocker_port) rocker_port_internal_vlan_id_get(rocker_port, rocker_port->dev->ifindex); + switchdev_port_fwd_mark_set(rocker_port->dev, rocker_port->bridge_dev, + false); rocker_port->bridge_dev = NULL; err = rocker_port_vlan_add(rocker_port, SWITCHDEV_TRANS_NONE, diff --git a/drivers/net/ethernet/rocker/rocker.h b/drivers/net/ethernet/rocker/rocker.h index c61fbf9..f846c0d 100644 --- a/drivers/net/ethernet/rocker/rocker.h +++ b/drivers/net/ethernet/rocker/rocker.h @@ -245,6 +245,7 @@ enum { #define ROCKER_RX_FLAGS_TCPBIT(5) #define ROCKER_RX_FLAGS_UDPBIT(6) #define ROCKER_RX_FLAGS_TCP_UDP_CSUM_GOOD BIT(7) +#define ROCKER_RX_FLAGS_FWD_OFFLOADBIT(8) enum { ROCKER_TLV_TX_UNSPEC, -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[RFC PATCH net-next v2 0/5] switchdev: avoid duplicate packet forwarding
From: Scott Feldman (RFC because we're at rc7+ now) v2: - s/fwd_mark/offload_fwd_mark - use consume_skb rather than kfree_skb when dropping pkt on egress. - Use Jiri's suggestion to use ifindex of one of the ports in a group as the mark for all the ports in the group. This can be done with no additional storage (no hashtable from v1). To pull it off, we need some simple recursive routines to walk the netdev tree ensuring all leaves in the tree (ports) in the same group (e.g. bridge) belonging to the same switch device will have the same offload fwd mark. Maybe someone sees a better design for the recusive routines? They're not too bad, and should cover the stacked driver cases. v1: With switchdev support for offloading L2/L3 forwarding data path to a switch device, we have a general problem where both the device and the kernel may forward the packet, resulting in duplicate packets on the wire. Anytime a packet is forwarded by the device and a copy is sent to the CPU, there is potential for duplicate forwarding, as the kernel may also do a forwarding lookup and send the packet on the wire. The specific problem this patch series is interested in solving is avoiding duplicate packets on bridged ports. There was a previous RFC from Roopa (http://marc.info/?l=linux-netdev&m=142687073314252&w=2) to address this problem, but didn't solve the problem of mixed ports in the bridge from different devices; there was no way to exclude some ports from forwarding and include others. This RFC solves that problem by tagging the ingressing packet with a unique mark, and then comparing the packet mark with the egress port mark, and skip forwarding when there is a match. For the mixed ports bridge case, only those ports with matching marks are skipped. The switchdev port driver must do two things: 1) Generate a fwd_mark for each switch port, using some unique key of the switch device (and optionally port). This is done when the port netdev is registered or if the port's group membership changes (joins/leaves a bridge, for example). 2) On packet ingress from port, mark the skb with the ingress port's fwd_mark. If the device supports it, it's useful to only mark skbs which were already forwarded by the device. If the device does not support such indication, all skbs can be marked, even if they're local dst. Two new 32-bit fields are added to struct sk_buff and struct netdevice to hold the fwd_mark. I've wrapped these with CONFIG_NET_SWITCHDEV for now. I tried using skb->mark for this purpose, but ebtables can overwrite the skb->mark before the bridge gets it, so that will not work. In general, this fwd_mark can be used for any case where a packet is forwarded by the device and a copy is sent to the CPU, to avoid the kernel re-forwarding the packet. sFlow is another use-case that comes to mind, but I haven't explored the details. Scott Feldman (5): net: don't reforward packets already forwarded by offload device net: add phys ID compare helper to test if two IDs are the same switchdev: add offload_fwd_mark generator helper rocker: add offload_fwd_mark support switchdev: update documentation for offload_fwd_mark Documentation/networking/switchdev.txt | 14 +++- drivers/net/ethernet/rocker/rocker.c | 14 +++- drivers/net/ethernet/rocker/rocker.h |1 + include/linux/netdevice.h | 13 include/linux/skbuff.h |4 ++ include/net/switchdev.h|9 +++ net/core/dev.c | 10 +++ net/switchdev/switchdev.c | 111 ++-- 8 files changed, 167 insertions(+), 9 deletions(-) -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next] bridge: del external_learned fdbs from device on flush or ageout
From: Scott Feldman We need to delete from offload the device externally learnded fdbs when any one of these events happen: 1) Bridge ages out fdb. (When bridge is doing ageing vs. device doing ageing. If device is doing ageing, it would send SWITCHDEV_FDB_DEL directly). 2) STP state change flushes fdbs on port. 3) User uses sysfs interface to flush fdbs from bridge or bridge port: echo 1 >/sys/class/net/BR_DEV/bridge/flush echo 1 >/sys/class/net/BR_PORT/brport/flush 4) Offload driver send event SWITCHDEV_FDB_DEL to delete fdb entry. For rocker, we can now get called to delete fdb entry in wait and nowait contexts, so set NOWAIT flag when deleting fdb entry. Signed-off-by: Scott Feldman --- Documentation/networking/switchdev.txt | 11 ++- drivers/net/ethernet/rocker/rocker.c |2 +- net/bridge/br_fdb.c| 17 + 3 files changed, 20 insertions(+), 10 deletions(-) diff --git a/Documentation/networking/switchdev.txt b/Documentation/networking/switchdev.txt index 4a94ebc..e460007 100644 --- a/Documentation/networking/switchdev.txt +++ b/Documentation/networking/switchdev.txt @@ -251,15 +251,8 @@ out stale FDB entries. To keep an FDB entry "alive", the driver should refresh the FDB entry by calling call_switchdev_notifiers(SWITCHDEV_FDB_ADD, ...). The notification will reset the FDB entry's last-used time to now. The driver should rate limit refresh notifications, for example, no more than once a -second. If the FDB entry expires, ndo_fdb_del is called to remove entry from -the device. XXX: this last part isn't currently correct: ndo_fdb_del isn't -called, so the stale entry remains in device...this need to get fixed. - -FDB Flush -^ - -XXX: Unimplemented. Need to support FDB flush by bridge driver for port and -remove both static and learned FDB entries. +second. If the FDB entry expires, fdb_delete is called to remove entry from +the device. STP State Change on Port diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index 3aa6caf..22d68c1 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -4392,7 +4392,7 @@ static int rocker_port_fdb_del(struct rocker_port *rocker_port, const struct switchdev_obj_fdb *fdb) { __be16 vlan_id = rocker_port_vid_to_vlan(rocker_port, fdb->vid, NULL); - int flags = ROCKER_OP_FLAG_REMOVE; + int flags = ROCKER_OP_FLAG_NOWAIT | ROCKER_OP_FLAG_REMOVE; if (!rocker_port_is_bridged(rocker_port)) return -EINVAL; diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 13949a7..be84b7e 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "br_private.h" static struct kmem_cache *br_fdb_cache __read_mostly; @@ -130,11 +131,27 @@ static void fdb_del_hw_addr(struct net_bridge *br, const unsigned char *addr) } } +static void fdb_del_external_learn(struct net_bridge_fdb_entry *f) +{ + struct switchdev_obj obj = { + .id = SWITCHDEV_OBJ_PORT_FDB, + .u.fdb = { + .addr = f->addr.addr, + .vid = f->vlan_id, + }, + }; + + switchdev_port_obj_del(f->dst->dev, &obj); +} + static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f) { if (f->is_static) fdb_del_hw_addr(br, f->addr.addr); + if (f->added_by_external_learn) + fdb_del_external_learn(f); + hlist_del_rcu(&f->hlist); fdb_notify(br, f, RTM_DELNEIGH); call_rcu(&f->rcu, fdb_rcu_free); -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[RFC PATCH net-next 2/4] switchdev: add fwd_mark generator helper
From: Scott Feldman skb->fwd_mark and dev->fwd_mark are 32-bit and should be unique for device and maybe even unique for a sub-set of ports within device, so add switchdev helper function to generate unique marks based on driver-supplied key. Typically, the driver would use device switch ID for key, and maybe additional fields in key for grouped ports such as bridge ifindex. The key can be of arbitrary length. The generator uses a global hash table to store fwd_marks hashed by key. Signed-off-by: Scott Feldman --- include/net/switchdev.h |6 net/switchdev/switchdev.c | 72 + 2 files changed, 78 insertions(+) diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 437f8fe..6eaceee 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -157,6 +157,7 @@ int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, struct net_device *filter_dev, int idx); +u32 switchdev_mark_get(void *key, size_t key_len); #else @@ -271,6 +272,11 @@ static inline int switchdev_port_fdb_dump(struct sk_buff *skb, return -EOPNOTSUPP; } +static inline u32 switchdev_mark_get(void *key, size_t key_len) +{ + return 0; +} + #endif #endif /* _LINUX_SWITCHDEV_H_ */ diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index a5d0f8e..9ca37b3 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -16,6 +16,8 @@ #include #include #include +#include +#include #include #include @@ -924,3 +926,73 @@ void switchdev_fib_ipv4_abort(struct fib_info *fi) fi->fib_net->ipv4.fib_offload_disabled = true; } EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_abort); + +#define SWITCHDEV_MARK_HT_BITS 5 +static DEFINE_HASHTABLE(switchdev_mark_ht, SWITCHDEV_MARK_HT_BITS); +static DEFINE_SPINLOCK(switchdev_mark_lock); +static u32 switchdev_mark_next = 1; + +/** + * switchdev_mark_get - Generate a unique mark for key + * + * @key: key used to generate mark + * @key_len: length of key in bytes + * + * Returns unqiue 32-bit mark for given key, or 0 if error. + * A small global hash table stores the marks for each key. + * The length of the key and key contents are arbitrary. + * The marks can be used, for example, to skb->fwd_mark a pkt + * to associate the skb with a key. + */ +u32 switchdev_mark_get(void *key, size_t key_len) +{ + struct switchdev_mark_ht_entry { + struct hlist_node entry; + void *key; + size_t key_len; + u32 key_crc32; + u32 mark; + } *entry; + u32 key_crc32 = crc32(~0, key, key_len); + u32 mark = 0; + unsigned long flags; + + spin_lock_irqsave(&switchdev_mark_lock, flags); + hash_for_each_possible(switchdev_mark_ht, entry, + entry, key_crc32) { + if (entry->key_len != key_len) + continue; + if (memcmp(entry->key, key, key_len) == 0) { + mark = entry->mark; + break; + } + } + spin_unlock_irqrestore(&switchdev_mark_lock, flags); + + if (mark) + goto out; + + entry = kmalloc(GFP_KERNEL, sizeof(*entry)); + if (!entry) + goto out; + + entry->key = kmalloc(GFP_KERNEL, key_len); + if (!entry->key) { + kfree(entry); + goto out; + } + + memcpy(entry->key, key, key_len); + entry->key_len = key_len; + entry->key_crc32 = key_crc32; + + spin_lock_irqsave(&switchdev_mark_lock, flags); + mark = switchdev_mark_next++; + entry->mark = mark; + hash_add(switchdev_mark_ht, &entry->entry, key_crc32); + spin_unlock_irqrestore(&switchdev_mark_lock, flags); + +out: + return mark; +} +EXPORT_SYMBOL_GPL(switchdev_mark_get); -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[RFC PATCH net-next 3/4] rocker: add fwd_mark support
From: Scott Feldman If device flags ingress packet as "fwd offload", mark the skb->fwd_mark using the ingress port's dev->fwd_mark. This will be the hint to the kernel that this packet has already been forwarded by device to egress ports matching skb->fwd_mark. For rocker, derive port dev->fwd_mark based on device switch ID. If port is bridged, include the bridge's ifindex in the key for deriving dev->fwd_mark. Signed-off-by: Scott Feldman --- drivers/net/ethernet/rocker/rocker.c | 24 drivers/net/ethernet/rocker/rocker.h |1 + 2 files changed, 25 insertions(+) diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index a06b93d..81407d8 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -4701,6 +4701,7 @@ static int rocker_port_rx_proc(const struct rocker *rocker, const struct rocker_tlv *attrs[ROCKER_TLV_RX_MAX + 1]; struct sk_buff *skb = rocker_desc_cookie_ptr_get(desc_info); size_t rx_len; + u16 rx_flags = 0; if (!skb) return -ENOENT; @@ -4708,6 +4709,8 @@ static int rocker_port_rx_proc(const struct rocker *rocker, rocker_tlv_parse_desc(attrs, ROCKER_TLV_RX_MAX, desc_info); if (!attrs[ROCKER_TLV_RX_FRAG_LEN]) return -EINVAL; + if (attrs[ROCKER_TLV_RX_FLAGS]) + rx_flags = rocker_tlv_get_u16(attrs[ROCKER_TLV_RX_FLAGS]); rocker_dma_rx_ring_skb_unmap(rocker, attrs); @@ -4715,6 +4718,9 @@ static int rocker_port_rx_proc(const struct rocker *rocker, skb_put(skb, rx_len); skb->protocol = eth_type_trans(skb, rocker_port->dev); + if (rx_flags & ROCKER_RX_FLAGS_FWD_OFFLOAD) + skb->fwd_mark = rocker_port->dev->fwd_mark; + rocker_port->dev->stats.rx_packets++; rocker_port->dev->stats.rx_bytes += skb->len; @@ -4814,6 +4820,21 @@ static void rocker_port_dev_addr_init(struct rocker_port *rocker_port) } } +static void rocker_port_fwd_mark_set(struct rocker_port *rocker_port) +{ + struct rocker *rocker = rocker_port->rocker; + struct { + u64 hw_id; + int ifindex; + } key = { + .hw_id = rocker->hw.id, + .ifindex = rocker_port_is_bridged(rocker_port) ? + rocker_port->bridge_dev->ifindex : 0, + }; + + rocker_port->dev->fwd_mark = switchdev_mark_get(&key, sizeof(key)); +} + static int rocker_probe_port(struct rocker *rocker, unsigned int port_number) { const struct pci_dev *pdev = rocker->pdev; @@ -4832,6 +4853,7 @@ static int rocker_probe_port(struct rocker *rocker, unsigned int port_number) rocker_port->pport = port_number + 1; rocker_port->brport_flags = BR_LEARNING | BR_LEARNING_SYNC; INIT_LIST_HEAD(&rocker_port->trans_mem); + rocker_port_fwd_mark_set(rocker_port); rocker_port_dev_addr_init(rocker_port); dev->netdev_ops = &rocker_port_netdev_ops; @@ -5131,6 +5153,7 @@ static int rocker_port_bridge_join(struct rocker_port *rocker_port, rocker_port_internal_vlan_id_get(rocker_port, bridge->ifindex); rocker_port->bridge_dev = bridge; + rocker_port_fwd_mark_set(rocker_port); return rocker_port_vlan_add(rocker_port, SWITCHDEV_TRANS_NONE, untagged_vid, 0); @@ -5152,6 +5175,7 @@ static int rocker_port_bridge_leave(struct rocker_port *rocker_port) rocker_port->dev->ifindex); rocker_port->bridge_dev = NULL; + rocker_port_fwd_mark_set(rocker_port); err = rocker_port_vlan_add(rocker_port, SWITCHDEV_TRANS_NONE, untagged_vid, 0); diff --git a/drivers/net/ethernet/rocker/rocker.h b/drivers/net/ethernet/rocker/rocker.h index c61fbf9..f846c0d 100644 --- a/drivers/net/ethernet/rocker/rocker.h +++ b/drivers/net/ethernet/rocker/rocker.h @@ -245,6 +245,7 @@ enum { #define ROCKER_RX_FLAGS_TCPBIT(5) #define ROCKER_RX_FLAGS_UDPBIT(6) #define ROCKER_RX_FLAGS_TCP_UDP_CSUM_GOOD BIT(7) +#define ROCKER_RX_FLAGS_FWD_OFFLOADBIT(8) enum { ROCKER_TLV_TX_UNSPEC, -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[RFC PATCH net-next 4/4] switchdev: update documentation for fwd_mark
From: Scott Feldman Signed-off-by: Scott Feldman --- Documentation/networking/switchdev.txt | 13 +++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/Documentation/networking/switchdev.txt b/Documentation/networking/switchdev.txt index da82cd7..d6a8695 100644 --- a/Documentation/networking/switchdev.txt +++ b/Documentation/networking/switchdev.txt @@ -286,8 +286,17 @@ and unknown unicast packets to all ports in domain, if allowed by port's current STP state. The switch driver, knowing which ports are within which vlan L2 domain, can program the switch device for flooding. The packet should also be sent to the port netdev for processing by the bridge driver. The -bridge should not reflood the packet to the same ports the device flooded. -XXX: the mechanism to avoid duplicate flood packets is being discuseed. +bridge should not reflood the packet to the same ports the device flooded, +otherwise there will be duplicate packets on the wire. + +To avoid duplicate packets, the device/driver can mark a packet as already +forwarded using skb->fwd_mark. The same mark is set on the device ports in the +domain using dev->fwd_mark. If the skb->fwd_mark is non-zero and matches the +forwarding egress port's dev->skb_mark, the kernel will drop the skb right +before transmit on the egress port, with the understanding that the device +already forwarded the packet on same egress port. The driver can use +switchdev_mark_get() to get a globally unique mark for egress port(s)' +dev->fwd_mark, based on a driver/device-supplied key. It is possible for the switch device to not handle flooding and push the packets up to the bridge driver for flooding. This is not ideal as the number -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[RFC PATCH net-next 0/4] switchdev: avoid duplicate packet forwarding
From: Scott Feldman (RFC because we're at rc7+ now) With switchdev support for offloading L2/L3 forwarding data path to a switch device, we have a general problem where both the device and the kernel may forward the packet, resulting in duplicate packets on the wire. Anytime a packet is forwarded by the device and a copy is sent to the CPU, there is potential for duplicate forwarding, as the kernel may also do a forwarding lookup and send the packet on the wire. The specific problem this patch series is interested in solving is avoiding duplicate packets on bridged ports. There was a previous RFC from Roopa (http://marc.info/?l=linux-netdev&m=142687073314252&w=2) to address this problem, but didn't solve the problem of mixed ports in the bridge from different devices; there was no way to exclude some ports from forwarding and include others. This RFC solves that problem by tagging the ingressing packet with a unique mark, and then comparing the packet mark with the egress port mark, and skip forwarding when there is a match. For the mixed ports bridge case, only those ports with matching marks are skipped. The switchdev port driver must do two things: 1) Generate a fwd_mark for each switch port, using some unique key of the switch device (and optionally port). This is a one-time operation done when port's netdev is setup. 2) On packet ingress from port, mark the skb with the ingress port's fwd_mark. If the device supports it, it's useful to only mark skbs which were already forwarded by the device. If the device does not support such indication, all skbs can be marked, even if they're local dst. Two new 32-bit fields are added to struct sk_buff and struct netdevice to hold the fwd_mark. I've wrapped these with CONFIG_NET_SWITCHDEV for now. I tried using skb->mark for this purpose, but ebtables can overwrite the skb->mark before the bridge gets it, so that will not work. In general, this fwd_mark can be used for any case where a packet is forwarded by the device and a copy is sent to the CPU, to avoid the kernel re-forwarding the packet. sFlow is another use-case that comes to mind, but I haven't explored the details. Scott Feldman (4): net: don't reforward packets already forwarded by offload device switchdev: add fwd_mark generator helper rocker: add fwd_mark support switchdev: update documentation for fwd_mark Documentation/networking/switchdev.txt | 13 +- drivers/net/ethernet/rocker/rocker.c | 24 +++ drivers/net/ethernet/rocker/rocker.h |1 + include/linux/netdevice.h |6 +++ include/linux/skbuff.h |4 ++ include/net/switchdev.h|6 +++ net/core/dev.c |9 net/switchdev/switchdev.c | 72 8 files changed, 133 insertions(+), 2 deletions(-) -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[RFC PATCH net-next 1/4] net: don't reforward packets already forwarded by offload device
From: Scott Feldman Just before queuing skb for xmit on port, check if skb has been marked by switchdev port driver as already fordwarded by device. If so, drop skb. A non-zero skb->fwd_mark field is set by the switchdev port driver/device on ingress to indicate the skb has already been forwarded by the device to egress ports with matching dev->skb_mark. The switchdev port driver would assign a non-zero dev->skb_mark for each device port netdev during registration, for example. Signed-off-by: Scott Feldman --- include/linux/netdevice.h |6 ++ include/linux/skbuff.h|4 net/core/dev.c|9 + 3 files changed, 19 insertions(+) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 6f5f71f..181b08f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1444,6 +1444,8 @@ enum netdev_priv_flags { * * @xps_maps: XXX: need comments on this one * + * @fwd_mark: Offload device fwding mark + * * @trans_start: Time (in jiffies) of last Tx * @watchdog_timeo:Represents the timeout that is used by * the watchdog ( see dev_watchdog() ) @@ -1681,6 +1683,10 @@ struct net_device { struct xps_dev_maps __rcu *xps_maps; #endif +#ifdef CONFIG_NET_SWITCHDEV + u32 fwd_mark; +#endif + /* These may be needed for future network-power-down code. */ /* diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index cc612fc..ba98c05 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -501,6 +501,7 @@ static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1, * @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS *@napi_id: id of the NAPI struct this skb came from * @secmark: security marking + * @fwd_mark: fwding offload mark * @mark: Generic packet mark * @vlan_proto: vlan encapsulation protocol * @vlan_tci: vlan tag control information @@ -648,6 +649,9 @@ struct sk_buff { #ifdef CONFIG_NETWORK_SECMARK __u32 secmark; #endif +#ifdef CONFIG_NET_SWITCHDEV + __u32 fwd_mark; +#endif union { __u32 mark; __u32 reserved_tailroom; diff --git a/net/core/dev.c b/net/core/dev.c index 6778a99..558bf33 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3065,6 +3065,15 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) else skb_dst_force(skb); +#ifdef CONFIG_NET_SWITCHDEV + /* Don't forward if offload device already forwarded */ + if (skb->fwd_mark && skb->fwd_mark == dev->fwd_mark) { + kfree_skb(skb); + rc = NET_XMIT_SUCCESS; + goto out; + } +#endif + txq = netdev_pick_tx(dev, skb, accel_priv); q = rcu_dereference_bh(txq->qdisc); -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next 3/5] rocker: mark STP update as 'no wait' processing
From: Scott Feldman We can get STP updates from the bridge driver in atomic and non-atomic contexts. Since we can't test what context we're getting called in, do the STP processing as 'no wait', which will cover all cases. Signed-off-by: Scott Feldman --- drivers/net/ethernet/rocker/rocker.c |3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index 1995b59..6c15c2e 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -4286,7 +4286,8 @@ static int rocker_port_attr_set(struct net_device *dev, switch (attr->id) { case SWITCHDEV_ATTR_PORT_STP_STATE: - err = rocker_port_stp_update(rocker_port, attr->trans, 0, + err = rocker_port_stp_update(rocker_port, attr->trans, +ROCKER_OP_FLAG_NOWAIT, attr->u.stp_state); break; case SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS: -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next 1/5] rocker: revert back to support for nowait processes
From: Scott Feldman One of the items removed from the rocker driver in the Spring Cleanup patch series was the ability to mark processing in the driver as "no wait" for those contexts where we cannot sleep. Turns out, we have "no wait" contexts where we want to program the device. So re-add the ROCKER_OP_FLAG_NOWAIT flag to mark such processes, and propagate flags to mem allocator and to the device cmd executor. With NOWAIT, mem allocs are GFP_ATOMIC and device cmds are queued to the device, but the driver will not wait (sleep) for the response back from the device. My bad for removing NOWAIT support in the first place; I thought we could swing non-sleep contexts to process context using a work queue, for example, but there is push-back to keep processing in original context. Signed-off-by: Scott Feldman --- drivers/net/ethernet/rocker/rocker.c | 202 +++--- 1 file changed, 112 insertions(+), 90 deletions(-) diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index a9d1559..c1910c1 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -326,10 +326,18 @@ static bool rocker_port_is_bridged(const struct rocker_port *rocker_port) return !!rocker_port->bridge_dev; } +#define ROCKER_OP_FLAG_REMOVE BIT(0) +#define ROCKER_OP_FLAG_NOWAIT BIT(1) +#define ROCKER_OP_FLAG_LEARNED BIT(2) +#define ROCKER_OP_FLAG_REFRESH BIT(3) + static void *__rocker_port_mem_alloc(struct rocker_port *rocker_port, -enum switchdev_trans trans, size_t size) +enum switchdev_trans trans, int flags, +size_t size) { struct list_head *elem = NULL; + gfp_t gfp_flags = (flags & ROCKER_OP_FLAG_NOWAIT) ? + GFP_ATOMIC : GFP_KERNEL; /* If in transaction prepare phase, allocate the memory * and enqueue it on a per-port list. If in transaction @@ -342,7 +350,7 @@ static void *__rocker_port_mem_alloc(struct rocker_port *rocker_port, switch (trans) { case SWITCHDEV_TRANS_PREPARE: - elem = kzalloc(size + sizeof(*elem), GFP_KERNEL); + elem = kzalloc(size + sizeof(*elem), gfp_flags); if (!elem) return NULL; list_add_tail(elem, &rocker_port->trans_mem); @@ -353,7 +361,7 @@ static void *__rocker_port_mem_alloc(struct rocker_port *rocker_port, list_del_init(elem); break; case SWITCHDEV_TRANS_NONE: - elem = kzalloc(size + sizeof(*elem), GFP_KERNEL); + elem = kzalloc(size + sizeof(*elem), gfp_flags); if (elem) INIT_LIST_HEAD(elem); break; @@ -365,16 +373,17 @@ static void *__rocker_port_mem_alloc(struct rocker_port *rocker_port, } static void *rocker_port_kzalloc(struct rocker_port *rocker_port, -enum switchdev_trans trans, size_t size) +enum switchdev_trans trans, int flags, +size_t size) { - return __rocker_port_mem_alloc(rocker_port, trans, size); + return __rocker_port_mem_alloc(rocker_port, trans, flags, size); } static void *rocker_port_kcalloc(struct rocker_port *rocker_port, -enum switchdev_trans trans, size_t n, -size_t size) +enum switchdev_trans trans, int flags, +size_t n, size_t size) { - return __rocker_port_mem_alloc(rocker_port, trans, n * size); + return __rocker_port_mem_alloc(rocker_port, trans, flags, n * size); } static void rocker_port_kfree(enum switchdev_trans trans, const void *mem) @@ -397,11 +406,13 @@ static void rocker_port_kfree(enum switchdev_trans trans, const void *mem) struct rocker_wait { wait_queue_head_t wait; bool done; + bool nowait; }; static void rocker_wait_reset(struct rocker_wait *wait) { wait->done = false; + wait->nowait = false; } static void rocker_wait_init(struct rocker_wait *wait) @@ -411,11 +422,12 @@ static void rocker_wait_init(struct rocker_wait *wait) } static struct rocker_wait *rocker_wait_create(struct rocker_port *rocker_port, - enum switchdev_trans trans) + enum switchdev_trans trans, + int flags) { struct rocker_wait *wait; - wait = rocker_port_kzalloc(rocker_port, trans, sizeof(*wait)); + wait = rocker_port_kzalloc(rocker_port, trans, flags, sizeof(*wait)); if (!wait) return NULL; rocker_wait_init(wait); @@ -1386,7 +1398,12 @@ static irqreturn_t rocker_cmd_ir
[PATCH net-next 4/5] rocker: move MAC learn event back to 'no wait' processing
From: Scott Feldman Signed-off-by: Scott Feldman --- drivers/net/ethernet/rocker/rocker.c | 40 +++--- 1 file changed, 3 insertions(+), 37 deletions(-) diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index 6c15c2e..8430cb3 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -1459,36 +1459,14 @@ static int rocker_port_fdb(struct rocker_port *rocker_port, const unsigned char *addr, __be16 vlan_id, int flags); -struct rocker_mac_vlan_seen_work { - struct work_struct work; - struct rocker_port *rocker_port; - int flags; - unsigned char addr[ETH_ALEN]; - __be16 vlan_id; -}; - -static void rocker_event_mac_vlan_seen_work(struct work_struct *work) -{ - const struct rocker_mac_vlan_seen_work *sw = - container_of(work, struct rocker_mac_vlan_seen_work, work); - - rtnl_lock(); - rocker_port_fdb(sw->rocker_port, SWITCHDEV_TRANS_NONE, - sw->addr, sw->vlan_id, sw->flags); - rtnl_unlock(); - - kfree(work); -} - static int rocker_event_mac_vlan_seen(const struct rocker *rocker, const struct rocker_tlv *info) { - struct rocker_mac_vlan_seen_work *sw; const struct rocker_tlv *attrs[ROCKER_TLV_EVENT_MAC_VLAN_MAX + 1]; unsigned int port_number; struct rocker_port *rocker_port; const unsigned char *addr; - int flags = ROCKER_OP_FLAG_LEARNED; + int flags = ROCKER_OP_FLAG_NOWAIT | ROCKER_OP_FLAG_LEARNED; __be16 vlan_id; rocker_tlv_parse_nested(attrs, ROCKER_TLV_EVENT_MAC_VLAN_MAX, info); @@ -1510,20 +1488,8 @@ static int rocker_event_mac_vlan_seen(const struct rocker *rocker, rocker_port->stp_state != BR_STATE_FORWARDING) return 0; - sw = kmalloc(sizeof(*sw), GFP_ATOMIC); - if (!sw) - return -ENOMEM; - - INIT_WORK(&sw->work, rocker_event_mac_vlan_seen_work); - - sw->rocker_port = rocker_port; - sw->flags = flags; - ether_addr_copy(sw->addr, addr); - sw->vlan_id = vlan_id; - - schedule_work(&sw->work); - - return 0; + return rocker_port_fdb(rocker_port, SWITCHDEV_TRANS_NONE, + addr, vlan_id, flags); } static int rocker_event_process(const struct rocker *rocker, -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next 5/5] rocker: move port stop to 'no wait' processing
From: Scott Feldman rocker_port_stop can be called from atomic and non-atomic contexts. Since we can't test what context we're getting called in, do the processing as 'no wait', which will cover all cases. Signed-off-by: Scott Feldman --- drivers/net/ethernet/rocker/rocker.c |3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index 8430cb3..a06b93d 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -4004,7 +4004,8 @@ static int rocker_port_stop(struct net_device *dev) rocker_port_set_enable(rocker_port, false); napi_disable(&rocker_port->napi_rx); napi_disable(&rocker_port->napi_tx); - rocker_port_fwd_disable(rocker_port, SWITCHDEV_TRANS_NONE, 0); + rocker_port_fwd_disable(rocker_port, SWITCHDEV_TRANS_NONE, + ROCKER_OP_FLAG_NOWAIT); free_irq(rocker_msix_rx_vector(rocker_port), rocker_port); free_irq(rocker_msix_tx_vector(rocker_port), rocker_port); rocker_port_dma_rings_fini(rocker_port); -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html