[Bridge] [PATCH net-next v3 15/16] selftests: forwarding: lib: Add helpers to build IGMP/MLD leave packets

2023-02-02 Thread Petr Machata via Bridge
The testsuite that checks for mcast_max_groups functionality will need to
wipe the added groups as well. Add helpers to build IGMP or MLD packets
announcing that a host is leaving a given group.

Signed-off-by: Petr Machata 
Acked-by: Nikolay Aleksandrov 
---
 tools/testing/selftests/net/forwarding/lib.sh | 50 +++
 1 file changed, 50 insertions(+)

diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 12ef34ebcbbf..969e570f609e 100755
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -1815,6 +1815,21 @@ igmpv3_is_in_get()
payload_template_expand_checksum "$igmpv3" $checksum
 }
 
+igmpv2_leave_get()
+{
+   local GRP=$1; shift
+
+   local payload=$(:
+   )"17:"$(: Type - Leave Group
+   )"00:"$(: Max Resp Time - not meaningful
+   )"CHECKSUM:"$(  : Checksum
+   )"$(ipv4_to_bytes $GRP)"$(  : Group Address
+   )
+   local checksum=$(payload_template_calc_checksum "$payload")
+
+   payload_template_expand_checksum "$payload" $checksum
+}
+
 mldv2_is_in_get()
 {
local SIP=$1; shift
@@ -1858,3 +1873,38 @@ mldv2_is_in_get()
 
payload_template_expand_checksum "$hbh$icmpv6" $checksum
 }
+
+mldv1_done_get()
+{
+   local SIP=$1; shift
+   local GRP=$1; shift
+
+   local hbh
+   local icmpv6
+
+   hbh=$(:
+   )"3a:"$(: Next Header - ICMPv6
+   )"00:"$(: Hdr Ext Len
+   )"00:00:00:00:00:00:"$( : Options and Padding
+   )
+
+   icmpv6=$(:
+   )"84:"$(: Type - MLDv1 Done
+   )"00:"$(: Code
+   )"CHECKSUM:"$(  : Checksum
+   )"00:00:"$( : Max Resp Delay - not meaningful
+   )"00:00:"$( : Reserved
+   )"$(ipv6_to_bytes $GRP):"$( : Multicast address
+   )
+
+   local len=$(u16_to_bytes $(payload_template_nbytes $icmpv6))
+   local sudohdr=$(:
+   )"$(ipv6_to_bytes $SIP):"$( : SIP
+   )"$(ipv6_to_bytes $GRP):"$( : DIP is multicast address
+   )"${len}:"$(: Upper-layer length
+   )"00:3a:"$( : Zero and next-header
+   )
+   local checksum=$(payload_template_calc_checksum ${sudohdr}${icmpv6})
+
+   payload_template_expand_checksum "$hbh$icmpv6" $checksum
+}
-- 
2.39.0



[Bridge] [PATCH net-next v3 16/16] selftests: forwarding: bridge_mdb_max: Add a new selftest

2023-02-02 Thread Petr Machata via Bridge
Add a suite covering mcast_n_groups and mcast_max_groups bridge features.

Signed-off-by: Petr Machata 
---

Notes:
v2:
- Adjust the tests that check setting max below n and
  reset of max on VLAN snooping enablement
- Make test naming uniform
- Enable testing of control path (IGMP/MLD) in
  mcast_vlan_snooping bridge
- Reorganize the code so that test instances (per bridge
  type and configuration type) always come right after
  the test, in order of {d,q,qvs}{4,6}{cfg,ctl}.
  Then groups of selftests are at the end of the file.
  Similarly adjust invocation order of the tests.

 .../testing/selftests/net/forwarding/Makefile |1 +
 .../net/forwarding/bridge_mdb_max.sh  | 1336 +
 2 files changed, 1337 insertions(+)
 create mode 100755 tools/testing/selftests/net/forwarding/bridge_mdb_max.sh

diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile
index 453ae006fbcf..91201ab3c4fc 100644
--- a/tools/testing/selftests/net/forwarding/Makefile
+++ b/tools/testing/selftests/net/forwarding/Makefile
@@ -4,6 +4,7 @@ TEST_PROGS = bridge_igmp.sh \
bridge_locked_port.sh \
bridge_mdb.sh \
bridge_mdb_host.sh \
+   bridge_mdb_max.sh \
bridge_mdb_port_down.sh \
bridge_mld.sh \
bridge_port_isolation.sh \
diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb_max.sh b/tools/testing/selftests/net/forwarding/bridge_mdb_max.sh
new file mode 100755
index 000000000000..ae255b662ba3
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_mdb_max.sh
@@ -0,0 +1,1336 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
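+# +-----------------------+                          +------------------------+
+# | H1 (vrf)              |                          | H2 (vrf)               |
+# | + $h1.10              |                          | + $h2.10               |
+# | | 192.0.2.1/28        |                          | | 192.0.2.2/28         |
+# | | 2001:db8:1::1/64    |                          | | 2001:db8:1::2/64     |
+# | |                     |                          | |                      |
+# | |  + $h1.20           |                          | |  + $h2.20            |
+# | \  | 198.51.100.1/24  |                          | \  | 198.51.100.2/24   |
+# |  \ | 2001:db8:2::1/64 |                          |  \ | 2001:db8:2::2/64  |
+# |   \|                  |                          |   \|                   |
+# |    + $h1              |                          |    + $h2               |
+# +----|------------------+                          +----|-------------------+
+#      |                                                  |
+# +----|--------------------------------------------------|-------------------+
+# | SW |                                                  |                   |
+# | +--|--------------------------------------------------|-----------------+ |
+# | |  + $swp1                   BR0 (802.1q)             + $swp2           | |
+# | |     vid 10                                             vid 10         | |
+# | |     vid 20                                             vid 20         | |
+# | |                                                                       | |
+# | +-----------------------------------------------------------------------+ |
+# +---------------------------------------------------------------------------+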
+
+ALL_TESTS="
+   test_8021d
+   test_8021q
+   test_8021qvs
+"
+
+NUM_NETIFS=4
+source lib.sh
+source tc_common.sh
+
+h1_create()
+{
+   simple_if_init $h1
+   vlan_create $h1 10 v$h1 192.0.2.1/28 2001:db8:1::1/64
+   vlan_create $h1 20 v$h1 198.51.100.1/24 2001:db8:2::1/64
+}
+
+h1_destroy()
+{
+   vlan_destroy $h1 20
+   vlan_destroy $h1 10
+   simple_if_fini $h1
+}
+
+h2_create()
+{
+   simple_if_init $h2
+   vlan_create $h2 10 v$h2 192.0.2.2/28
+   vlan_create $h2 20 v$h2 198.51.100.2/24
+}
+
+h2_destroy()
+{
+   vlan_destroy $h2 20
+   vlan_destroy $h2 10
+   simple_if_fini $h2
+}
+
+switch_create_8021d()
+{
+   log_info "802.1d tests"
+
+   ip link add name br0 type bridge vlan_filtering 0 \
+   mcast_snooping 1 \
+   mcast_igmp_version 3 mcast_mld_version 2
+   ip link set dev br0 up
+
+   ip link set dev $swp1 master br0
+   ip link set dev $swp1 up
+   bridge link set dev $swp1 fastleave on
+
+   ip link set dev $swp2 master br0
+   ip link set dev $swp2 up
+}
+
+switch_create_8021q()
+{
+   local br_flags=$1; shift
+
+   log_info "802.1q $br_flags${br_flags:+ }tests"
+
+   ip link add name br0 type bridge vlan_filtering 1 vlan_default_pvid 0 \
+   mcast_snooping 1 $br_flags \
+   mcast_igmp_version 3 mcast_mld_version 2
+   bridge vlan add vid 10 dev br0 self
+   bridge vlan add vid 20 dev br0 self
+   ip link set dev br0 

[Bridge] [PATCH net-next v3 13/16] selftests: forwarding: lib: Parameterize IGMPv3/MLDv2 generation

2023-02-02 Thread Petr Machata
In order to generate IGMPv3 and MLDv2 packets on the fly, the
functions that generate these packets need to be able to generate
packets for different groups and different sources. Generating MLDv2
packets further needs the source address of the packet for purposes of
checksum calculation. Add the necessary parameters, and generate the
payload accordingly by dispatching to helpers added in the previous
patches.

Adjust the sole client, bridge_mdb.sh, as well.

Signed-off-by: Petr Machata 
Acked-by: Nikolay Aleksandrov 
---
 .../selftests/net/forwarding/bridge_mdb.sh|  9 ++---
 tools/testing/selftests/net/forwarding/lib.sh | 36 +--
 2 files changed, 31 insertions(+), 14 deletions(-)

diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb.sh b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
index 4e16677f02ba..b48867d8cadf 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mdb.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
@@ -1029,7 +1029,7 @@ ctrl_igmpv3_is_in_test()
 
# IS_IN ( 192.0.2.2 )
$MZ $h1.10 -c 1 -A 192.0.2.1 -B 239.1.1.1 \
-   -t ip proto=2,p=$(igmpv3_is_in_get) -q
+   -t ip proto=2,p=$(igmpv3_is_in_get 239.1.1.1 192.0.2.2) -q
 
bridge -d mdb show dev br0 vid 10 | grep 239.1.1.1 | grep -q 192.0.2.2
check_fail $? "Permanent entry affected by IGMP packet"
@@ -1042,7 +1042,7 @@ ctrl_igmpv3_is_in_test()
 
# IS_IN ( 192.0.2.2 )
$MZ $h1.10 -c 1 -A 192.0.2.1 -B 239.1.1.1 \
-   -t ip proto=2,p=$(igmpv3_is_in_get) -q
+   -t ip proto=2,p=$(igmpv3_is_in_get 239.1.1.1 192.0.2.2) -q
 
bridge -d mdb show dev br0 vid 10 | grep 239.1.1.1 | grep -v "src" | \
grep -q 192.0.2.2
@@ -1067,8 +1067,9 @@ ctrl_mldv2_is_in_test()
filter_mode include source_list 2001:db8:1::1
 
# IS_IN ( 2001:db8:1::2 )
+   local p=$(mldv2_is_in_get fe80::1 ff0e::1 2001:db8:1::2)
$MZ -6 $h1.10 -c 1 -A fe80::1 -B ff0e::1 \
-   -t ip hop=1,next=0,p=$(mldv2_is_in_get) -q
+   -t ip hop=1,next=0,p="$p" -q
 
bridge -d mdb show dev br0 vid 10 | grep ff0e::1 | \
grep -q 2001:db8:1::2
@@ -1082,7 +1083,7 @@ ctrl_mldv2_is_in_test()
 
# IS_IN ( 2001:db8:1::2 )
$MZ -6 $h1.10 -c 1 -A fe80::1 -B ff0e::1 \
-   -t ip hop=1,next=0,p=$(mldv2_is_in_get) -q
+   -t ip hop=1,next=0,p="$p" -q
 
bridge -d mdb show dev br0 vid 10 | grep ff0e::1 | grep -v "src" | \
grep -q 2001:db8:1::2
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index b10c903d9abd..190e49e60508 100755
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -1788,26 +1788,35 @@ payload_template_nbytes()
 
 igmpv3_is_in_get()
 {
+   local GRP=$1; shift
+   local IP=$1; shift
+
local igmpv3
 
+   # IS_IN ( $IP )
igmpv3=$(:
)"22:"$(: Type - Membership Report
)"00:"$(: Reserved
-   )"2a:f8:"$( : Checksum
+   )"CHECKSUM:"$(  : Checksum
)"00:00:"$( : Reserved
)"00:01:"$( : Number of Group Records
)"01:"$(: Record Type - IS_IN
)"00:"$(: Aux Data Len
)"00:01:"$( : Number of Sources
-   )"ef:01:01:01:"$(   : Multicast Address - 239.1.1.1
-   )"c0:00:02:02"$(: Source Address - 192.0.2.2
+   )"$(ipv4_to_bytes $GRP):"$( : Multicast Address
+   )"$(ipv4_to_bytes $IP)"$(   : Source Address
)
+   local checksum=$(payload_template_calc_checksum "$igmpv3")
 
-   echo $igmpv3
+   payload_template_expand_checksum "$igmpv3" $checksum
 }
 
 mldv2_is_in_get()
 {
+   local SIP=$1; shift
+   local GRP=$1; shift
+   local IP=$1; shift
+
local hbh
local icmpv6
 
@@ -1820,17 +1829,24 @@ mldv2_is_in_get()
icmpv6=$(:
)"8f:"$(: Type - MLDv2 Report
)"00:"$(: Code
-   )"45:39:"$( : Checksum
+   )"CHECKSUM:"$(  : Checksum
)"00:00:"$( : Reserved
)"00:01:"$( : Number of Group Records
)"01:"$(: Record Type - IS_IN
)"00:"$(: Aux Data Len
)"00:01:"$( : Number of Sources
-   )"ff:0e:00:00:00:00:00:00:"$(   : Multicast address - ff0e::1
-   

[Bridge] [PATCH net-next v3 10/16] selftests: forwarding: bridge_mdb: Fix a typo

2023-02-02 Thread Petr Machata via Bridge
Add the letter missing from the word "INCLUDE".

Signed-off-by: Petr Machata 
Reviewed-by: Ido Schimmel 
Acked-by: Nikolay Aleksandrov 
---
 tools/testing/selftests/net/forwarding/bridge_mdb.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb.sh b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
index 51f2b0d77067..4e16677f02ba 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mdb.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
@@ -1054,7 +1054,7 @@ ctrl_igmpv3_is_in_test()
 
bridge mdb del dev br0 port $swp1 grp 239.1.1.1 vid 10
 
-   log_test "IGMPv3 MODE_IS_INCLUE tests"
+   log_test "IGMPv3 MODE_IS_INCLUDE tests"
 }
 
 ctrl_mldv2_is_in_test()
-- 
2.39.0



[Bridge] [PATCH net-next v3 14/16] selftests: forwarding: lib: Allow list of IPs for IGMPv3/MLDv2

2023-02-02 Thread Petr Machata via Bridge
The testsuite that checks for mcast_max_groups functionality will need
to generate IGMP and MLD packets with configurable number of (S,G)
addresses. To that end, further extend igmpv3_is_in_get() and
mldv2_is_in_get() to allow a list of IP addresses instead of one
address.

Signed-off-by: Petr Machata 
Acked-by: Nikolay Aleksandrov 
---
 tools/testing/selftests/net/forwarding/lib.sh | 22 +--
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 190e49e60508..12ef34ebcbbf 100755
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -1789,11 +1789,12 @@ payload_template_nbytes()
 igmpv3_is_in_get()
 {
local GRP=$1; shift
-   local IP=$1; shift
+   local sources=("$@")
 
local igmpv3
+   local nsources=$(u16_to_bytes ${#sources[@]})
 
-   # IS_IN ( $IP )
+   # IS_IN ( $sources )
igmpv3=$(:
)"22:"$(: Type - Membership Report
)"00:"$(: Reserved
@@ -1802,9 +1803,12 @@ igmpv3_is_in_get()
)"00:01:"$( : Number of Group Records
)"01:"$(: Record Type - IS_IN
)"00:"$(: Aux Data Len
-   )"00:01:"$( : Number of Sources
+   )"${nsources}:"$(   : Number of Sources
)"$(ipv4_to_bytes $GRP):"$( : Multicast Address
-   )"$(ipv4_to_bytes $IP)"$(   : Source Address
+   )"$(for src in "${sources[@]}"; do
+   ipv4_to_bytes $src
+   echo -n :
+   done)"$(: Source Addresses
)
local checksum=$(payload_template_calc_checksum "$igmpv3")
 
@@ -1815,10 +1819,11 @@ mldv2_is_in_get()
 {
local SIP=$1; shift
local GRP=$1; shift
-   local IP=$1; shift
+   local sources=("$@")
 
local hbh
local icmpv6
+   local nsources=$(u16_to_bytes ${#sources[@]})
 
hbh=$(:
)"3a:"$(: Next Header - ICMPv6
@@ -1834,9 +1839,12 @@ mldv2_is_in_get()
)"00:01:"$( : Number of Group Records
)"01:"$(: Record Type - IS_IN
)"00:"$(: Aux Data Len
-   )"00:01:"$( : Number of Sources
+   )"${nsources}:"$(   : Number of Sources
)"$(ipv6_to_bytes $GRP):"$( : Multicast address
-   )"$(ipv6_to_bytes $IP):"$(  : Source Address
+   )"$(for src in "${sources[@]}"; do
+   ipv6_to_bytes $src
+   echo -n :
+   done)"$(: Source Addresses
)
 
local len=$(u16_to_bytes $(payload_template_nbytes $icmpv6))
-- 
2.39.0



[Bridge] [PATCH net-next v3 09/16] selftests: forwarding: Move IGMP- and MLD-related functions to lib

2023-02-02 Thread Petr Machata
These functions will be helpful for other testsuites as well. Extract them
to a common place.

Signed-off-by: Petr Machata 
Reviewed-by: Ido Schimmel 
Acked-by: Nikolay Aleksandrov 
---
 .../selftests/net/forwarding/bridge_mdb.sh| 49 ---
 tools/testing/selftests/net/forwarding/lib.sh | 49 +++
 2 files changed, 49 insertions(+), 49 deletions(-)

diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb.sh b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
index 2fa5973c0c28..51f2b0d77067 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mdb.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
@@ -1018,26 +1018,6 @@ fwd_test()
ip -6 address del fe80::1/64 dev br0
 }
 
-igmpv3_is_in_get()
-{
-   local igmpv3
-
-   igmpv3=$(:
-   )"22:"$(: Type - Membership Report
-   )"00:"$(: Reserved
-   )"2a:f8:"$( : Checksum
-   )"00:00:"$( : Reserved
-   )"00:01:"$( : Number of Group Records
-   )"01:"$(: Record Type - IS_IN
-   )"00:"$(: Aux Data Len
-   )"00:01:"$( : Number of Sources
-   )"ef:01:01:01:"$(   : Multicast Address - 239.1.1.1
-   )"c0:00:02:02"$(: Source Address - 192.0.2.2
-   )
-
-   echo $igmpv3
-}
-
 ctrl_igmpv3_is_in_test()
 {
RET=0
@@ -1077,35 +1057,6 @@ ctrl_igmpv3_is_in_test()
log_test "IGMPv3 MODE_IS_INCLUE tests"
 }
 
-mldv2_is_in_get()
-{
-   local hbh
-   local icmpv6
-
-   hbh=$(:
-   )"3a:"$(: Next Header - ICMPv6
-   )"00:"$(: Hdr Ext Len
-   )"00:00:00:00:00:00:"$( : Options and Padding
-   )
-
-   icmpv6=$(:
-   )"8f:"$(: Type - MLDv2 Report
-   )"00:"$(: Code
-   )"45:39:"$( : Checksum
-   )"00:00:"$( : Reserved
-   )"00:01:"$( : Number of Group Records
-   )"01:"$(: Record Type - IS_IN
-   )"00:"$(: Aux Data Len
-   )"00:01:"$( : Number of Sources
-   )"ff:0e:00:00:00:00:00:00:"$(   : Multicast address - ff0e::1
-   )"00:00:00:00:00:00:00:01:"$(   :
-   )"20:01:0d:b8:00:01:00:00:"$(   : Source Address - 2001:db8:1::2
-   )"00:00:00:00:00:00:00:02:"$(   :
-   )
-
-   echo ${hbh}${icmpv6}
-}
-
 ctrl_mldv2_is_in_test()
 {
RET=0
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index ded967d204d3..0cfa0b699803 100755
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -1692,3 +1692,52 @@ hw_stats_monitor_test()
 
log_test "${type}_stats notifications"
 }
+
+igmpv3_is_in_get()
+{
+   local igmpv3
+
+   igmpv3=$(:
+   )"22:"$(: Type - Membership Report
+   )"00:"$(: Reserved
+   )"2a:f8:"$( : Checksum
+   )"00:00:"$( : Reserved
+   )"00:01:"$( : Number of Group Records
+   )"01:"$(: Record Type - IS_IN
+   )"00:"$(: Aux Data Len
+   )"00:01:"$( : Number of Sources
+   )"ef:01:01:01:"$(   : Multicast Address - 239.1.1.1
+   )"c0:00:02:02"$(: Source Address - 192.0.2.2
+   )
+
+   echo $igmpv3
+}
+
+mldv2_is_in_get()
+{
+   local hbh
+   local icmpv6
+
+   hbh=$(:
+   )"3a:"$(: Next Header - ICMPv6
+   )"00:"$(: Hdr Ext Len
+   )"00:00:00:00:00:00:"$( : Options and Padding
+   )
+
+   icmpv6=$(:
+   )"8f:"$(: Type - MLDv2 Report
+   )"00:"$(: Code
+   )"45:39:"$( : Checksum
+   )"00:00:"$( : Reserved
+   )"00:01:"$( : Number of Group Records
+   )"01:"$(: Record Type - IS_IN
+   )"00:"$(: Aux Data Len
+   )"00:01:"$( : Number of Sources
+   )"ff:0e:00:00:00:00:00:00:"$(   : Multicast address - ff0e::1
+   )"00:00:00:00:00:00:00:01:"$(   :
+

[Bridge] [PATCH net-next v3 12/16] selftests: forwarding: lib: Add helpers for checksum handling

2023-02-02 Thread Petr Machata
In order to generate IGMPv3 and MLDv2 packets on the fly, we will need
helpers to calculate the packet checksum.

The approach presented in this patch revolves around payload templates
for mausezahn. These are mausezahn-like payload strings (01:23:45:...)
with possibly one 2-byte sequence replaced with the word CHECKSUM. The
main function is payload_template_calc_checksum(), which calculates
RFC 1071 checksum of the message. There are further helpers to then
convert the checksum to the payload format, and to expand it.

For IPv6, MLDv2 message checksum is computed using a pseudoheader that
differs from the header used in the payload itself. The fact that the
two messages are different means that the checksum needs to be
returned as a separate quantity, instead of being expanded in-place in
the payload itself. Furthermore, the pseudoheader includes a length of
the message. Much like the checksum, this needs to be expanded in
mausezahn format. And likewise for number of addresses for (S,G)
entries. Thus we have several places where a computed quantity needs
to be presented in the payload format. Add a helper u16_to_bytes(),
which will be used in all these cases.

Signed-off-by: Petr Machata 
Acked-by: Nikolay Aleksandrov 
---

Notes:
v2:
- In the comment at payload_template_calc_checksum(),
  s/%#02x/%02x/, that's the mausezahn payload format.

 tools/testing/selftests/net/forwarding/lib.sh | 56 +++
 1 file changed, 56 insertions(+)

diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 409ff3799b55..b10c903d9abd 100755
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -1730,6 +1730,62 @@ ipv6_to_bytes()
expand_ipv6 "$IP" :
 }
 
+u16_to_bytes()
+{
+   local u16=$1; shift
+
+   printf "%04x" $u16 | sed 's/^/000/;s/^.*\(..\)\(..\)$/\1:\2/'
+}
+
+# Given a mausezahn-formatted payload (colon-separated bytes given as %02x),
+# possibly with a keyword CHECKSUM stashed where a 16-bit checksum should be,
+# calculate checksum as per RFC 1071, assuming the CHECKSUM field (if any)
+# stands for 00:00.
+payload_template_calc_checksum()
+{
+   local payload=$1; shift
+
+   (
+   # Set input radix.
+   echo "16i"
+   # Push zero for the initial checksum.
+   echo 0
+
+   # Pad the payload with a terminating 00: in case we get an odd
+   # number of bytes.
+   echo "${payload%:}:00:" |
+   sed 's/CHECKSUM/00:00/g' |
+   tr '[:lower:]' '[:upper:]' |
+   # Add the word to the checksum.
+   sed 's/\(..\):\(..\):/\1\2+\n/g' |
+   # Strip the extra odd byte we pushed if left unconverted.
+   sed 's/\(..\):$//'
+
+   echo "10000 ~ +"   # Calculate and add carry.
+   echo "FFFF r - p"   # Bit-flip and print.
+   ) |
+   dc |
+   tr '[:upper:]' '[:lower:]'
+}
+
+payload_template_expand_checksum()
+{
+   local payload=$1; shift
+   local checksum=$1; shift
+
+   local ckbytes=$(u16_to_bytes $checksum)
+
+   echo "$payload" | sed "s/CHECKSUM/$ckbytes/g"
+}
+
+payload_template_nbytes()
+{
+   local payload=$1; shift
+
+   payload_template_expand_checksum "${payload%:}" 0 |
+   sed 's/:/\n/g' | wc -l
+}
+
 igmpv3_is_in_get()
 {
local igmpv3
-- 
2.39.0



[Bridge] [PATCH net-next v3 11/16] selftests: forwarding: lib: Add helpers for IP address handling

2023-02-02 Thread Petr Machata
In order to generate IGMPv3 and MLDv2 packets on the fly, we will need
helpers to expand IPv4 and IPv6 addresses given as parameters in
mausezahn payload notation. Add helpers that do it.

Signed-off-by: Petr Machata 
Acked-by: Nikolay Aleksandrov 
---
 tools/testing/selftests/net/forwarding/lib.sh | 37 +++
 1 file changed, 37 insertions(+)

diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 0cfa0b699803..409ff3799b55 100755
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -1693,6 +1693,43 @@ hw_stats_monitor_test()
log_test "${type}_stats notifications"
 }
 
+ipv4_to_bytes()
+{
+   local IP=$1; shift
+
+   printf '%02x:' ${IP//./ } |
+   sed 's/:$//'
+}
+
+# Convert a given IPv6 address, `IP' such that the :: token, if present, is
+# expanded, and each 16-bit group is padded with zeroes to be 4 hexadecimal
+# digits. An optional `BYTESEP' parameter can be given to further separate
+# individual bytes of each 16-bit group.
+expand_ipv6()
+{
+   local IP=$1; shift
+   local bytesep=$1; shift
+
+   local cvt_ip=${IP/::/_}
+   local colons=${cvt_ip//[^:]/}
+   local allcol=:::::::
+   # IP where :: -> the appropriate number of colons:
+   local allcol_ip=${cvt_ip/_/${allcol:${#colons}}}
+
+   echo $allcol_ip | tr : '\n' |
+   sed s/^/0000/ |
+   sed 's/.*\(..\)\(..\)/\1'"$bytesep"'\2/' |
+   tr '\n' : |
+   sed 's/:$//'
+}
+
+ipv6_to_bytes()
+{
+   local IP=$1; shift
+
+   expand_ipv6 "$IP" :
+}
+
 igmpv3_is_in_get()
 {
local igmpv3
-- 
2.39.0



[Bridge] [PATCH net-next v3 08/16] net: bridge: Add netlink knobs for number / maximum MDB entries

2023-02-02 Thread Petr Machata via Bridge
The previous patch added accounting for number of MDB entries per port and
per port-VLAN, and the logic to verify that these values stay within
configured bounds. However it didn't provide means to actually configure
those bounds or read the occupancy. This patch does that.

Two new netlink attributes are added for the MDB occupancy:
IFLA_BRPORT_MCAST_N_GROUPS for the per-port occupancy and
BRIDGE_VLANDB_ENTRY_MCAST_N_GROUPS for the per-port-VLAN occupancy.
And another two for the maximum number of MDB entries:
IFLA_BRPORT_MCAST_MAX_GROUPS for the per-port maximum, and
BRIDGE_VLANDB_ENTRY_MCAST_MAX_GROUPS for the per-port-VLAN one.

Note that the two new IFLA_BRPORT_ attributes prompt bumping of
RTNL_SLAVE_MAX_TYPE to size the slave attribute tables large enough.

The new attributes are used like this:

 # ip link add name br up type bridge vlan_filtering 1 mcast_snooping 1 \
  mcast_vlan_snooping 1 mcast_querier 1
 # ip link set dev v1 master br
 # bridge vlan add dev v1 vid 2

 # bridge vlan set dev v1 vid 1 mcast_max_groups 1
 # bridge mdb add dev br port v1 grp 230.1.2.3 temp vid 1
 # bridge mdb add dev br port v1 grp 230.1.2.4 temp vid 1
 Error: bridge: Port-VLAN is already in 1 groups, and mcast_max_groups=1.

 # bridge link set dev v1 mcast_max_groups 1
 # bridge mdb add dev br port v1 grp 230.1.2.3 temp vid 2
 Error: bridge: Port is already in 1 groups, and mcast_max_groups=1.

 # bridge -d link show
 5: v1@v2: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 master br [...]
     [...] mcast_n_groups 1 mcast_max_groups 1

 # bridge -d vlan show
 port              vlan-id
 br                1 PVID Egress Untagged
                     state forwarding mcast_router 1
 v1                1 PVID Egress Untagged
                     [...] mcast_n_groups 1 mcast_max_groups 1
                   2
                     [...] mcast_n_groups 0 mcast_max_groups 0

Signed-off-by: Petr Machata 
---

Notes:
v3:
- Move the br_multicast_port_ctx_vlan_disabled() check
  out to the _vlan_ helpers callers. Thus these helpers
  cannot fail, which makes them very similar to the
  _port_ helpers. Have them take the MC context directly
  and unify them.

v2:
- Drop locks around accesses in
  br_multicast_{port,vlan}_ngroups_{get,set_max}(),
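- Drop bounces due to max<n [...]

[Archive damage: everything from the "<" above up to the middle of a line in
a later diff hunk was stripped by the list archive. The rest of the v2 notes,
the diffstat and the first hunks of this patch are missing here.]
[...]multicast_ctx);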
 }
 
+u32 br_multicast_ngroups_get(const struct net_bridge_mcast_port *pmctx)
+{
+   return READ_ONCE(pmctx->mdb_n_entries);
+}
+
+void br_multicast_ngroups_set_max(struct net_bridge_mcast_port *pmctx, u32 max)
+{
+   WRITE_ONCE(pmctx->mdb_max_entries, max);
+}
+
+u32 br_multicast_ngroups_get_max(const struct net_bridge_mcast_port *pmctx)
+{
+   return READ_ONCE(pmctx->mdb_max_entries);
+}
+
 static void br_multicast_destroy_port_group(struct net_bridge_mcast_gc *gc)
 {
struct net_bridge_port_group *pg;
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index a6133d469885..9173e52b89e2 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -202,6 +202,8 @@ static inline size_t br_port_info_size(void)
+ nla_total_size_64bit(sizeof(u64)) /* IFLA_BRPORT_HOLD_TIMER */
 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+ nla_total_size(sizeof(u8))/* IFLA_BRPORT_MULTICAST_ROUTER */
+   + nla_total_size(sizeof(u32))   /* IFLA_BRPORT_MCAST_N_GROUPS */
+   + nla_total_size(sizeof(u32))   /* IFLA_BRPORT_MCAST_MAX_GROUPS */
 #endif
+ nla_total_size(sizeof(u16))   /* IFLA_BRPORT_GROUP_FWD_MASK */
+ nla_total_size(sizeof(u8))/* IFLA_BRPORT_MRP_RING_OPEN */
@@ -298,7 +300,11 @@ static int br_port_fill_attrs(struct sk_buff *skb,
nla_put_u32(skb, IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT,
p->multicast_eht_hosts_limit) ||
nla_put_u32(skb, IFLA_BRPORT_MCAST_EHT_HOSTS_CNT,
-   p->multicast_eht_hosts_cnt))
+   p->multicast_eht_hosts_cnt) ||
+   nla_put_u32(skb, IFLA_BRPORT_MCAST_N_GROUPS,
+   br_multicast_ngroups_get(&p->multicast_ctx)) ||
+   nla_put_u32(skb, IFLA_BRPORT_MCAST_MAX_GROUPS,
+   br_multicast_ngroups_get_max(&p->multicast_ctx)))
return -EMSGSIZE;
 #endif
 
@@ -883,6 +889,8 @@ static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = {
[IFLA_BRPORT_MAB] = { .type = NLA_U8 },
[IFLA_BRPORT_BACKUP_PORT] = { .type = NLA_U32 },
[IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT] = { .type = NLA_U32 },
+   [IFLA_BRPORT_MCAST_N_GROUPS] = { .type = NLA_REJECT },
+   [IFLA_BRPORT_MCAST_MAX_GROUPS] = { .type = NLA_U32 },
 };
 
 /* Change the state of the port and notify spanning tree */
@@ -1017,6 +1025,13 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[],
if (err)
return err;
}
+
+   if (tb[IFLA_BRPORT_MCAST_MAX_GROUPS]) {
+   u32 

[Bridge] [PATCH net-next v3 06/16] net: bridge: Add a tracepoint for MDB overflows

2023-02-02 Thread Petr Machata via Bridge
The following patch will add two more maximum MDB allowances to the global
one, mcast_hash_max, that exists today. In all these cases, attempts to add
MDB entries above the configured maximums through netlink, fail noisily and
obviously. Such visibility is missing when adding entries through the
control plane traffic, by IGMP or MLD packets.

To improve visibility in those cases, add a trace point that reports the
violation, including the relevant netdevice (be it a slave or the bridge
itself), and the MDB entry parameters:

# perf record -e bridge:br_mdb_full &
# [...]
# perf script | cut -d: -f4-
 dev v2 af 2 src ::ffff:0.0.0.0 grp ::ffff:239.1.1.112/00:00:00:00:00:00 vid 0
 dev v2 af 10 src :: grp ff0e::112/00:00:00:00:00:00 vid 0
 dev v2 af 2 src ::ffff:0.0.0.0 grp ::ffff:239.1.1.112/00:00:00:00:00:00 vid 10
 dev v2 af 10 src 2001:db8:1::1 grp ff0e::1/00:00:00:00:00:00 vid 10
 dev v2 af 2 src ::ffff:192.0.2.1 grp ::ffff:239.1.1.1/00:00:00:00:00:00 vid 10

CC: Steven Rostedt 
CC: linux-trace-ker...@vger.kernel.org
Signed-off-by: Petr Machata 
Reviewed-by: Steven Rostedt (Google) 
---

Notes:
v2:
- Report IPv4 as an IPv6-mapped address through the IPv6 buffer
  as well, to save ring buffer space.

 include/trace/events/bridge.h | 58 +++
 net/core/net-traces.c |  1 +
 2 files changed, 59 insertions(+)

diff --git a/include/trace/events/bridge.h b/include/trace/events/bridge.h
index 6b200059c2c5..a6b3a4e409f0 100644
--- a/include/trace/events/bridge.h
+++ b/include/trace/events/bridge.h
@@ -122,6 +122,64 @@ TRACE_EVENT(br_fdb_update,
  __entry->flags)
 );
 
+TRACE_EVENT(br_mdb_full,
+
+   TP_PROTO(const struct net_device *dev,
+const struct br_ip *group),
+
+   TP_ARGS(dev, group),
+
+   TP_STRUCT__entry(
+   __string(dev, dev->name)
+   __field(int, af)
+   __field(u16, vid)
+   __array(__u8, src, 16)
+   __array(__u8, grp, 16)
+   __array(__u8, grpmac, ETH_ALEN) /* For af == 0. */
+   ),
+
+   TP_fast_assign(
+   struct in6_addr *in6;
+
+   __assign_str(dev, dev->name);
+   __entry->vid = group->vid;
+
+   if (!group->proto) {
+   __entry->af = 0;
+
+   memset(__entry->src, 0, sizeof(__entry->src));
+   memset(__entry->grp, 0, sizeof(__entry->grp));
+   memcpy(__entry->grpmac, group->dst.mac_addr, ETH_ALEN);
+   } else if (group->proto == htons(ETH_P_IP)) {
+   __entry->af = AF_INET;
+
+   in6 = (struct in6_addr *)__entry->src;
+   ipv6_addr_set_v4mapped(group->src.ip4, in6);
+
+   in6 = (struct in6_addr *)__entry->grp;
+   ipv6_addr_set_v4mapped(group->dst.ip4, in6);
+
+   memset(__entry->grpmac, 0, ETH_ALEN);
+
+#if IS_ENABLED(CONFIG_IPV6)
+   } else {
+   __entry->af = AF_INET6;
+
+   in6 = (struct in6_addr *)__entry->src;
+   *in6 = group->src.ip6;
+
+   in6 = (struct in6_addr *)__entry->grp;
+   *in6 = group->dst.ip6;
+
+   memset(__entry->grpmac, 0, ETH_ALEN);
+#endif
+   }
+   ),
+
+   TP_printk("dev %s af %u src %pI6c grp %pI6c/%pM vid %u",
+ __get_str(dev), __entry->af, __entry->src, __entry->grp,
+ __entry->grpmac, __entry->vid)
+);
 
 #endif /* _TRACE_BRIDGE_H */
 
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index c40cd8dd75c7..c6820ad2183f 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -41,6 +41,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_add);
 EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_external_learn_add);
 EXPORT_TRACEPOINT_SYMBOL_GPL(fdb_delete);
 EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_update);
+EXPORT_TRACEPOINT_SYMBOL_GPL(br_mdb_full);
 #endif
 
 #if IS_ENABLED(CONFIG_PAGE_POOL)
-- 
2.39.0



[Bridge] [PATCH net-next v3 07/16] net: bridge: Maintain number of MDB entries in net_bridge_mcast_port

2023-02-02 Thread Petr Machata
The MDB maintained by the bridge is limited. When the bridge is configured
for IGMP / MLD snooping, a buggy or malicious client can easily exhaust its
capacity. In SW datapath, the capacity is configurable through the
IFLA_BR_MCAST_HASH_MAX parameter, but ultimately is finite. Obviously a
similar limit exists in the HW datapath for purposes of offloading.

In order to prevent the issue of unilateral exhaustion of MDB resources,
introduce two parameters in each of two contexts:

- Per-port and per-port-VLAN number of MDB entries that the port
  is a member of.

- Per-port and (when BROPT_MCAST_VLAN_SNOOPING_ENABLED is enabled)
  per-port-VLAN maximum permitted number of MDB entries, or 0 for
  no limit.

The per-port multicast context is used for tracking of MDB entries for the
port as a whole. This is available for all bridges.

The per-port-VLAN multicast context is then only available on
VLAN-filtering bridges on VLANs that have multicast snooping on.

With these changes in place, it will be possible to configure MDB limit for
bridge as a whole, or any one port as a whole, or any single port-VLAN.

Note that unlike the global limit, exhaustion of the per-port and
per-port-VLAN maximums does not cause disablement of multicast snooping.
It is also permitted to configure the local limit larger than hash_max,
even though that is not useful.

In this patch, introduce only the accounting for number of entries, and the
max field itself, but not the means to toggle the max. The next patch
introduces the netlink APIs to toggle and read the values.

Signed-off-by: Petr Machata 
---

Notes:
v3:
- Access mdb_max_/_n_entries through READ_/WRITE_ONCE
- Move extack setting to br_multicast_port_ngroups_inc_one().
  Since we use NL_SET_ERR_MSG_FMT_MOD, the correct context
  (port / port-vlan) can be passed through an argument.
  This also removes the need for more READ/WRITE_ONCE's
  at the extack-setting site.

v2:
- In br_multicast_port_ngroups_inc_one(), bounce
  if n>=max, not if n==max
- Adjust extack messages to mention ngroups, now that
  the bounces appear when n>=max, not n==max
- In __br_multicast_enable_port_ctx(), do not reset
  max to 0. Also do not count number of entries by
  going through _inc, as that would end up incorrectly
  bouncing the entries.

 net/bridge/br_multicast.c | 136 +-
 net/bridge/br_private.h   |   2 +
 2 files changed, 137 insertions(+), 1 deletion(-)

diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 51b622afdb67..b6aa0bad5817 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -31,6 +31,7 @@
 #include 
 #include 
 #endif
+#include 
 
 #include "br_private.h"
 #include "br_private_mcast_eht.h"
@@ -234,6 +235,29 @@ br_multicast_pg_to_port_ctx(const struct 
net_bridge_port_group *pg)
return pmctx;
 }
 
+static struct net_bridge_mcast_port *
+br_multicast_port_vid_to_port_ctx(struct net_bridge_port *port, u16 vid)
+{
+   struct net_bridge_mcast_port *pmctx = NULL;
+   struct net_bridge_vlan *vlan;
+
+   lockdep_assert_held_once(&port->br->multicast_lock);
+
+   if (!br_opt_get(port->br, BROPT_MCAST_VLAN_SNOOPING_ENABLED))
+   return NULL;
+
+   /* Take RCU to access the vlan. */
+   rcu_read_lock();
+
+   vlan = br_vlan_find(nbp_vlan_group_rcu(port), vid);
+   if (vlan && !br_multicast_port_ctx_vlan_disabled(&vlan->port_mcast_ctx))
+   pmctx = &vlan->port_mcast_ctx;
+
+   rcu_read_unlock();
+
+   return pmctx;
+}
+
 /* when snooping we need to check if the contexts should be used
  * in the following order:
  * - if pmctx is non-NULL (port), check if it should be used
@@ -668,6 +692,86 @@ void br_multicast_del_group_src(struct 
net_bridge_group_src *src,
__br_multicast_del_group_src(src);
 }
 
+static int
+br_multicast_port_ngroups_inc_one(struct net_bridge_mcast_port *pmctx,
+ struct netlink_ext_ack *extack,
+ const char *what)
+{
+   u32 max = READ_ONCE(pmctx->mdb_max_entries);
+   u32 n = READ_ONCE(pmctx->mdb_n_entries);
+
+   if (max && n >= max) {
+   NL_SET_ERR_MSG_FMT_MOD(extack, "%s is already in %u groups, and 
mcast_max_groups=%u",
+  what, n, max);
+   return -E2BIG;
+   }
+
+   WRITE_ONCE(pmctx->mdb_n_entries, n + 1);
+   return 0;
+}
+
+static void br_multicast_port_ngroups_dec_one(struct net_bridge_mcast_port 
*pmctx)
+{
+   u32 n = READ_ONCE(pmctx->mdb_n_entries);
+
+   WARN_ON_ONCE(n == 0);
+   WRITE_ONCE(pmctx->mdb_n_entries, n - 1);
+}
+
+static int br_multicast_port_ngroups_inc(struct net_bridge_port *port,
+const struct br_ip *group,
+struct netlink_ext_ack *extack)
+{
+   struct net_bridge_mcast_port *pmctx;
+   int 

[Bridge] [PATCH net-next v3 04/16] net: bridge: Add br_multicast_del_port_group()

2023-02-02 Thread Petr Machata via Bridge
Since cleaning up the effects of br_multicast_new_port_group() just
consists of delisting and freeing the memory, the function
br_mdb_add_group_star_g() inlines the corresponding code. In the following
patches, number of per-port and per-port-VLAN MDB entries is going to be
maintained, and that counter will have to be updated. Because that logic
is going to be hidden in the br_multicast module, introduce a new hook
intended to again remove a newly-created group.

Signed-off-by: Petr Machata 
Reviewed-by: Ido Schimmel 
Acked-by: Nikolay Aleksandrov 
---
 net/bridge/br_mdb.c   |  3 +--
 net/bridge/br_multicast.c | 11 +++
 net/bridge/br_private.h   |  1 +
 3 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 139de8ac532c..9f22ebfdc518 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -1099,8 +1099,7 @@ static int br_mdb_add_group_star_g(const struct 
br_mdb_config *cfg,
return 0;
 
 err_del_port_group:
-   hlist_del_init(&p->mglist);
-   kfree(p);
+   br_multicast_del_port_group(p);
return err;
 }
 
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index f9f4d54226fd..08da724ebfdd 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1326,6 +1326,17 @@ struct net_bridge_port_group 
*br_multicast_new_port_group(
return p;
 }
 
+void br_multicast_del_port_group(struct net_bridge_port_group *p)
+{
+   struct net_bridge_port *port = p->key.port;
+
+   hlist_del_init(&p->mglist);
+   if (!br_multicast_is_star_g(&p->key.addr))
+   rhashtable_remove_fast(&port->br->sg_port_tbl, &p->rhnode,
+  br_sg_port_rht_params);
+   kfree(p);
+}
+
 void br_multicast_host_join(const struct net_bridge_mcast *brmctx,
struct net_bridge_mdb_entry *mp, bool notify)
 {
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 1805c468ae03..e4069e27b5c6 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -958,6 +958,7 @@ br_multicast_new_port_group(struct net_bridge_port *port,
unsigned char flags, const unsigned char *src,
u8 filter_mode, u8 rt_protocol,
struct netlink_ext_ack *extack);
+void br_multicast_del_port_group(struct net_bridge_port_group *p);
 int br_mdb_hash_init(struct net_bridge *br);
 void br_mdb_hash_fini(struct net_bridge *br);
 void br_mdb_notify(struct net_device *dev, struct net_bridge_mdb_entry *mp,
-- 
2.39.0



[Bridge] [PATCH net-next v3 02/16] net: bridge: Add extack to br_multicast_new_port_group()

2023-02-02 Thread Petr Machata via Bridge
Make it possible to set an extack in br_multicast_new_port_group().
Eventually, this function will check for per-port and per-port-vlan
MDB maximums, and will use the extack to communicate the reason for
the bounce.

Signed-off-by: Petr Machata 
Reviewed-by: Ido Schimmel 
Acked-by: Nikolay Aleksandrov 
---
 net/bridge/br_mdb.c   | 5 +++--
 net/bridge/br_multicast.c | 5 +++--
 net/bridge/br_private.h   | 3 ++-
 3 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 00e5743647b0..069061366541 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -849,7 +849,7 @@ static int br_mdb_add_group_sg(const struct br_mdb_config 
*cfg,
}
 
p = br_multicast_new_port_group(cfg->p, &cfg->group, *pp, flags, NULL,
-   MCAST_INCLUDE, cfg->rt_protocol);
+   MCAST_INCLUDE, cfg->rt_protocol, 
extack);
if (unlikely(!p)) {
NL_SET_ERR_MSG_MOD(extack, "Couldn't allocate new (S, G) port 
group");
return -ENOMEM;
@@ -1075,7 +1075,8 @@ static int br_mdb_add_group_star_g(const struct 
br_mdb_config *cfg,
}
 
p = br_multicast_new_port_group(cfg->p, &cfg->group, *pp, flags, NULL,
-   cfg->filter_mode, cfg->rt_protocol);
+   cfg->filter_mode, cfg->rt_protocol,
+   extack);
if (unlikely(!p)) {
NL_SET_ERR_MSG_MOD(extack, "Couldn't allocate new (*, G) port 
group");
return -ENOMEM;
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index dea1ee1bd095..de67d176838f 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1284,7 +1284,8 @@ struct net_bridge_port_group *br_multicast_new_port_group(
unsigned char flags,
const unsigned char *src,
u8 filter_mode,
-   u8 rt_protocol)
+   u8 rt_protocol,
+   struct netlink_ext_ack *extack)
 {
struct net_bridge_port_group *p;
 
@@ -1387,7 +1388,7 @@ __br_multicast_add_group(struct net_bridge_mcast *brmctx,
}
 
p = br_multicast_new_port_group(pmctx->port, group, *pp, 0, src,
-   filter_mode, RTPROT_KERNEL);
+   filter_mode, RTPROT_KERNEL, NULL);
if (unlikely(!p)) {
p = ERR_PTR(-ENOMEM);
goto out;
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 15ef7fd508ee..1805c468ae03 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -956,7 +956,8 @@ br_multicast_new_port_group(struct net_bridge_port *port,
const struct br_ip *group,
struct net_bridge_port_group __rcu *next,
unsigned char flags, const unsigned char *src,
-   u8 filter_mode, u8 rt_protocol);
+   u8 filter_mode, u8 rt_protocol,
+   struct netlink_ext_ack *extack);
 int br_mdb_hash_init(struct net_bridge *br);
 void br_mdb_hash_fini(struct net_bridge *br);
 void br_mdb_notify(struct net_device *dev, struct net_bridge_mdb_entry *mp,
-- 
2.39.0



[Bridge] [PATCH net-next v3 05/16] net: bridge: Change a cleanup in br_multicast_new_port_group() to goto

2023-02-02 Thread Petr Machata
This function is getting more to clean up in the following patches.
Structuring the cleanups in one labeled block will allow reusing the same
cleanup from several places.

Signed-off-by: Petr Machata 
Reviewed-by: Ido Schimmel 
Acked-by: Nikolay Aleksandrov 
---
 net/bridge/br_multicast.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 08da724ebfdd..51b622afdb67 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1309,8 +1309,7 @@ struct net_bridge_port_group *br_multicast_new_port_group(
rhashtable_lookup_insert_fast(&port->br->sg_port_tbl, &p->rhnode,
  br_sg_port_rht_params)) {
NL_SET_ERR_MSG_MOD(extack, "Couldn't insert new port group");
-   kfree(p);
-   return NULL;
+   goto free_out;
}
 
rcu_assign_pointer(p->next, next);
@@ -1324,6 +1323,10 @@ struct net_bridge_port_group 
*br_multicast_new_port_group(
eth_broadcast_addr(p->eth_addr);
 
return p;
+
+free_out:
+   kfree(p);
+   return NULL;
 }
 
 void br_multicast_del_port_group(struct net_bridge_port_group *p)
-- 
2.39.0



[Bridge] [PATCH net-next v3 03/16] net: bridge: Move extack-setting to br_multicast_new_port_group()

2023-02-02 Thread Petr Machata via Bridge
Now that br_multicast_new_port_group() takes an extack argument, move
setting the extack there. The downside is that the error messages end
up being less specific (the function cannot distinguish between (S,G)
and (*,G) groups). However, the alternative is to check in the caller
whether the callee set the extack, and if it didn't, set it. But that
is only done when the callee is not exactly known. (E.g. in case of a
notifier invocation.)

Signed-off-by: Petr Machata 
Reviewed-by: Ido Schimmel 
Acked-by: Nikolay Aleksandrov 
---
 net/bridge/br_mdb.c   | 9 +++--
 net/bridge/br_multicast.c | 5 -
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 069061366541..139de8ac532c 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -850,10 +850,9 @@ static int br_mdb_add_group_sg(const struct br_mdb_config 
*cfg,
 
p = br_multicast_new_port_group(cfg->p, &cfg->group, *pp, flags, NULL,
MCAST_INCLUDE, cfg->rt_protocol, 
extack);
-   if (unlikely(!p)) {
-   NL_SET_ERR_MSG_MOD(extack, "Couldn't allocate new (S, G) port 
group");
+   if (unlikely(!p))
return -ENOMEM;
-   }
+
rcu_assign_pointer(*pp, p);
if (!(flags & MDB_PG_FLAGS_PERMANENT) && !cfg->src_entry)
mod_timer(&p->timer,
@@ -1077,10 +1076,8 @@ static int br_mdb_add_group_star_g(const struct 
br_mdb_config *cfg,
p = br_multicast_new_port_group(cfg->p, &cfg->group, *pp, flags, NULL,
cfg->filter_mode, cfg->rt_protocol,
extack);
-   if (unlikely(!p)) {
-   NL_SET_ERR_MSG_MOD(extack, "Couldn't allocate new (*, G) port 
group");
+   if (unlikely(!p))
return -ENOMEM;
-   }
 
err = br_mdb_add_group_srcs(cfg, p, brmctx, extack);
if (err)
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index de67d176838f..f9f4d54226fd 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1290,8 +1290,10 @@ struct net_bridge_port_group 
*br_multicast_new_port_group(
struct net_bridge_port_group *p;
 
p = kzalloc(sizeof(*p), GFP_ATOMIC);
-   if (unlikely(!p))
+   if (unlikely(!p)) {
+   NL_SET_ERR_MSG_MOD(extack, "Couldn't allocate new port group");
return NULL;
+   }
 
p->key.addr = *group;
p->key.port = port;
@@ -1306,6 +1308,7 @@ struct net_bridge_port_group *br_multicast_new_port_group(
if (!br_multicast_is_star_g(group) &&
rhashtable_lookup_insert_fast(&port->br->sg_port_tbl, &p->rhnode,
  br_sg_port_rht_params)) {
+   NL_SET_ERR_MSG_MOD(extack, "Couldn't insert new port group");
kfree(p);
return NULL;
}
-- 
2.39.0



[Bridge] [PATCH net-next v3 01/16] net: bridge: Set strict_start_type at two policies

2023-02-02 Thread Petr Machata via Bridge
Make any attributes newly-added to br_port_policy or vlan_tunnel_policy
parsed strictly, to prevent userspace from passing garbage. Note that this
patchset only touches the former policy. The latter was adjusted for
completeness' sake. There do not appear to be other _deprecated calls
with non-NULL policies.

Suggested-by: Ido Schimmel 
Signed-off-by: Petr Machata 
Reviewed-by: Ido Schimmel 
Acked-by: Nikolay Aleksandrov 
---
 net/bridge/br_netlink.c| 2 ++
 net/bridge/br_netlink_tunnel.c | 3 +++
 2 files changed, 5 insertions(+)

diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 4316cc82ae17..a6133d469885 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -858,6 +858,8 @@ static int br_afspec(struct net_bridge *br,
 }
 
 static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = {
+   [IFLA_BRPORT_UNSPEC]= { .strict_start_type =
+   IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT + 1 },
[IFLA_BRPORT_STATE] = { .type = NLA_U8 },
[IFLA_BRPORT_COST]  = { .type = NLA_U32 },
[IFLA_BRPORT_PRIORITY]  = { .type = NLA_U16 },
diff --git a/net/bridge/br_netlink_tunnel.c b/net/bridge/br_netlink_tunnel.c
index 8914290c75d4..17abf092f7ca 100644
--- a/net/bridge/br_netlink_tunnel.c
+++ b/net/bridge/br_netlink_tunnel.c
@@ -188,6 +188,9 @@ int br_fill_vlan_tunnel_info(struct sk_buff *skb,
 }
 
 static const struct nla_policy vlan_tunnel_policy[IFLA_BRIDGE_VLAN_TUNNEL_MAX 
+ 1] = {
+   [IFLA_BRIDGE_VLAN_TUNNEL_UNSPEC] = {
+   .strict_start_type = IFLA_BRIDGE_VLAN_TUNNEL_FLAGS + 1
+   },
[IFLA_BRIDGE_VLAN_TUNNEL_ID] = { .type = NLA_U32 },
[IFLA_BRIDGE_VLAN_TUNNEL_VID] = { .type = NLA_U16 },
[IFLA_BRIDGE_VLAN_TUNNEL_FLAGS] = { .type = NLA_U16 },
-- 
2.39.0



[Bridge] [PATCH net-next v3 00/16] bridge: Limit number of MDB entries per port, port-vlan

2023-02-02 Thread Petr Machata via Bridge
The MDB maintained by the bridge is limited. When the bridge is configured
for IGMP / MLD snooping, a buggy or malicious client can easily exhaust its
capacity. In SW datapath, the capacity is configurable through the
IFLA_BR_MCAST_HASH_MAX parameter, but ultimately is finite. Obviously a
similar limit exists in the HW datapath for purposes of offloading.

In order to prevent the issue of unilateral exhaustion of MDB resources,
introduce two parameters in each of two contexts:

- Per-port and (when BROPT_MCAST_VLAN_SNOOPING_ENABLED is enabled)
  per-port-VLAN number of MDB entries that the port is a member of.

- Per-port and (when BROPT_MCAST_VLAN_SNOOPING_ENABLED is enabled)
  per-port-VLAN maximum permitted number of MDB entries, or 0 for
  no limit.

Per-port number of entries keeps track of the total number of MDB entries
configured on a given port. The per-port-VLAN value then keeps track of the
subset of MDB entries configured specifically for the given VLAN, on that
port. The number is adjusted as port_groups are created and deleted, and
therefore under multicast lock.

A maximum value, if non-zero, then places a limit on the number of entries
that can be configured in a given context. Attempts to add entries above
the maximum are rejected.

Rejection reason of netlink-based requests to add MDB entries is
communicated through extack. This channel is unavailable for rejections
triggered from the control path. To address this lack of visibility, the
patchset adds a tracepoint, bridge:br_mdb_full:

# perf record -e bridge:br_mdb_full &
# [...]
# perf script | cut -d: -f4-
 dev v2 af 2 src :::0.0.0.0 grp 
:::239.1.1.112/00:00:00:00:00:00 vid 0
 dev v2 af 10 src :: grp ff0e::112/00:00:00:00:00:00 vid 0
 dev v2 af 2 src :::0.0.0.0 grp 
:::239.1.1.112/00:00:00:00:00:00 vid 10
 dev v2 af 10 src 2001:db8:1::1 grp ff0e::1/00:00:00:00:00:00 vid 10
 dev v2 af 2 src :::192.0.2.1 grp 
:::239.1.1.1/00:00:00:00:00:00 vid 10

Another option to consume the tracepoint is e.g. through the bpftrace tool:

# bpftrace -e ' tracepoint:bridge:br_mdb_full /args->af != 0/ {
printf("dev %s src %s grp %s vid %u\n",
   str(args->dev), ntop(args->src),
   ntop(args->grp), args->vid);
}
tracepoint:bridge:br_mdb_full /args->af == 0/ {
printf("dev %s grp %s vid %u\n",
   str(args->dev),
   macaddr(args->grpmac), args->vid);
}'

This tracepoint is triggered for mcast_hash_max exhaustions as well.

The following is an example of how the feature is used. A more extensive
example is available in patch #8:

# bridge vlan set dev v1 vid 1 mcast_max_groups 1
# bridge mdb add dev br port v1 grp 230.1.2.3 temp vid 1
# bridge mdb add dev br port v1 grp 230.1.2.4 temp vid 1
Error: bridge: Port-VLAN is already in 1 groups, and mcast_max_groups=1.

The patchset progresses as follows:

- In patch #1, set strict_start_type at two bridge-related policies. The
  reason is we are adding a new attribute to one of these, and want the new
  attribute to be parsed strictly. The other was adjusted for completeness'
  sake.

- In patches #2 to #5, br_mdb and br_multicast code is adjusted to make the
  following additions smoother.

- In patch #6, add the tracepoint.

- In patch #7, the code to maintain number of MDB entries is added as
  struct net_bridge_mcast_port::mdb_n_entries. The maximum is added, too,
  as struct net_bridge_mcast_port::mdb_max_entries, however at this point
  there is no way to set the value yet, and since 0 is treated as "no
  limit", the functionality doesn't change at this point. Note however,
  that mcast_hash_max violations already do trigger at this point.

- In patch #8, netlink plumbing is added: reading of number of entries, and
  reading and writing of maximum.

  The per-port values are passed through RTM_NEWLINK / RTM_GETLINK messages
  in IFLA_BRPORT_MCAST_N_GROUPS and _MAX_GROUPS, inside IFLA_PROTINFO nest.

  The per-port-vlan values are passed through RTM_GETVLAN / RTM_NEWVLAN
  messages in BRIDGE_VLANDB_ENTRY_MCAST_N_GROUPS, _MAX_GROUPS, inside
  BRIDGE_VLANDB_ENTRY.

The following patches deal with the selftest:

- Patches #9 and #10 clean up and move around some selftest code.

- Patches #11 to #14 add helpers and generalize the existing IGMP / MLD
  support to allow generating packets with configurable group addresses and
  varying source lists for (S,G) memberships.

- Patch #15 adds code to generate IGMP leave and MLD done packets.

- Patch #16 finally adds the selftest itself.

v3:
- Patch #7:
- Access mdb_max_/_n_entries through READ_/WRITE_ONCE
- Move extack setting to br_multicast_port_ngroups_inc_one().
  Since we use 

Re: [Bridge] [PATCH net-next 0/5] ATU and FDB synchronization on locked ports

2023-02-02 Thread netdev

On 2023-01-31 20:25, Ido Schimmel wrote:

command like:

bridge fdb replace ADDR dev <DEV> master dynamic

We choose only to support this feature on locked ports, as it involves
utilizing the CPU to handle ATU related switchcore events (typically
interrupts) and thus can result in significant performance loss if
exposed to heavy traffic.


Not sure I understand this reasoning. I was under the impression that
hostapd is installing dynamic entries instead of static ones since the
latter are not flushed when carrier is lost. Therefore, with static
entries it is possible to unplug a host (potentially plugging a
different one) and not lose authentication.



Both auth schemes 802.1X and MAB install dynamic entries as you point 
out, and both use locked ports.
In the case of non locked ports, they just learn normally and age and 
refresh their entries, so the use case of a userspace added dynamic FDB 
entry is hard for me to see. And having userspace being notified of an 
ordinary event that a FDB entry has been aged out could maybe be used, 
but for the reasons mentioned it is not supported here.




On locked ports it is important for userspace to know when an 
authorized

station has become silent, hence not breaking the communication of a
station that has been authorized based on the MAC-Authentication 
Bypass
(MAB) scheme. Thus if the station keeps being active after 
authorization,
it will continue to have an open port as long as it is active. Only 
after
a silent period will it have to be reauthorized. As the ageing process 
in

the ATU is dependent on incoming traffic to the switchcore port, it is
necessary for the ATU to signal that an entry has aged out, so that 
the

FDB can be updated at the correct time.


Why mention MAB at all? Don't you want user space to always use dynamic
entries to authenticate hosts regardless of 802.1X/MAB?


Yes, you are right about that. I guess it came about as this was 
developed much in the same time and with the code of MAB.


Re: [Bridge] [PATCH net-next 5/5] net: dsa: mv88e6xxx: implementation of dynamic ATU entries

2023-02-02 Thread netdev

On 2023-01-31 19:56, Simon Horman wrote:

--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -42,6 +42,7 @@
 #include "ptp.h"
 #include "serdes.h"
 #include "smi.h"
+#include "switchdev.h"

 static void assert_reg_lock(struct mv88e6xxx_chip *chip)
 {
@@ -2726,18 +2727,25 @@ static int mv88e6xxx_port_fdb_add(struct 
dsa_switch *ds, int port,

  const unsigned char *addr, u16 vid,
  u16 fdb_flags, struct dsa_db db)
 {
+   bool is_dynamic = !!(fdb_flags & DSA_FDB_FLAG_DYNAMIC);
struct mv88e6xxx_chip *chip = ds->priv;
+   u8 state;
int err;

-   /* Ignore entries with flags set */
-   if (fdb_flags)
-   return 0;
+   state = MV88E6XXX_G1_ATU_DATA_STATE_UC_STATIC;
+   if (is_dynamic)
+   state = MV88E6XXX_G1_ATU_DATA_STATE_UC_AGE_7_NEWEST;


What if flags other than DSA_FDB_FLAG_DYNAMIC are set (in future)?


They will have to be caught and handled here if there is support for it, 
e.g. something like...


else if (someflag)
dosomething();

For now only one flag will actually be set and they are mutually 
exclusive, as they will not make sense together with the potential flags 
I know, but that can change at some time of course.





+   else
+   if (fdb_flags)


nit: else if (fdb_flags)


+   return 0;



...


Re: [Bridge] [PATCH net-next 3/5] drivers: net: dsa: add fdb entry flags incoming to switchcore drivers

2023-02-02 Thread netdev

On 2023-01-31 19:54, Simon Horman wrote:

--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -1684,11 +1684,15 @@ static int b53_arl_op(struct b53_device *dev, 
int op, int port,


 int b53_fdb_add(struct dsa_switch *ds, int port,
const unsigned char *addr, u16 vid,
-   struct dsa_db db)
+   u16 fdb_flags, struct dsa_db db)
 {
struct b53_device *priv = ds->priv;
int ret;

+   /* Ignore entries with set flags */
+   if (fdb_flags)
+   return 0;



Would returning -EOPNOTSUPP be more appropriate?

...


I don't think that would be so good, as the command

bridge fdb replace ADDR dev <DEV> master dynamic

is a valid command and should not generate errors. When ignored by the 
driver, it will just install a dynamic FDB entry in the bridge, and the 
bridge will age it.


Re: [Bridge] [PATCH net-next 1/5] net: bridge: add dynamic flag to switchdev notifier

2023-02-02 Thread netdev

On 2023-02-02 17:11, Ido Schimmel wrote:
On Thu, Feb 02, 2023 at 08:28:36AM +0100, net...@kapio-technology.com 
wrote:

On 2023-02-01 19:10, Ido Schimmel wrote:
> On Mon, Jan 30, 2023 at 06:34:25PM +0100, Hans J. Schultz wrote:
> > To be able to add dynamic FDB entries to drivers from userspace, the
> > dynamic flag must be added when sending RTM_NEWNEIGH events down.
> >
> > Signed-off-by: Hans J. Schultz 
> > ---
> >  include/net/switchdev.h   | 1 +
> >  net/bridge/br_switchdev.c | 2 ++
> >  2 files changed, 3 insertions(+)
> >
> > diff --git a/include/net/switchdev.h b/include/net/switchdev.h
> > index ca0312b78294..aaf918d4ba67 100644
> > --- a/include/net/switchdev.h
> > +++ b/include/net/switchdev.h
> > @@ -249,6 +249,7 @@ struct switchdev_notifier_fdb_info {
> >   u8 added_by_user:1,
> >  is_local:1,
> >  locked:1,
> > +is_dyn:1,
> >  offloaded:1;
> >  };
> >
> > diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
> > index 7eb6fd5bb917..4420fcbbfdb2 100644
> > --- a/net/bridge/br_switchdev.c
> > +++ b/net/bridge/br_switchdev.c
> > @@ -136,6 +136,8 @@ static void br_switchdev_fdb_populate(struct
> > net_bridge *br,
> > >   item->added_by_user = test_bit(BR_FDB_ADDED_BY_USER, &fdb->flags);
> > >   item->offloaded = test_bit(BR_FDB_OFFLOADED, &fdb->flags);
> > >   item->is_local = test_bit(BR_FDB_LOCAL, &fdb->flags);
> > > + item->is_dyn = !test_bit(BR_FDB_STATIC, &fdb->flags) &&
>
> Why not 'is_static' and be consistent with the bridge flag like all the
> other fields?
>
> Regardless of how you name this field, it is irrelevant for
> 'SWITCHDEV_FDB_ADD_TO_BRIDGE' notifications that all add FDB entries
> with the 'BR_FDB_ADDED_BY_EXT_LEARN' flag set, which makes
> 'BR_FDB_STATIC' irrelevant.
>
> > + item->added_by_user;
>
> Unclear why this is needed...
>

The answer to those two questions lies in my earlier correspondences 
(with

Oltean) on the RFC version.


It is not up to me as a reviewer to dig up old versions of the patch 
and

find out what was changed and why. It is up to you as the submitter of
the patch to provide all this information in the patch posting. Please
read:
https://www.kernel.org/doc/html/latest/process/submitting-patches.html

Specifically:

"Review comments or questions that do not lead to a code change should
almost certainly bring about a comment or changelog entry so that the
next reviewer better understands what is going on."

And:

"Other comments relevant only to the moment or the maintainer, not
suitable for the permanent changelog, should also go here. A good
example of such comments might be patch changelogs which describe what
has changed between the v1 and v2 version of the patch.

Please put this information after the --- line which separates the
changelog from the rest of the patch. The version information is not
part of the changelog which gets committed to the git tree. It is
additional information for the reviewers."

Thanks



Sorry about that. I thought it would be easily found...

On the first question please look here:
https://lore.kernel.org/netdev/20230119134045.fqdt6zrna5x3iavt@skbuf/

On the second question it is what Oltean pointed out to me here...
https://lore.kernel.org/netdev/20230118230135.szu6a7kvt2mjb3i5@skbuf/

Oltean says there:
"This is not true, because it assumes that DSA never called 
port_fdb_add()

up until now for bridge FDB entries with the BR_FDB_STATIC flag unset,
which is incorrect (it did)."

Though as I see it, if it is only from the DSA layer on, the new
is_dynamic flag would not be set anyway in the case he references. And,
as can be seen, the change is in the bridge layer (the rest is just
propagating the flag), and it ensures that this flag is only set when the
FDB entry was added by the user.


Re: [Bridge] [PATCH net-next 0/5] ATU and FDB synchronization on locked ports

2023-02-02 Thread Ido Schimmel
On Thu, Feb 02, 2023 at 05:19:07PM +0100, net...@kapio-technology.com wrote:
> On 2023-02-02 16:43, Ido Schimmel wrote:
> > On Thu, Feb 02, 2023 at 08:37:08AM +0100, net...@kapio-technology.com
> > wrote:
> > > On 2023-01-31 20:25, Ido Schimmel wrote:
> > > >
> > > > Will try to review tomorrow, but it looks like this set is missing
> > > > selftests. What about extending bridge_locked_port.sh?
> > > 
> > > I knew you would take this up. :-)
> > > But I am not sure that it's so easy to have selftests here as it is
> > > timing based and it would take the 5+ minutes just waiting to test in
> > > the standard case, and there is only support for mv88e6xxx driver with
> > > this patch set.
> > 
> > The ageing time is configurable: See commit 081197591769 ("selftests:
> > net: bridge: Parameterize ageing timeout"). Please add test cases in the
> > next version.
> 
> When I was looking at configuring the ageing time last time, my finding was
> that the ageing time could not be set very low as there was some part in the
> DSA layer etc, and confusion wrt units. I think the minimum secured was like
> around 2 min. (not validated), which is not that much of an improvement for
> fast testing. If you know what would be a good low timeout to set, I would
> like to know.

My point is that the ageing time is parametrized via 'LOW_AGEING_TIME'
in forwarding.config so just use '$LOW_AGEING_TIME' in the selftest and
set it as high as it needs to be for mv88e6xxx in your own
forwarding.config.


Re: [Bridge] [PATCH net-next mlxsw v2 06/16] net: bridge: Add a tracepoint for MDB overflows

2023-02-02 Thread Steven Rostedt
On Wed, 1 Feb 2023 18:28:39 +0100
Petr Machata  wrote:

> The following patch will add two more maximum MDB allowances to the global
> one, mcast_hash_max, that exists today. In all these cases, attempts to add
> MDB entries above the configured maximums through netlink, fail noisily and
> obviously. Such visibility is missing when adding entries through the
> control plane traffic, by IGMP or MLD packets.
> 
> To improve visibility in those cases, add a trace point that reports the
> violation, including the relevant netdevice (be it a slave or the bridge
> itself), and the MDB entry parameters:
> 
>   # perf record -e bridge:br_mdb_full &
>   # [...]
>   # perf script | cut -d: -f4-
>    dev v2 af 2 src ::ffff:0.0.0.0 grp ::ffff:239.1.1.112/00:00:00:00:00:00 vid 0
>    dev v2 af 10 src :: grp ff0e::112/00:00:00:00:00:00 vid 0
>    dev v2 af 2 src ::ffff:0.0.0.0 grp ::ffff:239.1.1.112/00:00:00:00:00:00 vid 10
>    dev v2 af 10 src 2001:db8:1::1 grp ff0e::1/00:00:00:00:00:00 vid 10
>    dev v2 af 2 src ::ffff:192.0.2.1 grp ::ffff:239.1.1.1/00:00:00:00:00:00 vid 10
> 
> CC: Steven Rostedt 
> CC: linux-trace-ker...@vger.kernel.org
> Signed-off-by: Petr Machata 
> ---
> 
> Notes:
> v2:
> - Report IPv4 as an IPv6-mapped address through the IPv6 buffer
>   as well, to save ring buffer space.
> 
>  include/trace/events/bridge.h | 58 +++
>  net/core/net-traces.c |  1 +
>  2 files changed, 59 insertions(+)
> 

From the tracing point of view:

Reviewed-by: Steven Rostedt (Google) 

-- Steve


Re: [Bridge] [PATCH net-next 0/5] ATU and FDB synchronization on locked ports

2023-02-02 Thread netdev

On 2023-02-02 16:43, Ido Schimmel wrote:
On Thu, Feb 02, 2023 at 08:37:08AM +0100, net...@kapio-technology.com 
wrote:

On 2023-01-31 20:25, Ido Schimmel wrote:
>
> Will try to review tomorrow, but it looks like this set is missing
> selftests. What about extending bridge_locked_port.sh?

I knew you would take this up. :-)
But I am not sure that it's so easy to have selftests here as it is timing
based and it would take the 5+ minutes just waiting to test in the standard
case, and there is only support for mv88e6xxx driver with this patch set.


The ageing time is configurable: See commit 081197591769 ("selftests:
net: bridge: Parameterize ageing timeout"). Please add test cases in the
next version.


When I was looking at configuring the ageing time last time, my finding 
was that the ageing time could not be set very low as there was some 
part in the DSA layer etc, and confusion wrt units. I think the minimum 
secured was like around 2 min. (not validated), which is not that much 
of an improvement for fast testing. If you know what would be a good low 
timeout to set, I would like to know.


Re: [Bridge] [PATCH net-next 1/5] net: bridge: add dynamic flag to switchdev notifier

2023-02-02 Thread Ido Schimmel
On Thu, Feb 02, 2023 at 08:28:36AM +0100, net...@kapio-technology.com wrote:
> On 2023-02-01 19:10, Ido Schimmel wrote:
> > On Mon, Jan 30, 2023 at 06:34:25PM +0100, Hans J. Schultz wrote:
> > > To be able to add dynamic FDB entries to drivers from userspace, the
> > > dynamic flag must be added when sending RTM_NEWNEIGH events down.
> > > 
> > > Signed-off-by: Hans J. Schultz 
> > > ---
> > >  include/net/switchdev.h   | 1 +
> > >  net/bridge/br_switchdev.c | 2 ++
> > >  2 files changed, 3 insertions(+)
> > > 
> > > diff --git a/include/net/switchdev.h b/include/net/switchdev.h
> > > index ca0312b78294..aaf918d4ba67 100644
> > > --- a/include/net/switchdev.h
> > > +++ b/include/net/switchdev.h
> > > @@ -249,6 +249,7 @@ struct switchdev_notifier_fdb_info {
> > >   u8 added_by_user:1,
> > >  is_local:1,
> > >  locked:1,
> > > +is_dyn:1,
> > >  offloaded:1;
> > >  };
> > > 
> > > diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
> > > index 7eb6fd5bb917..4420fcbbfdb2 100644
> > > --- a/net/bridge/br_switchdev.c
> > > +++ b/net/bridge/br_switchdev.c
> > > @@ -136,6 +136,8 @@ static void br_switchdev_fdb_populate(struct
> > > net_bridge *br,
> > > >   item->added_by_user = test_bit(BR_FDB_ADDED_BY_USER, &fdb->flags);
> > > >   item->offloaded = test_bit(BR_FDB_OFFLOADED, &fdb->flags);
> > > >   item->is_local = test_bit(BR_FDB_LOCAL, &fdb->flags);
> > > > + item->is_dyn = !test_bit(BR_FDB_STATIC, &fdb->flags) &&
> > 
> > Why not 'is_static' and be consistent with the bridge flag like all the
> > other fields?
> > 
> > Regardless of how you name this field, it is irrelevant for
> > 'SWITCHDEV_FDB_ADD_TO_BRIDGE' notifications that all add FDB entries
> > with the 'BR_FDB_ADDED_BY_EXT_LEARN' flag set, which makes
> > 'BR_FDB_STATIC' irrelevant.
> > 
> > > + item->added_by_user;
> > 
> > Unclear why this is needed...
> > 
> 
> The answer to those two questions lies in my earlier correspondences (with
> Oltean) on the RFC version.

It is not up to me as a reviewer to dig up old versions of the patch and
find out what was changed and why. It is up to you as the submitter of
the patch to provide all this information in the patch posting. Please
read:
https://www.kernel.org/doc/html/latest/process/submitting-patches.html

Specifically:

"Review comments or questions that do not lead to a code change should
almost certainly bring about a comment or changelog entry so that the
next reviewer better understands what is going on."

And:

"Other comments relevant only to the moment or the maintainer, not
suitable for the permanent changelog, should also go here. A good
example of such comments might be patch changelogs which describe what
has changed between the v1 and v2 version of the patch.

Please put this information after the --- line which separates the
changelog from the rest of the patch. The version information is not
part of the changelog which gets committed to the git tree. It is
additional information for the reviewers."

Thanks


Re: [Bridge] [PATCH net-next 0/5] ATU and FDB synchronization on locked ports

2023-02-02 Thread Ido Schimmel
On Thu, Feb 02, 2023 at 08:37:08AM +0100, net...@kapio-technology.com wrote:
> On 2023-01-31 20:25, Ido Schimmel wrote:
> > 
> > Will try to review tomorrow, but it looks like this set is missing
> > selftests. What about extending bridge_locked_port.sh?
> 
> I knew you would take this up. :-)
> But I am not sure that it's so easy to have selftests here as it is timing
> based and it would take the 5+ minutes just waiting to test in the standard
> case, and there is only support for mv88e6xxx driver with this patch set.

The ageing time is configurable: See commit 081197591769 ("selftests:
net: bridge: Parameterize ageing timeout"). Please add test cases in the
next version.


Re: [Bridge] [PATCH net-next mlxsw v2 08/16] net: bridge: Add netlink knobs for number / maximum MDB entries

2023-02-02 Thread Petr Machata via Bridge


Nikolay Aleksandrov  writes:

> On 02/02/2023 10:52, Nikolay Aleksandrov wrote:
>> On 01/02/2023 19:28, Petr Machata wrote:
>>> +int br_multicast_vlan_ngroups_set_max(struct net_bridge *br,
>>> + struct net_bridge_vlan *v, u32 max,
>>> + struct netlink_ext_ack *extack)
>>> +{
>>> +   if (br_multicast_port_ctx_vlan_disabled(&v->port_mcast_ctx)) {
>>> +   NL_SET_ERR_MSG_MOD(extack, "Multicast snooping disabled on this 
>>> VLAN");
>>> +   return -EINVAL;
>>> +   }
>> 
>> same comment about the check
>
> Ok, not exactly the same. I see that for the max case this check is used, 
> please pull it
> in the vlan code and just drop this helper. Both read/write will be doing the 
> same then.

OK. This actually simplifies the code quite a bit.


Re: [Bridge] [PATCH net-next mlxsw v2 07/16] net: bridge: Maintain number of MDB entries in net_bridge_mcast_port

2023-02-02 Thread Petr Machata via Bridge


Nikolay Aleksandrov  writes:

> On 01/02/2023 19:28, Petr Machata wrote:
>> @@ -668,6 +692,82 @@ void br_multicast_del_group_src(struct 
>> net_bridge_group_src *src,
>>  __br_multicast_del_group_src(src);
>>  }
>>  
>> +static int
>> +br_multicast_port_ngroups_inc_one(struct net_bridge_mcast_port *pmctx,
>> +  struct netlink_ext_ack *extack)
>> +{
>> +if (pmctx->mdb_max_entries &&
>> +pmctx->mdb_n_entries >= pmctx->mdb_max_entries)
>
> These should be using *_ONCE() because of the next patch.
> KCSAN might be sad otherwise. :)

I didn't see any (relevant) KCSAN warnings on x86_64. But yeah, it
(and the others you cited) should be *_ONCE.


Re: [Bridge] [PATCH net-next mlxsw v2 00/16] bridge: Limit number of MDB entries per port, port-vlan

2023-02-02 Thread Petr Machata via Bridge


Jakub Kicinski  writes:

> On Wed, 1 Feb 2023 18:28:33 +0100 Petr Machata wrote:
>> Subject: [PATCH net-next mlxsw v2 00/16] bridge: Limit number of MDB entries 
>> per port, port-vlan
>
> What do you mean by "net-next mlxsw"?
> Is there a tree called "net-next mlxsw" somewhere?

Sorry about this. "mlxsw" is our internal tree, the tag slipped in by
mistake. Can you simply ignore it, or should I resend?


Re: [Bridge] [PATCH net-next mlxsw v2 08/16] net: bridge: Add netlink knobs for number / maximum MDB entries

2023-02-02 Thread Nikolay Aleksandrov
On 02/02/2023 10:52, Nikolay Aleksandrov wrote:
> On 01/02/2023 19:28, Petr Machata wrote:
>> The previous patch added accounting for number of MDB entries per port and
>> per port-VLAN, and the logic to verify that these values stay within
>> configured bounds. However it didn't provide means to actually configure
>> those bounds or read the occupancy. This patch does that.
>>
>> Two new netlink attributes are added for the MDB occupancy:
>> IFLA_BRPORT_MCAST_N_GROUPS for the per-port occupancy and
>> BRIDGE_VLANDB_ENTRY_MCAST_N_GROUPS for the per-port-VLAN occupancy.
>> And another two for the maximum number of MDB entries:
>> IFLA_BRPORT_MCAST_MAX_GROUPS for the per-port maximum, and
>> BRIDGE_VLANDB_ENTRY_MCAST_MAX_GROUPS for the per-port-VLAN one.
>>
>> Note that the two new IFLA_BRPORT_ attributes prompt bumping of
>> RTNL_SLAVE_MAX_TYPE to size the slave attribute tables large enough.
>>
>> The new attributes are used like this:
>>
>>  # ip link add name br up type bridge vlan_filtering 1 mcast_snooping 1 \
>>   mcast_vlan_snooping 1 mcast_querier 1
>>  # ip link set dev v1 master br
>>  # bridge vlan add dev v1 vid 2
>>
>>  # bridge vlan set dev v1 vid 1 mcast_max_groups 1
>>  # bridge mdb add dev br port v1 grp 230.1.2.3 temp vid 1
>>  # bridge mdb add dev br port v1 grp 230.1.2.4 temp vid 1
>>  Error: bridge: Port-VLAN is already a member in mcast_max_groups (1) groups.
>>
>>  # bridge link set dev v1 mcast_max_groups 1
>>  # bridge mdb add dev br port v1 grp 230.1.2.3 temp vid 2
>>  Error: bridge: Port is already a member in mcast_max_groups (1) groups.
>>
>>  # bridge -d link show
>>  5: v1@v2:  mtu 1500 master br [...]
>>  [...] mcast_n_groups 1 mcast_max_groups 1
>>
>>  # bridge -d vlan show
>>  port              vlan-id
>>  br                1 PVID Egress Untagged
>>                      state forwarding mcast_router 1
>>  v1                1 PVID Egress Untagged
>>                      [...] mcast_n_groups 1 mcast_max_groups 1
>>                    2
>>                      [...] mcast_n_groups 0 mcast_max_groups 0
>>
>> Signed-off-by: Petr Machata 
>> ---
>>
>> Notes:
>> v2:
>> - Drop locks around accesses in
>>   br_multicast_{port,vlan}_ngroups_{get,set_max}(),
>> - Drop bounces due to max<n in
>>   br_multicast_{port,vlan}_ngroups_set_max().
>>
>>  include/uapi/linux/if_bridge.h |  2 ++
>>  include/uapi/linux/if_link.h   |  2 ++
>>  net/bridge/br_multicast.c  | 50 ++
>>  net/bridge/br_netlink.c| 17 +++-
>>  net/bridge/br_private.h| 15 +-
>>  net/bridge/br_vlan.c   | 11 +---
>>  net/bridge/br_vlan_options.c   | 33 +-
>>  net/core/rtnetlink.c   |  2 +-
>>  8 files changed, 124 insertions(+), 8 deletions(-)
>>
>> diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
>> index d9de241d90f9..d60c456710b3 100644
>> --- a/include/uapi/linux/if_bridge.h
>> +++ b/include/uapi/linux/if_bridge.h
>> @@ -523,6 +523,8 @@ enum {
>>  BRIDGE_VLANDB_ENTRY_TUNNEL_INFO,
>>  BRIDGE_VLANDB_ENTRY_STATS,
>>  BRIDGE_VLANDB_ENTRY_MCAST_ROUTER,
>> +BRIDGE_VLANDB_ENTRY_MCAST_N_GROUPS,
>> +BRIDGE_VLANDB_ENTRY_MCAST_MAX_GROUPS,
>>  __BRIDGE_VLANDB_ENTRY_MAX,
>>  };
>>  #define BRIDGE_VLANDB_ENTRY_MAX (__BRIDGE_VLANDB_ENTRY_MAX - 1)
>> diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
>> index 1021a7e47a86..1bed3a72939c 100644
>> --- a/include/uapi/linux/if_link.h
>> +++ b/include/uapi/linux/if_link.h
>> @@ -564,6 +564,8 @@ enum {
>>  IFLA_BRPORT_MCAST_EHT_HOSTS_CNT,
>>  IFLA_BRPORT_LOCKED,
>>  IFLA_BRPORT_MAB,
>> +IFLA_BRPORT_MCAST_N_GROUPS,
>> +IFLA_BRPORT_MCAST_MAX_GROUPS,
>>  __IFLA_BRPORT_MAX
>>  };
>>  #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
>> diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
>> index e7ae339a8757..393ffc21c3e8 100644
>> --- a/net/bridge/br_multicast.c
>> +++ b/net/bridge/br_multicast.c
>> @@ -768,6 +768,56 @@ static void br_multicast_port_ngroups_dec(struct 
>> net_bridge_port *port, u16 vid)
>>  br_multicast_port_ngroups_dec_one(&port->multicast_ctx);
>>  }
>>  
>> +u32 br_multicast_port_ngroups_get(const struct net_bridge_port *port)
>> +{
>> +return READ_ONCE(port->multicast_ctx.mdb_n_entries);
>> +}
>> +
>> +int br_multicast_vlan_ngroups_get(struct net_bridge *br,
>> +  const struct net_bridge_vlan *v,
>> +  u32 *n)
>> +{
>> +if (br_multicast_port_ctx_vlan_disabled(&v->port_mcast_ctx))
>> +return -EINVAL;
> 
> This check seems unnecessary since the helper is used only if 
> !br_multicast_port_ctx_vlan_disabled()
> below.
> 
>> +
>> +*n = READ_ONCE(v->port_mcast_ctx.mdb_n_entries);
>> +return 0;
>> +}
>> +
>> +void br_multicast_port_ngroups_set_max(struct net_bridge_port *port, u32 
>> max)
>> +{
>> +

Re: [Bridge] [PATCH net-next mlxsw v2 07/16] net: bridge: Maintain number of MDB entries in net_bridge_mcast_port

2023-02-02 Thread Nikolay Aleksandrov
On 02/02/2023 10:56, Nikolay Aleksandrov wrote:
> On 01/02/2023 19:28, Petr Machata wrote:
>> The MDB maintained by the bridge is limited. When the bridge is configured
>> for IGMP / MLD snooping, a buggy or malicious client can easily exhaust its
>> capacity. In SW datapath, the capacity is configurable through the
>> IFLA_BR_MCAST_HASH_MAX parameter, but ultimately is finite. Obviously a
>> similar limit exists in the HW datapath for purposes of offloading.
>>
>> In order to prevent the issue of unilateral exhaustion of MDB resources,
>> introduce two parameters in each of two contexts:
>>
>> - Per-port and per-port-VLAN number of MDB entries that the port
>>   is member in.
>>
>> - Per-port and (when BROPT_MCAST_VLAN_SNOOPING_ENABLED is enabled)
>>   per-port-VLAN maximum permitted number of MDB entries, or 0 for
>>   no limit.
>>
>> The per-port multicast context is used for tracking of MDB entries for the
>> port as a whole. This is available for all bridges.
>>
>> The per-port-VLAN multicast context is then only available on
>> VLAN-filtering bridges on VLANs that have multicast snooping on.
>>
>> With these changes in place, it will be possible to configure MDB limit for
>> bridge as a whole, or any one port as a whole, or any single port-VLAN.
>>
>> Note that unlike the global limit, exhaustion of the per-port and
>> per-port-VLAN maximums does not cause disablement of multicast snooping.
>> It is also permitted to configure the local limit larger than hash_max,
>> even though that is not useful.
>>
>> In this patch, introduce only the accounting for number of entries, and the
>> max field itself, but not the means to toggle the max. The next patch
>> introduces the netlink APIs to toggle and read the values.
>>
>> Signed-off-by: Petr Machata 
>> ---
>>
>> Notes:
>> v2:
>> - In br_multicast_port_ngroups_inc_one(), bounce
>>   if n>=max, not if n==max
>> - Adjust extack messages to mention ngroups, now that
>>   the bounces appear when n>=max, not n==max
>> - In __br_multicast_enable_port_ctx(), do not reset
>>   max to 0. Also do not count number of entries by
>>   going through _inc, as that would end up incorrectly
>>   bouncing the entries.
>>
>>  net/bridge/br_multicast.c | 132 +-
>>  net/bridge/br_private.h   |   2 +
>>  2 files changed, 133 insertions(+), 1 deletion(-)
>>
>> diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
>> index 51b622afdb67..e7ae339a8757 100644
>> --- a/net/bridge/br_multicast.c
>> +++ b/net/bridge/br_multicast.c
>> @@ -31,6 +31,7 @@
>>  #include 
>>  #include 
>>  #endif
>> +#include 
>>  
>>  #include "br_private.h"
>>  #include "br_private_mcast_eht.h"
>> @@ -234,6 +235,29 @@ br_multicast_pg_to_port_ctx(const struct 
>> net_bridge_port_group *pg)
>>  return pmctx;
>>  }
>>  
>> +static struct net_bridge_mcast_port *
>> +br_multicast_port_vid_to_port_ctx(struct net_bridge_port *port, u16 vid)
>> +{
>> +struct net_bridge_mcast_port *pmctx = NULL;
>> +struct net_bridge_vlan *vlan;
>> +
>> +lockdep_assert_held_once(&port->br->multicast_lock);
>> +
>> +if (!br_opt_get(port->br, BROPT_MCAST_VLAN_SNOOPING_ENABLED))
>> +return NULL;
>> +
>> +/* Take RCU to access the vlan. */
>> +rcu_read_lock();
>> +
>> +vlan = br_vlan_find(nbp_vlan_group_rcu(port), vid);
>> +if (vlan && !br_multicast_port_ctx_vlan_disabled(&vlan->port_mcast_ctx))
>> +pmctx = &vlan->port_mcast_ctx;
>> +
>> +rcu_read_unlock();
>> +
>> +return pmctx;
>> +}
>> +
>>  /* when snooping we need to check if the contexts should be used
>>   * in the following order:
>>   * - if pmctx is non-NULL (port), check if it should be used
>> @@ -668,6 +692,82 @@ void br_multicast_del_group_src(struct 
>> net_bridge_group_src *src,
>>  __br_multicast_del_group_src(src);
>>  }
>>  
>> +static int
>> +br_multicast_port_ngroups_inc_one(struct net_bridge_mcast_port *pmctx,
>> +  struct netlink_ext_ack *extack)
>> +{
>> +if (pmctx->mdb_max_entries &&
>> +pmctx->mdb_n_entries >= pmctx->mdb_max_entries)
> 
> These should be using *_ONCE() because of the next patch.
> KCSAN might be sad otherwise. :)
> 
>> +return -E2BIG;
>> +
>> +pmctx->mdb_n_entries++;
> 
> WRITE_ONCE()
> 
>> +return 0;
>> +}
>> +
>> +static void br_multicast_port_ngroups_dec_one(struct net_bridge_mcast_port 
>> *pmctx)
>> +{
>> +WARN_ON_ONCE(pmctx->mdb_n_entries-- == 0);
> 
> READ_ONCE()

err, I meant WRITE_ONCE() of course. :)
Need to get coffee.

> 
>> +}
>> +



Re: [Bridge] [PATCH net-next mlxsw v2 07/16] net: bridge: Maintain number of MDB entries in net_bridge_mcast_port

2023-02-02 Thread Nikolay Aleksandrov
On 01/02/2023 19:28, Petr Machata wrote:
> The MDB maintained by the bridge is limited. When the bridge is configured
> for IGMP / MLD snooping, a buggy or malicious client can easily exhaust its
> capacity. In SW datapath, the capacity is configurable through the
> IFLA_BR_MCAST_HASH_MAX parameter, but ultimately is finite. Obviously a
> similar limit exists in the HW datapath for purposes of offloading.
> 
> In order to prevent the issue of unilateral exhaustion of MDB resources,
> introduce two parameters in each of two contexts:
> 
> - Per-port and per-port-VLAN number of MDB entries that the port
>   is member in.
> 
> - Per-port and (when BROPT_MCAST_VLAN_SNOOPING_ENABLED is enabled)
>   per-port-VLAN maximum permitted number of MDB entries, or 0 for
>   no limit.
> 
> The per-port multicast context is used for tracking of MDB entries for the
> port as a whole. This is available for all bridges.
> 
> The per-port-VLAN multicast context is then only available on
> VLAN-filtering bridges on VLANs that have multicast snooping on.
> 
> With these changes in place, it will be possible to configure MDB limit for
> bridge as a whole, or any one port as a whole, or any single port-VLAN.
> 
> Note that unlike the global limit, exhaustion of the per-port and
> per-port-VLAN maximums does not cause disablement of multicast snooping.
> It is also permitted to configure the local limit larger than hash_max,
> even though that is not useful.
> 
> In this patch, introduce only the accounting for number of entries, and the
> max field itself, but not the means to toggle the max. The next patch
> introduces the netlink APIs to toggle and read the values.
> 
> Signed-off-by: Petr Machata 
> ---
> 
> Notes:
> v2:
> - In br_multicast_port_ngroups_inc_one(), bounce
>   if n>=max, not if n==max
> - Adjust extack messages to mention ngroups, now that
>   the bounces appear when n>=max, not n==max
> - In __br_multicast_enable_port_ctx(), do not reset
>   max to 0. Also do not count number of entries by
>   going through _inc, as that would end up incorrectly
>   bouncing the entries.
> 
>  net/bridge/br_multicast.c | 132 +-
>  net/bridge/br_private.h   |   2 +
>  2 files changed, 133 insertions(+), 1 deletion(-)
> 
> diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
> index 51b622afdb67..e7ae339a8757 100644
> --- a/net/bridge/br_multicast.c
> +++ b/net/bridge/br_multicast.c
> @@ -31,6 +31,7 @@
>  #include 
>  #include 
>  #endif
> +#include 
>  
>  #include "br_private.h"
>  #include "br_private_mcast_eht.h"
> @@ -234,6 +235,29 @@ br_multicast_pg_to_port_ctx(const struct 
> net_bridge_port_group *pg)
>   return pmctx;
>  }
>  
> +static struct net_bridge_mcast_port *
> +br_multicast_port_vid_to_port_ctx(struct net_bridge_port *port, u16 vid)
> +{
> + struct net_bridge_mcast_port *pmctx = NULL;
> + struct net_bridge_vlan *vlan;
> +
> + lockdep_assert_held_once(&port->br->multicast_lock);
> +
> + if (!br_opt_get(port->br, BROPT_MCAST_VLAN_SNOOPING_ENABLED))
> + return NULL;
> +
> + /* Take RCU to access the vlan. */
> + rcu_read_lock();
> +
> + vlan = br_vlan_find(nbp_vlan_group_rcu(port), vid);
> + if (vlan && !br_multicast_port_ctx_vlan_disabled(&vlan->port_mcast_ctx))
> + pmctx = &vlan->port_mcast_ctx;
> +
> + rcu_read_unlock();
> +
> + return pmctx;
> +}
> +
>  /* when snooping we need to check if the contexts should be used
>   * in the following order:
>   * - if pmctx is non-NULL (port), check if it should be used
> @@ -668,6 +692,82 @@ void br_multicast_del_group_src(struct 
> net_bridge_group_src *src,
>   __br_multicast_del_group_src(src);
>  }
>  
> +static int
> +br_multicast_port_ngroups_inc_one(struct net_bridge_mcast_port *pmctx,
> +   struct netlink_ext_ack *extack)
> +{
> + if (pmctx->mdb_max_entries &&
> + pmctx->mdb_n_entries >= pmctx->mdb_max_entries)

These should be using *_ONCE() because of the next patch.
KCSAN might be sad otherwise. :)

> + return -E2BIG;
> +
> + pmctx->mdb_n_entries++;

WRITE_ONCE()

> + return 0;
> +}
> +
> +static void br_multicast_port_ngroups_dec_one(struct net_bridge_mcast_port 
> *pmctx)
> +{
> + WARN_ON_ONCE(pmctx->mdb_n_entries-- == 0);

READ_ONCE()

> +}
> +
> +static int br_multicast_port_ngroups_inc(struct net_bridge_port *port,
> +  const struct br_ip *group,
> +  struct netlink_ext_ack *extack)
> +{
> + struct net_bridge_mcast_port *pmctx;
> + int err;
> +
> + lockdep_assert_held_once(&port->br->multicast_lock);
> +
> + /* Always count on the port context. */
> + err = br_multicast_port_ngroups_inc_one(&port->multicast_ctx, extack);
> + if (err) {
> + NL_SET_ERR_MSG_FMT_MOD(extack, "Port is already in %u groups, 
> and mcast_max_groups=%u",
> + 

Re: [Bridge] [PATCH net-next mlxsw v2 08/16] net: bridge: Add netlink knobs for number / maximum MDB entries

2023-02-02 Thread Nikolay Aleksandrov
On 01/02/2023 19:28, Petr Machata wrote:
> The previous patch added accounting for number of MDB entries per port and
> per port-VLAN, and the logic to verify that these values stay within
> configured bounds. However it didn't provide means to actually configure
> those bounds or read the occupancy. This patch does that.
> 
> Two new netlink attributes are added for the MDB occupancy:
> IFLA_BRPORT_MCAST_N_GROUPS for the per-port occupancy and
> BRIDGE_VLANDB_ENTRY_MCAST_N_GROUPS for the per-port-VLAN occupancy.
> And another two for the maximum number of MDB entries:
> IFLA_BRPORT_MCAST_MAX_GROUPS for the per-port maximum, and
> BRIDGE_VLANDB_ENTRY_MCAST_MAX_GROUPS for the per-port-VLAN one.
> 
> Note that the two new IFLA_BRPORT_ attributes prompt bumping of
> RTNL_SLAVE_MAX_TYPE to size the slave attribute tables large enough.
> 
> The new attributes are used like this:
> 
>  # ip link add name br up type bridge vlan_filtering 1 mcast_snooping 1 \
>   mcast_vlan_snooping 1 mcast_querier 1
>  # ip link set dev v1 master br
>  # bridge vlan add dev v1 vid 2
> 
>  # bridge vlan set dev v1 vid 1 mcast_max_groups 1
>  # bridge mdb add dev br port v1 grp 230.1.2.3 temp vid 1
>  # bridge mdb add dev br port v1 grp 230.1.2.4 temp vid 1
>  Error: bridge: Port-VLAN is already a member in mcast_max_groups (1) groups.
> 
>  # bridge link set dev v1 mcast_max_groups 1
>  # bridge mdb add dev br port v1 grp 230.1.2.3 temp vid 2
>  Error: bridge: Port is already a member in mcast_max_groups (1) groups.
> 
>  # bridge -d link show
>  5: v1@v2:  mtu 1500 master br [...]
>  [...] mcast_n_groups 1 mcast_max_groups 1
> 
>  # bridge -d vlan show
>  port              vlan-id
>  br                1 PVID Egress Untagged
>                      state forwarding mcast_router 1
>  v1                1 PVID Egress Untagged
>                      [...] mcast_n_groups 1 mcast_max_groups 1
>                    2
>                      [...] mcast_n_groups 0 mcast_max_groups 0
> 
> Signed-off-by: Petr Machata 
> ---
> 
> Notes:
> v2:
> - Drop locks around accesses in
>   br_multicast_{port,vlan}_ngroups_{get,set_max}(),
> - Drop bounces due to max<n in
>   br_multicast_{port,vlan}_ngroups_set_max().
> 
>  include/uapi/linux/if_bridge.h |  2 ++
>  include/uapi/linux/if_link.h   |  2 ++
>  net/bridge/br_multicast.c  | 50 ++
>  net/bridge/br_netlink.c| 17 +++-
>  net/bridge/br_private.h| 15 +-
>  net/bridge/br_vlan.c   | 11 +---
>  net/bridge/br_vlan_options.c   | 33 +-
>  net/core/rtnetlink.c   |  2 +-
>  8 files changed, 124 insertions(+), 8 deletions(-)
> 
> diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
> index d9de241d90f9..d60c456710b3 100644
> --- a/include/uapi/linux/if_bridge.h
> +++ b/include/uapi/linux/if_bridge.h
> @@ -523,6 +523,8 @@ enum {
>   BRIDGE_VLANDB_ENTRY_TUNNEL_INFO,
>   BRIDGE_VLANDB_ENTRY_STATS,
>   BRIDGE_VLANDB_ENTRY_MCAST_ROUTER,
> + BRIDGE_VLANDB_ENTRY_MCAST_N_GROUPS,
> + BRIDGE_VLANDB_ENTRY_MCAST_MAX_GROUPS,
>   __BRIDGE_VLANDB_ENTRY_MAX,
>  };
>  #define BRIDGE_VLANDB_ENTRY_MAX (__BRIDGE_VLANDB_ENTRY_MAX - 1)
> diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
> index 1021a7e47a86..1bed3a72939c 100644
> --- a/include/uapi/linux/if_link.h
> +++ b/include/uapi/linux/if_link.h
> @@ -564,6 +564,8 @@ enum {
>   IFLA_BRPORT_MCAST_EHT_HOSTS_CNT,
>   IFLA_BRPORT_LOCKED,
>   IFLA_BRPORT_MAB,
> + IFLA_BRPORT_MCAST_N_GROUPS,
> + IFLA_BRPORT_MCAST_MAX_GROUPS,
>   __IFLA_BRPORT_MAX
>  };
>  #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
> diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
> index e7ae339a8757..393ffc21c3e8 100644
> --- a/net/bridge/br_multicast.c
> +++ b/net/bridge/br_multicast.c
> @@ -768,6 +768,56 @@ static void br_multicast_port_ngroups_dec(struct 
> net_bridge_port *port, u16 vid)
>   br_multicast_port_ngroups_dec_one(&port->multicast_ctx);
>  }
>  
> +u32 br_multicast_port_ngroups_get(const struct net_bridge_port *port)
> +{
> + return READ_ONCE(port->multicast_ctx.mdb_n_entries);
> +}
> +
> +int br_multicast_vlan_ngroups_get(struct net_bridge *br,
> +   const struct net_bridge_vlan *v,
> +   u32 *n)
> +{
> + if (br_multicast_port_ctx_vlan_disabled(&v->port_mcast_ctx))
> + return -EINVAL;

This check seems unnecessary since the helper is used only if 
!br_multicast_port_ctx_vlan_disabled()
below.

> +
> + *n = READ_ONCE(v->port_mcast_ctx.mdb_n_entries);
> + return 0;
> +}
> +
> +void br_multicast_port_ngroups_set_max(struct net_bridge_port *port, u32 max)
> +{
> + WRITE_ONCE(port->multicast_ctx.mdb_max_entries, max);
> +}
> +
> +int br_multicast_vlan_ngroups_set_max(struct net_bridge *br,
> +   struct