Re: [ovs-dev] [PATCH v3 3/3] dpif-netlink: Remove redundant null assignment

2022-11-04 Thread Justin Pettit
I assume someone else will pull this in as part of the patch set, so:

Acked-by: Justin Pettit 

Ilya, let me know if you'd like me to merge it, though.

--Justin


> On Nov 4, 2022, at 6:06 AM, Roi Dayan  wrote:
> 
> The assignment of the features pointer is not doing
> anything and can be removed.
> 
> CC: Justin Pettit 
> Signed-off-by: Roi Dayan 
> ---
> 
> Notes:
>v3:
>- fix get meter features commit to just remove redundant null assignment.
> 
>v2:
>- move memset from wrapper call
> 
> lib/dpif-netlink.c | 1 -
> 1 file changed, 1 deletion(-)
> 
> diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
> index a620a6ec52dd..026b0daa8d83 100644
> --- a/lib/dpif-netlink.c
> +++ b/lib/dpif-netlink.c
> @@ -4105,7 +4105,6 @@ dpif_netlink_meter_get_features(const struct dpif 
> *dpif_,
> struct ofputil_meter_features *features)
> {
> if (probe_broken_meters(CONST_CAST(struct dpif *, dpif_))) {
> -features = NULL;
> return;
> }
> 
> -- 
> 2.38.0
> 

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH ovn 2/2] northd: Add I-P for syncing SB address sets.

2022-11-04 Thread numans
From: Numan Siddique 

Updates to NB address sets and NB port groups are handled
incrementally for syncing the SB address sets.  This patch
doesn't support syncing the SB Address sets for the router
load balancer vips incrementally, instead a full recompute is
triggered for any changes to NB load balancers, NB load balancer
groups and NB logical routers.

Signed-off-by: Numan Siddique 
---
 northd/en-sb-sync.c  | 195 ---
 northd/en-sb-sync.h  |   6 ++
 northd/inc-proc-northd.c |  18 +++-
 3 files changed, 204 insertions(+), 15 deletions(-)

diff --git a/northd/en-sb-sync.c b/northd/en-sb-sync.c
index c3ba315df..23884d394 100644
--- a/northd/en-sb-sync.c
+++ b/northd/en-sb-sync.c
@@ -22,6 +22,7 @@
 #include "openvswitch/util.h"
 
 #include "en-sb-sync.h"
+#include "include/ovn/expr.h"
 #include "lib/inc-proc-eng.h"
 #include "lib/lb.h"
 #include "lib/ovn-nb-idl.h"
@@ -41,6 +42,13 @@ static void sync_address_sets(const struct 
nbrec_address_set_table *,
   const struct sbrec_address_set_table *,
   struct ovsdb_idl_txn *ovnsb_txn,
   struct hmap *datapaths);
+static const struct sbrec_address_set *sb_address_set_lookup_by_name(
+struct ovsdb_idl_index *, const char *name);
+static void update_sb_addr_set(char **nb_addresses, size_t n_addresses,
+   const struct sbrec_address_set *);
+static void build_port_group_address_set(const struct nbrec_port_group *,
+ struct svec *ipv4_addrs,
+ struct svec *ipv6_addrs);
 
 void *
 en_sb_sync_init(struct engine_node *node OVS_UNUSED,
@@ -94,6 +102,98 @@ en_address_set_sync_cleanup(void *data OVS_UNUSED)
 
 }
 
+bool
+address_set_sync_nb_address_set_handler(struct engine_node *node OVS_UNUSED,
+void *data OVS_UNUSED)
+{
+const struct nbrec_address_set_table *nb_address_set_table =
+EN_OVSDB_GET(engine_get_input("NB_address_set", node));
+
+/* Return false if an address set is created or deleted.
+ * Handle I-P for only updated address sets. */
+const struct nbrec_address_set *nb_addr_set;
+NBREC_ADDRESS_SET_TABLE_FOR_EACH_TRACKED (nb_addr_set,
+  nb_address_set_table) {
+if (nbrec_address_set_is_new(nb_addr_set) ||
+nbrec_address_set_is_deleted(nb_addr_set)) {
+return false;
+}
+}
+
+struct ovsdb_idl_index *sbrec_address_set_by_name =
+engine_ovsdb_node_get_index(
+engine_get_input("SB_address_set", node),
+"sbrec_address_set_by_name");
+
+NBREC_ADDRESS_SET_TABLE_FOR_EACH_TRACKED (nb_addr_set,
+  nb_address_set_table) {
+const struct sbrec_address_set *sb_addr_set =
+sb_address_set_lookup_by_name(sbrec_address_set_by_name,
+  nb_addr_set->name);
+if (!sb_addr_set) {
+return false;
+}
+update_sb_addr_set(nb_addr_set->addresses, nb_addr_set->n_addresses,
+   sb_addr_set);
+}
+
+return true;
+}
+
+bool
+address_set_sync_nb_port_group_handler(struct engine_node *node OVS_UNUSED,
+   void *data OVS_UNUSED)
+{
+const struct nbrec_port_group *nb_pg;
+const struct nbrec_port_group_table *nb_port_group_table =
+EN_OVSDB_GET(engine_get_input("NB_port_group", node));
+NBREC_PORT_GROUP_TABLE_FOR_EACH_TRACKED (nb_pg, nb_port_group_table) {
+if (nbrec_port_group_is_new(nb_pg) ||
+nbrec_port_group_is_deleted(nb_pg)) {
+return false;
+}
+}
+
+struct ovsdb_idl_index *sbrec_address_set_by_name =
+engine_ovsdb_node_get_index(
+engine_get_input("SB_address_set", node),
+"sbrec_address_set_by_name");
+NBREC_PORT_GROUP_TABLE_FOR_EACH_TRACKED (nb_pg, nb_port_group_table) {
+char *ipv4_addrs_name = xasprintf("%s_ip4", nb_pg->name);
+const struct sbrec_address_set *sb_addr_set_v4 =
+sb_address_set_lookup_by_name(sbrec_address_set_by_name,
+  ipv4_addrs_name);
+if (!sb_addr_set_v4) {
+free(ipv4_addrs_name);
+return false;
+}
+char *ipv6_addrs_name = xasprintf("%s_ip6", nb_pg->name);
+const struct sbrec_address_set *sb_addr_set_v6 =
+sb_address_set_lookup_by_name(sbrec_address_set_by_name,
+  ipv6_addrs_name);
+if (!sb_addr_set_v6) {
+free(ipv4_addrs_name);
+free(ipv6_addrs_name);
+return false;
+}
+
+struct svec ipv4_addrs = SVEC_EMPTY_INITIALIZER;
+struct svec ipv6_addrs = SVEC_EMPTY_INITIALIZER;

[ovs-dev] [PATCH ovn 1/2] northd IP: Add a new engine node 'en_sb_sync' to sync SB tables.

2022-11-04 Thread numans
From: Numan Siddique 

A sub-engine node 'en_address_set_sync' is added within the
'en_sb_sync' node to sync the Address_Set table in the
SB database.  To start with, it falls back to full recompute
all the time.  Upcoming patch will add the incremental processing
support to sync the SB Address_Set table.

'en_sb_sync' engine node can be enhanced further to sync other
SB tables like - Port_Group, DHCP_Options, DNS etc.

Signed-off-by: Numan Siddique 
---
 lib/ovn-util.c|  30 +
 lib/ovn-util.h|   3 +
 northd/automake.mk|   4 +
 northd/en-northd-output.c |  57 ++
 northd/en-northd-output.h |  17 +++
 northd/en-sb-sync.c   | 230 ++
 northd/en-sb-sync.h   |  14 +++
 northd/inc-proc-northd.c  |  30 -
 northd/northd.c   | 173 ++--
 northd/northd.h   |   1 +
 10 files changed, 394 insertions(+), 165 deletions(-)
 create mode 100644 northd/en-northd-output.c
 create mode 100644 northd/en-northd-output.h
 create mode 100644 northd/en-sb-sync.c
 create mode 100644 northd/en-sb-sync.h

diff --git a/lib/ovn-util.c b/lib/ovn-util.c
index 5dca72714..4f939f460 100644
--- a/lib/ovn-util.c
+++ b/lib/ovn-util.c
@@ -938,3 +938,33 @@ daemon_started_recently(void)
 /* Ensure that at least an amount of time has passed. */
 return time_wall_msec() - startup_ts <= DAEMON_STARTUP_DELAY_MS;
 }
+
+/* Builds a unique address set compatible name ([a-zA-Z_.][a-zA-Z_.0-9]*)
+ * for the router's load balancer VIP address set, combining the logical
+ * router's datapath tunnel key and address family.
+ *
+ * Also prefixes the name with 'prefix'.
+ */
+static char *
+lr_lb_address_set_name_(uint32_t lr_tunnel_key, const char *prefix,
+int addr_family)
+{
+return xasprintf("%s_rtr_lb_%"PRIu32"_ip%s", prefix, lr_tunnel_key,
+ addr_family == AF_INET ? "4" : "6");
+}
+
+/* Builds the router's load balancer VIP address set name. */
+char *
+lr_lb_address_set_name(uint32_t lr_tunnel_key, int addr_family)
+{
+return lr_lb_address_set_name_(lr_tunnel_key, "", addr_family);
+}
+
+/* Builds a string that refers to the router's load balancer VIP address
+ * set name, that is: $.
+ */
+char *
+lr_lb_address_set_ref(uint32_t lr_tunnel_key, int addr_family)
+{
+return lr_lb_address_set_name_(lr_tunnel_key, "$", addr_family);
+}
diff --git a/lib/ovn-util.h b/lib/ovn-util.h
index a1f1cf0ad..809ff1d36 100644
--- a/lib/ovn-util.h
+++ b/lib/ovn-util.h
@@ -315,4 +315,7 @@ void daemon_started_recently_ignore(void);
 bool daemon_started_recently(void);
 int64_t daemon_startup_ts(void);
 
+char *lr_lb_address_set_name(uint32_t lr_tunnel_key, int addr_family);
+char *lr_lb_address_set_ref(uint32_t lr_tunnel_key, int addr_family);
+
 #endif /* OVN_UTIL_H */
diff --git a/northd/automake.mk b/northd/automake.mk
index 81582867d..138b7b32e 100644
--- a/northd/automake.mk
+++ b/northd/automake.mk
@@ -10,6 +10,10 @@ northd_ovn_northd_SOURCES = \
northd/en-northd.h \
northd/en-lflow.c \
northd/en-lflow.h \
+   northd/en-northd-output.c \
+   northd/en-northd-output.h \
+   northd/en-sb-sync.c \
+   northd/en-sb-sync.h \
northd/inc-proc-northd.c \
northd/inc-proc-northd.h \
northd/ipam.c \
diff --git a/northd/en-northd-output.c b/northd/en-northd-output.c
new file mode 100644
index 0..0033d371e
--- /dev/null
+++ b/northd/en-northd-output.c
@@ -0,0 +1,57 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include 
+
+#include 
+#include 
+#include 
+
+#include "openvswitch/util.h"
+
+#include "en-northd-output.h"
+#include "lib/inc-proc-eng.h"
+
+void *
+en_northd_output_init(struct engine_node *node OVS_UNUSED,
+  struct engine_arg *arg OVS_UNUSED)
+{
+return NULL;
+}
+
+void
+en_northd_output_run(struct engine_node *node, void *data OVS_UNUSED)
+{
+engine_set_node_state(node, EN_UPDATED);
+}
+
+void
+en_northd_output_cleanup(void *data OVS_UNUSED)
+{
+
+}
+
+bool
+northd_output_sb_sync_handler(struct engine_node *node, void *data OVS_UNUSED)
+{
+engine_set_node_state(node, EN_UPDATED);
+return true;
+}
+
+bool
+northd_output_lflow_handler(struct engine_node *node, void *data OVS_UNUSED)
+{
+engine_set_node_state(node, EN_UPDATED);
+return true;
+}
diff --git a/northd/en-northd-output.h b/northd/en-northd-output.h
new file mode 100644

Re: [ovs-dev] [ovs-build] |fail| pw1699340 dpif-netdev: fix flow allocation size

2022-11-04 Thread Peng He
I've rerun the test 150 in my VM. it runs successfully.
Looks like this is a flaky unit test.

Peng He  于2022年11月4日周五 17:05写道:

> I've rerun the test 150 in my VM. it runs successfully.
> Looks like this is a flaky unit test.
>
>  于2022年11月4日周五 16:24写道:
>
>> Test-Label: intel-ovs-compilation
>> Test-Status: fail
>> http://patchwork.ozlabs.org/api/patches/1699340/
>>
>> AVX-512_compilation: failed
>> DPCLS Test: fail
>> DPIF Test: success
>> MFEX Test: success
>> Errors in DPCLS test:
>> make check-system-userspace TESTSUITEFLAGS='1-148 150-160'
>> make  all-am
>> make[1]: Entering directory '/root/ovs-dev'
>> make[1]: Leaving directory '/root/ovs-dev'
>> set /bin/bash './tests/system-userspace-testsuite' -C tests
>> AUTOTEST_PATH='utilities:vswitchd:ovsdb:vtep:tests:ipsec::'; \
>> "$@" 1-148 150-160 -j1 || (test X'' = Xyes && "$@" --recheck)
>> Illegal "police"
>> ## -- ##
>> ## openvswitch 3.0.90 test suite. ##
>> ## -- ##
>>
>> datapath-sanity
>>
>>   1: datapath - ping between two ports   ok
>>   2: datapath - http between two ports   ok
>>   3: datapath - ping between two ports on vlan   ok
>>   4: datapath - ping between two ports on cvlan  skipped (
>> system-traffic.at:74)
>>   5: datapath - ping6 between two ports  ok
>>   6: datapath - ping6 between two ports on vlan  ok
>>   7: datapath - ping6 between two ports on cvlan skipped (
>> system-traffic.at:165)
>>   8: datapath - ping6 between two ports IPv6 later fragments ok
>>   9: datapath - ping6 between two ports with header modify ok
>>  10: datapath - ping over bond   ok
>>  11: datapath - ping over vxlan tunnel   ok
>>  12: datapath - ping vlan over vxlan tunnel  ok
>>  13: datapath - ping over vxlan6 tunnel  ok
>>  14: datapath - ping over gre tunnel skipped (
>> system-traffic.at:429)
>>  15: datapath - ping over ip6gre L2 tunnel   skipped (
>> system-traffic.at:471)
>>  16: datapath - ping over erspan v1 tunnel   skipped (
>> system-traffic.at:512)
>>  17: datapath - ping over erspan v2 tunnel   skipped (
>> system-traffic.at:549)
>>  18: datapath - ping over ip6erspan v1 tunnelskipped (
>> system-traffic.at:586)
>>  19: datapath - ping over ip6erspan v2 tunnelskipped (
>> system-traffic.at:626)
>>  20: datapath - ping over geneve tunnel  ok
>>  21: datapath - ping over geneve tunnel, delete flow regression ok
>>  22: datapath - flow resume with geneve tun_metadata ok
>>  23: datapath - ping over geneve6 tunnel ok
>>  24: datapath - ping over gre tunnel by simulated packets skipped (
>> system-traffic.at:860)
>>  25: datapath - ping over erspan v1 tunnel by simulated packets skipped (
>> system-traffic.at:907)
>>  26: datapath - ping over erspan v2 tunnel by simulated packets skipped (
>> system-traffic.at:956)
>>  27: datapath - ping over ip6erspan v1 tunnel by simulated packets
>> skipped (system-traffic.at:1010)
>>  28: datapath - ping over ip6erspan v2 tunnel by simulated packets
>> skipped (system-traffic.at:1066)
>>  29: datapath - clone action ok
>>  30: datapath - mpls actions ok
>>  31: datapath - multiple mpls label pop  ok
>>  32: datapath - encap decap mpls actions ok
>>  33: datapath - encap decap mpls_mc actions  ok
>>  34: datapath - multiple encap decap mpls actionsok
>>  35: datapath - multiple encap decap mpls_mc actions ok
>>  36: datapath - encap mpls pop mpls actions  ok
>>  37: datapath - push mpls decap mpls actions ok
>>  38: datapath - basic truncate actionok
>>  39: datapath - truncate and output to gre tunnel by simulated packets
>> skipped (system-traffic.at:1592)
>>  40: datapath - truncate and output to gre tunnelskipped (
>> system-traffic.at:1725)
>>  41: datapath - configure cache size skipped (
>> system-traffic.at:1846)
>>
>> MPLS
>>
>>  42: mpls - encap header dp-support  ok
>>  43: mpls - encap header slow-path   ok
>>  44: mpls_mc - encap header dp-support   ok
>>  45: mpls_mc - encap header slow-pathok
>>  46: mpls - decap header dp-support  ok
>>  47: mpls - decap header slow-path   ok
>>
>> QoS
>>
>>  48: QoS - basic configuration   ok
>>
>> conntrack
>>
>>  49: conntrack - controller  ok
>>  50: conntrack - force commitok
>>  51: conntrack - ct flush by 5-tuple ok
>>  52: conntrack - IPv4 ping   ok
>>  53: conntrack - get_nconns and get/set_maxconns ok
>>  54: conntrack - IPv6 ping   ok
>>  55: conntrack - preserve registers  ok
>>  56: conntrack - invalid ok
>> 

Re: [ovs-dev] [PATCH ovn v2 2/5] Add NB and SB Template_Var tables.

2022-11-04 Thread 0-day Robot
Bleep bloop.  Greetings Dumitru Ceara, I am a robot and I have tried out your 
patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


checkpatch:
ERROR: Inappropriate bracing around statement
#314 FILE: northd/template-var.h:36:
HMAP_FOR_EACH (NODE, hmap_node, &(TBL)->vars)

Lines checked: 535, Warnings: 0, Errors: 1


Please check this out.  If you feel there has been an error, please email 
acon...@redhat.com

Thanks,
0-day Robot
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn v2 1/5] lflow: Factor out the lflow reference handling code into a new module.

2022-11-04 Thread 0-day Robot
Bleep bloop.  Greetings Dumitru Ceara, I am a robot and I have tried out your 
patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


checkpatch:
ERROR: Inappropriate bracing around statement
#1187 FILE: lib/objdep.h:52:
HMAP_FOR_EACH (NODE, hmap_node, &(MAP)->objs)

Lines checked: 1261, Warnings: 0, Errors: 1


Please check this out.  If you feel there has been an error, please email 
acon...@redhat.com

Thanks,
0-day Robot
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn] ci: Update jobs to use numbers instead of test flags

2022-11-04 Thread Dumitru Ceara
On 11/4/22 22:45, Han Zhou wrote:
> On Wed, Nov 2, 2022 at 7:46 AM Dumitru Ceara  wrote:
>>
>> On 11/2/22 15:35, Ales Musil wrote:
>>> To prevent some jobs not running after CI scripts updates
>>> use numbers instead of the test flags. This still allows us
>>> to use parallelization, but without worrying about skipping
>>> some tests by mistake.
>>>
>>> For "test" suites use 3 parts, with 1500 tests in mind.
>>> That should give us additional space for future tests.
>>> Currently, there are ~1200 tests. For sanitizers use 5 parts
>>> instead, as they are slower in general.
>>>
>>> For "system-test" use 3 parts, with 300 tests in mind.
>>> Currently, there are ~200 tests.
>>>
>>> In the end this patch reduces the number of jobs by 8 to 20,
>>> which is ok as there is a limit of 20 running in parallel [0].
>>>
>>> [0]
> https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration#usage-limits
>>> Signed-off-by: Ales Musil 
>>> ---
>>
>> Thanks, Ales, for the patch!
>>
>> It makes it harder to skip tests by accident in CI which is great.
>>
>> But there's still the downside that maintainers need to pay a bit more
>> attention so that the last run "1001-" or "201-" doesn't take way longer
>> than the rest.
> 
> I agree with this, but maybe not a big deal. So I am ok with it, too.

Actually, I have already missed this. :)

Branches 22.03 and 22.06 have 2000+ tests because the test matrix
depended on dp-groups being enabled/disabled too.  That's not the case
since >= 22.09.

I still don't think it's a big deal in the end though.

>>
>> I'm OK with that but I wonder what Han, Mark and Numan think about this.
>>
>> If people are OK with it I can apply and backport the patch (assuming
>> the CI is green [0]).
> 
> Backporting may be a problem if the older branch doesn't have test 1200-.
> Otherwise should be good.
> 

I pushed this to the main branch and only backported to branch-22.09 for
now.  We have ~1300 tests there so we're ok.  I'll wait with backporting
to 22.06 and 22.03 for now.

Thanks,
Dumitru

> Thanks,
> Han
> 
>>
>> Regards,
>> Dumitru
>>
>> [0] https://github.com/ovsrobot/ovn/actions/runs/3378478500
>>
> 

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn 2/5] Add NB and SB Template_Var tables.

2022-11-04 Thread 0-day Robot
Bleep bloop.  Greetings Dumitru Ceara, I am a robot and I have tried out your 
patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


checkpatch:
ERROR: Inappropriate bracing around statement
#314 FILE: northd/template-var.h:36:
HMAP_FOR_EACH (NODE, hmap_node, &(TBL)->vars)

Lines checked: 535, Warnings: 0, Errors: 1


Please check this out.  If you feel there has been an error, please email 
acon...@redhat.com

Thanks,
0-day Robot
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn 1/5] lflow: Factor out the lflow reference handling code into a new module.

2022-11-04 Thread 0-day Robot
Bleep bloop.  Greetings Dumitru Ceara, I am a robot and I have tried out your 
patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


checkpatch:
ERROR: Inappropriate bracing around statement
#1187 FILE: lib/objdep.h:52:
HMAP_FOR_EACH (NODE, hmap_node, &(MAP)->objs)

Lines checked: 1261, Warnings: 0, Errors: 1


Please check this out.  If you feel there has been an error, please email 
acon...@redhat.com

Thanks,
0-day Robot
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH ovn v2 5/5] tutorial: Add scripts to simulate node-port ovn-k8s services.

2022-11-04 Thread Dumitru Ceara
In a sandbox run:

$ ./ovn-lb-benchmark.sh

to simulate an ovn-k8s-like topology with N nodes, VIPS NodePort services
applied to all nodes.  Each service has BACKENDS backends.

If USE_TEMPLATES is "yes" then the configuration will be optimized to use
Chassis_Template_Vars.  Otherwise it will create N LBs per service, one
for every node.

Signed-off-by: Dumitru Ceara 
---
 tutorial/automake.mk |4 +
 tutorial/ovn-gen-lb-template-vars.py |  116 ++
 tutorial/ovn-lb-benchmark.sh |  110 
 3 files changed, 229 insertions(+), 1 deletion(-)
 create mode 100755 tutorial/ovn-gen-lb-template-vars.py
 create mode 100755 tutorial/ovn-lb-benchmark.sh

diff --git a/tutorial/automake.mk b/tutorial/automake.mk
index 046962c000..171da8de66 100644
--- a/tutorial/automake.mk
+++ b/tutorial/automake.mk
@@ -1,6 +1,8 @@
 EXTRA_DIST += \
tutorial/ovs-sandbox \
-   tutorial/ovn-setup.sh
+   tutorial/ovn-setup.sh \
+   tutorial/ovn-lb-benchmark.sh \
+   tutorial/ovn-gen-lb-template-vars.py
 sandbox: all
cd $(srcdir)/tutorial && MAKE=$(MAKE) HAVE_OPENSSL=$(HAVE_OPENSSL) \
./ovs-sandbox -b $(abs_builddir) --ovs-src $(ovs_srcdir) 
--ovs-build $(ovs_builddir) $(SANDBOXFLAGS)
diff --git a/tutorial/ovn-gen-lb-template-vars.py 
b/tutorial/ovn-gen-lb-template-vars.py
new file mode 100755
index 00..fe7e6b93f6
--- /dev/null
+++ b/tutorial/ovn-gen-lb-template-vars.py
@@ -0,0 +1,116 @@
+import getopt
+import os
+import re
+import sys
+import uuid
+
+import ovs.db.idl
+import ovs.db.schema
+import ovs.db.types
+import ovs.ovsuuid
+import ovs.poller
+import ovs.stream
+import ovs.util
+import ovs.vlog
+from ovs.db import data
+from ovs.db import error
+from ovs.db.idl import _row_to_uuid as row_to_uuid
+from ovs.fatal_signal import signal_alarm
+
+vlog = ovs.vlog.Vlog("template-lb-stress")
+vlog.set_levels_from_string("console:info")
+vlog.init(None)
+
+SCHEMA = '../ovn-nb.ovsschema'
+
+
+def add_chassis_template_vars(idl, n, n_vips, n_backends):
+for i in range(1, n + 1):
+print(f'ADDING LBs for node {i}')
+txn = ovs.db.idl.Transaction(idl)
+tv = txn.insert(idl.tables["Chassis_Template_Var"])
+tv.chassis = f'chassis-{i}'
+tv.setkey('variables', 'vip', f'42.42.42.{i}')
+
+for j in range(1, n_vips + 1):
+backends = ''
+for k in range(0, n_backends):
+j1 = j // 250
+j2 = j % 250
+backends = f'42.{k}.{j1}.{j2}:{j},{backends}'
+tv.setkey('variables', f'backends{j}', backends)
+status = txn.commit_block()
+sys.stdout.write(
+f'commit status = {ovs.db.idl.Transaction.status_to_string(status)}\n'
+)
+
+
+def run(remote, n, n_vips, n_backends):
+schema_helper = ovs.db.idl.SchemaHelper(SCHEMA)
+schema_helper.register_all()
+idl = ovs.db.idl.Idl(remote, schema_helper, leader_only=False)
+
+seqno = 0
+
+error, stream = ovs.stream.Stream.open_block(
+ovs.stream.Stream.open(remote), 2000
+)
+if error:
+sys.stderr.write(f'failed to connect to \"{remote}\"')
+sys.exit(1)
+
+if not stream:
+sys.stderr.write(f'failed to connect to \"{remote}\"')
+sys.exit(1)
+rpc = ovs.jsonrpc.Connection(stream)
+
+while idl.change_seqno == seqno and not idl.run():
+rpc.run()
+
+poller = ovs.poller.Poller()
+idl.wait(poller)
+rpc.wait(poller)
+poller.block()
+
+add_chassis_template_vars(idl, n, n_vips, n_backends)
+
+
+def main(argv):
+try:
+options, args = getopt.gnu_getopt(
+argv[1:], 'n:v:b:r:', ['vips', 'backends', 'remote']
+)
+except getopt.GetoptError as geo:
+sys.stderr.write(f'{ovs.util.PROGRAM_NAME}: {geo.msg}\n')
+sys.exit(1)
+
+n = None
+vips = None
+backends = None
+remote = None
+for key, value in options:
+if key == '-n':
+n = int(value)
+elif key in ['-v', '--vips']:
+vips = int(value)
+elif key in ['-b', '--backends']:
+backends = int(value)
+elif key in ['-r', '--remote']:
+remote = value
+else:
+sys.stderr.write(f'{ovs.util.PROGRAM_NAME}: unknown input args')
+sys.exit(1)
+
+if not n or not vips or not backends:
+sys.stderr.write(f'{ovs.util.PROGRAM_NAME}: invalid input args')
+sys.exit(1)
+
+run(remote, n, vips, backends)
+
+
+if __name__ == '__main__':
+try:
+main(sys.argv)
+except error.Error as e:
+sys.stderr.write(f'{e}\n')
+sys.exit(1)
diff --git a/tutorial/ovn-lb-benchmark.sh b/tutorial/ovn-lb-benchmark.sh
new file mode 100755
index 00..6e8129ab97
--- /dev/null
+++ b/tutorial/ovn-lb-benchmark.sh
@@ -0,0 +1,110 @@
+#!/bin/bash
+
+nrtr=$1
+nlb=$2
+nbackends=$3

[ovs-dev] [PATCH ovn v2 4/5] lb: Support using templates.

2022-11-04 Thread Dumitru Ceara
Allow the CMS to configure template LBs.  The following configurations are
supported:
- VIPs of the form: ^vip_variable[:^port_variable|:port]
- Backends of the form:
  
^backendip_variable1[:^port_variable1|:port],^backendip_variable2[:^port_variable2|:port]
  OR
  ^backends_variable1,^backends_variable2

The CMS needs to provide a bit more information than with non-template load
balancers and must explicitly specify the address family to be used.

There is currently no support for template load balancers with
options:add_route=true set.  That is because ovn-northd does not
instantiate template variables.  While this is a limitation in a way, its
impact is not huge.  The load balancer 'add_route' option was added as a
way to make the CMS life easier and to avoid having to explicitly add a
route for the VIP.  The CMS can still achieve the same logical topology by
explicitly adding the VIP route.

Template load balancers don't support the "reachable" neighbor-responder
mode.  Instead the CMS can explicitly configure the responder mode to
either "all" or "none".

To properly handle template updates in ovn-controller we also add a
Chassis_Template_Var <- LB reference in ovn-controller.  This way, when
a Chassis_Template_Var changes value all load balancers that refer to
it will also get updated.

Signed-off-by: Dumitru Ceara 
---
V2:
- Fix GCC build due to missing explicit return.
- Fix ls_in_pre_stateful flows due to using wrong lb field.
- Use new lexer_parse_template_string().
- Changed lb_handle_changed_ref() signature to return bool.
- Update documentation with info about responder mode=none, LB template
  supported formats, lb explicit address family requirements.
- Squashed the template LB patches into a single one
- Added more tests.
- Squashed the system tests patch into this one.
---
 controller/lflow.c  |  118 +--
 controller/lflow.h  |7 +
 controller/ovn-controller.c |   67 +-
 lib/lb.c|  457 ++-
 lib/lb.h|   40 +++-
 lib/ovn-util.c  |3 
 northd/northd.c |   89 
 ovn-nb.xml  |   53 +
 tests/ovn-nbctl.at  |   23 +-
 tests/ovn-northd.at |7 +
 tests/ovn.at|  131 
 tests/system-ovn.at |  183 +
 utilities/ovn-nbctl.c   |  122 ++-
 13 files changed, 1071 insertions(+), 229 deletions(-)

diff --git a/controller/lflow.c b/controller/lflow.c
index fc4371d0df..7f880bd62b 100644
--- a/controller/lflow.c
+++ b/controller/lflow.c
@@ -97,6 +97,15 @@ consider_logical_flow(const struct sbrec_logical_flow *lflow,
   struct lflow_ctx_in *l_ctx_in,
   struct lflow_ctx_out *l_ctx_out);
 
+static void
+consider_lb_hairpin_flows(struct objdep_mgr *mgr,
+  const struct sbrec_load_balancer *sbrec_lb,
+  const struct hmap *local_datapaths,
+  const struct smap *template_vars,
+  bool use_ct_mark,
+  struct ovn_desired_flow_table *flow_table,
+  struct simap *ids);
+
 static void add_port_sec_flows(const struct shash *binding_lports,
const struct sbrec_chassis *,
struct ovn_desired_flow_table *);
@@ -223,7 +232,7 @@ lflow_handle_changed_flows(struct lflow_ctx_in *l_ctx_in,
 UUIDSET_INITIALIZER(_remove_nodes);
 SBREC_LOGICAL_FLOW_TABLE_FOR_EACH_TRACKED (lflow,
l_ctx_in->logical_flow_table) {
-if (uuidset_find(l_ctx_out->lflows_processed, &lflow->header_.uuid)) {
+if (uuidset_find(l_ctx_out->objs_processed, &lflow->header_.uuid)) {
 VLOG_DBG("lflow "UUID_FMT"has been processed, skip.",
  UUID_ARGS(&lflow->header_.uuid));
 continue;
@@ -253,14 +262,14 @@ lflow_handle_changed_flows(struct lflow_ctx_in *l_ctx_in,
  UUID_ARGS(>header_.uuid));
 
 /* For the extra lflows that need to be reprocessed because of the
- * flood remove, remove it from lflows_processed. */
+ * flood remove, remove it from objs_processed. */
 struct uuidset_node *unode =
-uuidset_find(l_ctx_out->lflows_processed,
+uuidset_find(l_ctx_out->objs_processed,
  >header_.uuid);
 if (unode) {
 VLOG_DBG("lflow "UUID_FMT"has been processed, now reprocess.",
  UUID_ARGS(>header_.uuid));
-uuidset_delete(l_ctx_out->lflows_processed, unode);
+uuidset_delete(l_ctx_out->objs_processed, unode);
 }
 
 consider_logical_flow(lflow, false, l_ctx_in, l_ctx_out);
@@ -677,7 +686,7 @@ lflow_handle_addr_set_update(const char *as_name,
 struct 

[ovs-dev] [PATCH ovn v2 3/5] controller: Add support for templated actions and matches.

2022-11-04 Thread Dumitru Ceara
Expand SB.Template_Var records in two stages:
1. first expand them to local values in match/action strings
2. then reparse the expanded strings

For the case when a lflow references a Chassis_Template_Var
also track references (similar to the ones maintained for
multicast groups, address sets, port_groups, port bindings).

Signed-off-by: Dumitru Ceara 
---
V2:
- Fix GCC build due to missing newline.
- Handle SB table rename Template_Var -> Chassis_Template_Var.
- Address Han's comments:
  - Add new function to parse lflow actions.
  - Move xstrdup inside lexer_parse_template_string() and execute it only
if needed.
  - Match template vars only by chassis name.
  - Change local_templates table to a plain smap.
  - Some indentation updates.
  - Use NULL template variable change handlers for chassis/Open_vSwitch
changes.
  - Added more tests.
- Fix tracking of template references in lflow actions.
---
 controller/lflow.c  |  136 ++--
 controller/lflow.h  |1 
 controller/ofctrl.c |   11 +-
 controller/ofctrl.h |3 
 controller/ovn-controller.c |  289 +++
 include/ovn/expr.h  |4 -
 include/ovn/lex.h   |   15 ++
 lib/actions.c   |9 +
 lib/expr.c  |   18 ++-
 lib/lex.c   |   57 
 lib/objdep.c|1 
 lib/objdep.h|1 
 tests/ovn-controller.at |   50 +++
 tests/ovn.at|   80 
 tests/test-ovn.c|   17 ++-
 utilities/ovn-trace.c   |   33 -
 16 files changed, 658 insertions(+), 67 deletions(-)

diff --git a/controller/lflow.c b/controller/lflow.c
index d4434bdee8..fc4371d0df 100644
--- a/controller/lflow.c
+++ b/controller/lflow.c
@@ -81,6 +81,8 @@ convert_match_to_expr(const struct sbrec_logical_flow *,
   const struct local_datapath *ldp,
   struct expr **prereqs, const struct shash *addr_sets,
   const struct shash *port_groups,
+  const struct smap *template_vars,
+  struct sset *template_vars_ref,
   struct objdep_mgr *, bool *pg_addr_set_ref);
 static void
 add_matches_to_flow_table(const struct sbrec_logical_flow *,
@@ -297,6 +299,43 @@ as_info_from_expr_const(const char *as_name, const union 
expr_constant *c,
 return true;
 }
 
+static bool
+lflow_parse_actions(const struct sbrec_logical_flow *lflow,
+const struct lflow_ctx_in *l_ctx_in,
+struct sset *template_vars_ref,
+struct ofpbuf *ovnacts_out,
+struct expr **prereqs_out)
+{
+bool ingress = !strcmp(lflow->pipeline, "ingress");
+struct ovnact_parse_params pp = {
+.symtab = &symtab,
+.dhcp_opts = l_ctx_in->dhcp_opts,
+.dhcpv6_opts = l_ctx_in->dhcpv6_opts,
+.nd_ra_opts = l_ctx_in->nd_ra_opts,
+.controller_event_opts = l_ctx_in->controller_event_opts,
+
+.pipeline = ingress ? OVNACT_P_INGRESS : OVNACT_P_EGRESS,
+.n_tables = LOG_PIPELINE_LEN,
+.cur_ltable = lflow->table_id,
+};
+
+char *actions_expanded_s = NULL;
+const char *actions_s =
+lexer_parse_template_string(lflow->actions, l_ctx_in->template_vars,
+template_vars_ref, &actions_expanded_s);
+char *error = ovnacts_parse_string(actions_s, &pp,
+   ovnacts_out, prereqs_out);
+free(actions_expanded_s);
+if (error) {
+static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
+VLOG_WARN_RL(&rl, "error parsing actions \"%s\": %s",
+ lflow->actions, error);
+free(error);
+return false;
+}
+return true;
+}
+
 /* Parses the lflow regarding the changed address set 'as_name', and generates
  * ovs flows for the newly added addresses in 'as_diff_added' only. It is
  * similar to consider_logical_flow__, with the below differences:
@@ -347,27 +386,14 @@ consider_lflow_for_added_as_ips__(
 
 uint64_t ovnacts_stub[1024 / 8];
 struct ofpbuf ovnacts = OFPBUF_STUB_INITIALIZER(ovnacts_stub);
-struct ovnact_parse_params pp = {
-.symtab = ,
-.dhcp_opts = l_ctx_in->dhcp_opts,
-.dhcpv6_opts = l_ctx_in->dhcpv6_opts,
-.nd_ra_opts = l_ctx_in->nd_ra_opts,
-.controller_event_opts = l_ctx_in->controller_event_opts,
-.pipeline = ingress ? OVNACT_P_INGRESS : OVNACT_P_EGRESS,
-.n_tables = LOG_PIPELINE_LEN,
-.cur_ltable = lflow->table_id,
-};
+struct sset template_vars_ref = SSET_INITIALIZER(_vars_ref);
 struct expr *prereqs = NULL;
-char *error;
 
-error = ovnacts_parse_string(lflow->actions, , , );
-if (error) {
-static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
-VLOG_WARN_RL(, "error parsing actions \"%s\": %s",
-

[ovs-dev] [PATCH ovn v2 2/5] Add NB and SB Template_Var tables.

2022-11-04 Thread Dumitru Ceara
Propagate the contents of the NB table to the Southbound.

Signed-off-by: Dumitru Ceara 
---
Note:
- ovn-trace doesn't support template variables (yet).

V2:
- Fixed TEMPLATE_VAR_TABLE_INITIALIZER definition so that GCC doesn't
  complain anymore.
- Addressed Han's comments:
  - Rename tables to Chassis_Template_Var.
  - Fix man page.
  - Simplify function prototypes.
- Changed schema as suggested by Ilya.
---
 northd/automake.mk   |4 ++
 northd/en-northd.c   |4 ++
 northd/inc-proc-northd.c |8 -
 northd/northd.c  |   41 +
 northd/northd.h  |4 ++
 northd/template-var.c|   74 ++
 northd/template-var.h|   58 
 ovn-nb.ovsschema |   17 +--
 ovn-nb.xml   |   29 ++
 ovn-sb.ovsschema |   12 ++-
 ovn-sb.xml   |   15 +
 tests/ovn-northd.at  |   35 ++
 utilities/ovn-nbctl.c|3 ++
 utilities/ovn-sbctl.c|3 ++
 14 files changed, 299 insertions(+), 8 deletions(-)
 create mode 100644 northd/template-var.c
 create mode 100644 northd/template-var.h

diff --git a/northd/automake.mk b/northd/automake.mk
index 81582867dc..31134bc329 100644
--- a/northd/automake.mk
+++ b/northd/automake.mk
@@ -13,7 +13,9 @@ northd_ovn_northd_SOURCES = \
northd/inc-proc-northd.c \
northd/inc-proc-northd.h \
northd/ipam.c \
-   northd/ipam.h
+   northd/ipam.h \
+   northd/template-var.c \
+   northd/template-var.h
 northd_ovn_northd_LDADD = \
lib/libovn.la \
$(OVSDB_LIBDIR)/libovsdb.la \
diff --git a/northd/en-northd.c b/northd/en-northd.c
index 7fe83db642..030ee25d8f 100644
--- a/northd/en-northd.c
+++ b/northd/en-northd.c
@@ -80,6 +80,8 @@ void en_northd_run(struct engine_node *node, void *data)
 EN_OVSDB_GET(engine_get_input("NB_acl", node));
 input_data.nbrec_static_mac_binding_table =
 EN_OVSDB_GET(engine_get_input("NB_static_mac_binding", node));
+input_data.nbrec_chassis_template_var_table =
+EN_OVSDB_GET(engine_get_input("NB_chassis_template_var", node));
 
 input_data.sbrec_sb_global_table =
 EN_OVSDB_GET(engine_get_input("SB_sb_global", node));
@@ -113,6 +115,8 @@ void en_northd_run(struct engine_node *node, void *data)
 EN_OVSDB_GET(engine_get_input("SB_chassis_private", node));
 input_data.sbrec_static_mac_binding_table =
 EN_OVSDB_GET(engine_get_input("SB_static_mac_binding", node));
+input_data.sbrec_chassis_template_var_table =
+EN_OVSDB_GET(engine_get_input("SB_chassis_template_var", node));
 
 northd_run(_data, data,
eng_ctx->ovnnb_idl_txn,
diff --git a/northd/inc-proc-northd.c b/northd/inc-proc-northd.c
index 54e0ad3b05..da791f035d 100644
--- a/northd/inc-proc-northd.c
+++ b/northd/inc-proc-northd.c
@@ -64,7 +64,8 @@ VLOG_DEFINE_THIS_MODULE(inc_proc_northd);
 NB_NODE(ha_chassis_group, "ha_chassis_group") \
 NB_NODE(ha_chassis, "ha_chassis") \
 NB_NODE(bfd, "bfd") \
-NB_NODE(static_mac_binding, "static_mac_binding")
+NB_NODE(static_mac_binding, "static_mac_binding") \
+NB_NODE(chassis_template_var, "chassis_template_var")
 
 enum nb_engine_node {
 #define NB_NODE(NAME, NAME_STR) NB_##NAME,
@@ -114,7 +115,8 @@ VLOG_DEFINE_THIS_MODULE(inc_proc_northd);
 SB_NODE(load_balancer, "load_balancer") \
 SB_NODE(bfd, "bfd") \
 SB_NODE(fdb, "fdb") \
-SB_NODE(static_mac_binding, "static_mac_binding")
+SB_NODE(static_mac_binding, "static_mac_binding") \
+SB_NODE(chassis_template_var, "chassis_template_var")
 
 enum sb_engine_node {
 #define SB_NODE(NAME, NAME_STR) SB_##NAME,
@@ -186,6 +188,7 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
 engine_add_input(_northd, _nb_ha_chassis_group, NULL);
 engine_add_input(_northd, _nb_ha_chassis, NULL);
 engine_add_input(_northd, _nb_static_mac_binding, NULL);
+engine_add_input(_northd, _nb_chassis_template_var, NULL);
 
 engine_add_input(_northd, _sb_sb_global, NULL);
 engine_add_input(_northd, _sb_chassis, NULL);
@@ -215,6 +218,7 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
 engine_add_input(_northd, _sb_load_balancer, NULL);
 engine_add_input(_northd, _sb_fdb, NULL);
 engine_add_input(_northd, _sb_static_mac_binding, NULL);
+engine_add_input(_northd, _sb_chassis_template_var, NULL);
 engine_add_input(_mac_binding_aging, _nb_nb_global, NULL);
 engine_add_input(_mac_binding_aging, _sb_mac_binding, NULL);
 engine_add_input(_mac_binding_aging, _northd, NULL);
diff --git a/northd/northd.c b/northd/northd.c
index b7388afc58..170b4f95c8 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -51,6 +51,7 @@
 #include "lib/stopwatch-names.h"
 #include "stream.h"
 #include "timeval.h"
+#include "template-var.h"
 #include "util.h"
 #include "uuid.h"
 #include "ovs-thread.h"
@@ 

[ovs-dev] [PATCH ovn v2 1/5] lflow: Factor out the lflow reference handling code into a new module.

2022-11-04 Thread Dumitru Ceara
This makes it easier to have an overview of what the code does and at the
same time it allows multiple users to define and manage
"resource <-> object" dependencies.

Acked-by: Han Zhou 
Signed-off-by: Dumitru Ceara 
---
V2:
- Addressed Mark's comments:
  - Fixed typos in comments in objdep.h.
  - Made objdep_change_handler return bool (handled successfully or not).
  - Reverted some unrelated style changes.
- Fixed cast style.
- Addressed Han's comments:
  - Removed superfluous 'type' argument name in prototypes.
- Added Han's ack.
---
 controller/lflow.c  |  330 ++-
 controller/lflow.h  |   67 +
 controller/ovn-controller.c |   55 +--
 lib/automake.mk |2 
 lib/objdep.c|  260 ++
 lib/objdep.h|  122 
 6 files changed, 508 insertions(+), 328 deletions(-)
 create mode 100644 lib/objdep.c
 create mode 100644 lib/objdep.h

diff --git a/controller/lflow.c b/controller/lflow.c
index cc0f31db06..d4434bdee8 100644
--- a/controller/lflow.c
+++ b/controller/lflow.c
@@ -61,7 +61,7 @@ struct lookup_port_aux {
 struct ovsdb_idl_index *sbrec_port_binding_by_name;
 const struct sbrec_datapath_binding *dp;
 const struct sbrec_logical_flow *lflow;
-struct lflow_resource_ref *lfrr;
+struct objdep_mgr *deps_mgr;
 const struct hmap *chassis_tunnels;
 };
 
@@ -72,8 +72,8 @@ struct condition_aux {
 const struct sset *active_tunnels;
 const struct sbrec_logical_flow *lflow;
 /* Resource reference to store the port name referenced
- * in is_chassis_resident() to the logical flow. */
-struct lflow_resource_ref *lfrr;
+ * in is_chassis_resident() to the object (logical flow). */
+struct objdep_mgr *deps_mgr;
 };
 
 static struct expr *
@@ -81,7 +81,7 @@ convert_match_to_expr(const struct sbrec_logical_flow *,
   const struct local_datapath *ldp,
   struct expr **prereqs, const struct shash *addr_sets,
   const struct shash *port_groups,
-  struct lflow_resource_ref *, bool *pg_addr_set_ref);
+  struct objdep_mgr *, bool *pg_addr_set_ref);
 static void
 add_matches_to_flow_table(const struct sbrec_logical_flow *,
   const struct local_datapath *,
@@ -94,17 +94,6 @@ consider_logical_flow(const struct sbrec_logical_flow *lflow,
   bool is_recompute,
   struct lflow_ctx_in *l_ctx_in,
   struct lflow_ctx_out *l_ctx_out);
-static void lflow_resource_add(struct lflow_resource_ref *, enum ref_type,
-   const char *ref_name, const struct uuid *,
-   size_t ref_count);
-static struct ref_lflow_node *ref_lflow_lookup(struct hmap *ref_lflow_table,
-   enum ref_type,
-   const char *ref_name);
-static struct lflow_ref_node *lflow_ref_lookup(struct hmap *lflow_ref_table,
-   const struct uuid *lflow_uuid);
-static void ref_lflow_node_destroy(struct ref_lflow_node *);
-static void lflow_resource_destroy_lflow(struct lflow_resource_ref *,
- const struct uuid *lflow_uuid);
 
 static void add_port_sec_flows(const struct shash *binding_lports,
const struct sbrec_chassis *,
@@ -125,8 +114,8 @@ lookup_port_cb(const void *aux_, const char *port_name, 
unsigned int *portp)
 /* Store the name that used to lookup the lport to lflow reference, so that
  * in the future when the lport's port binding changes, the logical flow
  * that references this lport can be reprocessed. */
-lflow_resource_add(aux->lfrr, REF_TYPE_PORTBINDING, port_name,
-   >lflow->header_.uuid, 0);
+objdep_mgr_add(aux->deps_mgr, OBJDEP_TYPE_PORTBINDING, port_name,
+   >lflow->header_.uuid);
 
 const struct sbrec_port_binding *pb
 = lport_lookup_by_name(aux->sbrec_port_binding_by_name, port_name);
@@ -141,8 +130,8 @@ lookup_port_cb(const void *aux_, const char *port_name, 
unsigned int *portp)
  * this multicast group can be reprocessed. */
 struct ds mg_key = DS_EMPTY_INITIALIZER;
 get_mc_group_key(port_name, aux->dp->tunnel_key, _key);
-lflow_resource_add(aux->lfrr, REF_TYPE_MC_GROUP, ds_cstr(_key),
-   >lflow->header_.uuid, 0);
+objdep_mgr_add(aux->deps_mgr, OBJDEP_TYPE_MC_GROUP, ds_cstr(_key),
+   >lflow->header_.uuid);
 ds_destroy(_key);
 
 const struct sbrec_multicast_group *mg = mcgroup_lookup_by_dp_name(
@@ -180,11 +169,11 @@ is_chassis_resident_cb(const void *c_aux_, const char 
*port_name)
 {
 const struct condition_aux *c_aux = c_aux_;
 
-/* Store the port name that used to lookup the lport 

[ovs-dev] [PATCH v2 ovn 0/5] Add OVN component templates.

2022-11-04 Thread Dumitru Ceara
Sometimes network components are compute node-specific.  Sometimes such
components are replicated, almost identically, for multiple nodes
in the cluster.

One such example is the case of Kubernetes NodePort services which
translate (in the ovn-kubernetes case) to Load_Balancer
objects being applied to each and every node's logical gateway router.
These load balancers are almost identical, the main difference being
the fact that they use different VIPs (the node's IP).

With the current OVN load balancer design, this becomes a problem at
scale because the number of load balancers that must be configured is
N x M (N nodes times M services).

This series proposes a new concept in OVN: virtual network component
templates.  The goal of the templates is to help reduce resource
consumption in the OVN central components in specific cases like the one
described above.

To achieve that, the CMS will instead configure a "templated" load
balancer for every service and apply that single template record to
the cluster-wide load balancer group.  This template is then
instantiated differently on different compute nodes.  This translation
is controlled through per-chassis "template variables" configured by
the CMS in the new NB.Chassis_Template_Var table.

Patch 5/5 introduces a synthetic benchmark simulating what an OpenShift
router (using NodePort services) scale test would do.  The benchmark
allows us to compare the "standard" (no-template) configuration
against the configuration that uses component templates:

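+----------+-------+-------------------+--------------------+--------------------+-----------+---------------------------+
|          |       |                   |         NB         |         SB         |  northd   |      ovn-controller       |
+----------+-------+-------------------+----------+---------+----------+---------+-----------+----------+----------------+
| Template | Nodes | NodePort Services | Size(MB) | RSS(MB) | Size(MB) | RSS(MB) | loop(sec) | RSS (MB) | recompute(sec) |
|          |       |   (5 backends)    |          |         |          |         |           |          |                |
+----------+-------+-------------------+----------+---------+----------+---------+-----------+----------+----------------+
| No       |    60 |              1000 |       25 |     116 |      118 |     589 |      2.70 |      463 |           0.20 |
| Yes      |    60 |              1000 |        6 |      25 |        8 |      46 |      0.07 |       44 |           0.20 |
+----------+-------+-------------------+----------+---------+----------+---------+-----------+----------+----------------+
| No       |   120 |              2000 |       67 |     865 |      471 |    9000 |     15.60 |     1016 |           0.40 |
| Yes      |   120 |              2000 |       23 |      96 |       28 |     225 |      0.22 |       83 |           0.40 |
+----------+-------+-------------------+----------+---------+----------+---------+-----------+----------+----------------+
| Yes      |   120 |                 1 |      118 |     440 |      136 |     668 |      0.72 |      311 |           1.77 |
+----------+-------+-------------------+----------+---------+----------+---------+-----------+----------+----------------+
| Yes      |   250 |                 1 |      244 |     870 |      263 |    1502 |      1.26 |      318 |           1.87 |
+----------+-------+-------------------+----------+---------+----------+---------+-----------+----------+----------------+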

For the N=120 S=2000 case:
- NB size reduced by ~65%
- NB RSS  reduced by ~90%
- SB size reduced by ~95%
- SB RSS  reduced by ~98%
- ovn-northd loop time reduced by ~98%
- ovn-controller RSS reduced by ~92%

Dumitru Ceara (5):
  lflow: Factor out the lflow reference handling code into a new module.
  Add NB and SB Template_Var tables.
  controller: Add support for templated actions and matches.
  lb: Support using templates.
  tutorial: Add scripts to simulate node-port ovn-k8s services.


 controller/lflow.c   | 244 ++
 controller/lflow.h   |   8 +-
 controller/ofctrl.c  |  11 +-
 controller/ofctrl.h  |   3 +-
 controller/ovn-controller.c  | 350 +++-
 include/ovn/expr.h   |   4 +-
 include/ovn/lex.h|  15 +-
 lib/actions.c|   9 +-
 lib/expr.c   |  18 +-
 lib/lb.c | 457 +++
 lib/lb.h |  40 ++-
 lib/lex.c|  57 
 lib/objdep.c |   1 +
 lib/objdep.h |   1 +
 lib/ovn-util.c   |   3 -
 northd/automake.mk   |   4 +-
 northd/en-northd.c   |   4 +
 northd/inc-proc-northd.c |   8 +-
 northd/northd.c  | 130 +---
 northd/northd.h  

Re: [ovs-dev] [PATCH ovn 0/5] Add OVN component templates.

2022-11-04 Thread Dumitru Ceara
On 11/4/22 23:06, Dumitru Ceara wrote:
> Sometimes network components are compute node-specific.  Sometimes such
> components are replicated, almost identically, for multiple nodes
> in the cluster.
> 
> One such example is the case of Kubernetes NodePort services which
> translate (in the ovn-kubernetes case) to Load_Balancer
> objects being applied to each and every node's logical gateway router.
> These load balancers are almost identical, the main difference being
> the fact that they use different VIPs (the node's IP).
> 
> With the current OVN load balancer design, this becomes a problem at
> scale because the number of load balancers that must be configured is
> N x M (N nodes times M services).
> 
> This series proposes a new concept in OVN: virtual network component
> templates.  The goal of the templates is to help reduce resource
> consumption in the OVN central components in specific cases like the one
> described above.
> 
> To achieve that, the CMS will instead configure a "templated" load
> balancer for every service and apply that single template record to
> the cluster-wide load balancer group.  This template is then
> instantiated differently on different compute nodes.  This translation
> is controlled through per-chassis "template variables" configured by
> the CMS in the new NB.Template_Var table.
> 
> Patch 5/5 introduces a synthetic benchmark simulating what an OpenShift
> router (using NodePort services) scale test would do.  The benchmark
> allows us to compare the "standard" (no-template) configuration
> against the configuration that uses component templates:
> 
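> +----------+-------+-------------------+--------------------+--------------------+-----------+---------------------------+
> |          |       |                   |         NB         |         SB         |  northd   |      ovn-controller       |
> +----------+-------+-------------------+----------+---------+----------+---------+-----------+----------+----------------+
> | Template | Nodes | NodePort Services | Size(MB) | RSS(MB) | Size(MB) | RSS(MB) | loop(sec) | RSS (MB) | recompute(sec) |
> |          |       |   (5 backends)    |          |         |          |         |           |          |                |
> +----------+-------+-------------------+----------+---------+----------+---------+-----------+----------+----------------+
> | No       |    60 |              1000 |       25 |     116 |      118 |     589 |      2.70 |      463 |           0.20 |
> | Yes      |    60 |              1000 |        6 |      25 |        8 |      46 |      0.07 |       44 |           0.20 |
> +----------+-------+-------------------+----------+---------+----------+---------+-----------+----------+----------------+
> | No       |   120 |              2000 |       67 |     865 |      471 |    9000 |     15.60 |     1016 |           0.40 |
> | Yes      |   120 |              2000 |       23 |      96 |       28 |     225 |      0.22 |       83 |           0.40 |
> +----------+-------+-------------------+----------+---------+----------+---------+-----------+----------+----------------+
> | Yes      |   120 |                 1 |      118 |     440 |      136 |     668 |      0.72 |      311 |           1.77 |
> +----------+-------+-------------------+----------+---------+----------+---------+-----------+----------+----------------+
> | Yes      |   250 |                 1 |      244 |     870 |      263 |    1502 |      1.26 |      318 |           1.87 |
> +----------+-------+-------------------+----------+---------+----------+---------+-----------+----------+----------------+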
> 
> For the N=120 S=2000 case:
> - NB size reduced by ~65%
> - NB RSS  reduced by ~90%
> - SB size reduced by ~95%
> - SB RSS  reduced by ~98%
> - ovn-northd loop time reduced by ~98%
> - ovn-controller RSS reduced by ~92%
> 
> Dumitru Ceara (5):
>   lflow: Factor out the lflow reference handling code into a new module.
>   Add NB and SB Template_Var tables.
>   controller: Add support for templated actions and matches.
>   lb: Support using templates.
>   tutorial: Add scripts to simulate node-port ovn-k8s services.
> 

Oops, sorry about the noise, this should've been a v2.  I'll repost.

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH ovn 4/5] lb: Support using templates.

2022-11-04 Thread Dumitru Ceara
Allow the CMS to configure template LBs.  The following configurations are
supported:
- VIPs of the form: ^vip_variable[:^port_variable|:port]
- Backends of the form:
  ^backendip_variable1[:^port_variable1|:port],^backendip_variable2[:^port_variable2|:port]
  OR
  ^backends_variable1,^backends_variable2

The CMS needs to provide a bit more information than with non-template load
balancers and must explicitly specify the address family to be used.

There is currently no support for template load balancers with
options:add_route=true set.  That is because ovn-northd does not
instantiate template variables.  While this is a limitation in a way, its
impact is not huge.  The load balancer 'add_route' option was added as a
way to make the CMS life easier and to avoid having to explicitly add a
route for the VIP.  The CMS can still achieve the same logical topology by
explicitly adding the VIP route.

Template load balancers don't support the "reachable" neighbor-responder
mode.  Instead the CMS can explicitly configure the responder mode to
either "all" or "none".

To properly handle template updates in ovn-controller we also add a
Chassis_Template_Var <- LB reference in ovn-controller.  This way, when
a Chassis_Template_Var changes value all load balancers that refer to
it will also get updated.

Signed-off-by: Dumitru Ceara 
---
V2:
- Fix GCC build due to missing explicit return.
- Fix ls_in_pre_stateful flows due to using wrong lb field.
- Use new lexer_parse_template_string().
- Changed lb_handle_changed_ref() signature to return bool.
- Update documentation with info about responder mode=none, LB template
  supported formats, lb explicit address family requirements.
- Squashed the template LB patches into a single one
- Added more tests.
- Squashed the system tests patch into this one.
---
 controller/lflow.c  |  118 +--
 controller/lflow.h  |7 +
 controller/ovn-controller.c |   67 +-
 lib/lb.c|  457 ++-
 lib/lb.h|   40 +++-
 lib/ovn-util.c  |3 
 northd/northd.c |   89 
 ovn-nb.xml  |   53 +
 tests/ovn-nbctl.at  |   23 +-
 tests/ovn-northd.at |7 +
 tests/ovn.at|  131 
 tests/system-ovn.at |  183 +
 utilities/ovn-nbctl.c   |  122 ++-
 13 files changed, 1071 insertions(+), 229 deletions(-)

diff --git a/controller/lflow.c b/controller/lflow.c
index fc4371d0df..7f880bd62b 100644
--- a/controller/lflow.c
+++ b/controller/lflow.c
@@ -97,6 +97,15 @@ consider_logical_flow(const struct sbrec_logical_flow *lflow,
   struct lflow_ctx_in *l_ctx_in,
   struct lflow_ctx_out *l_ctx_out);
 
+static void
+consider_lb_hairpin_flows(struct objdep_mgr *mgr,
+  const struct sbrec_load_balancer *sbrec_lb,
+  const struct hmap *local_datapaths,
+  const struct smap *template_vars,
+  bool use_ct_mark,
+  struct ovn_desired_flow_table *flow_table,
+  struct simap *ids);
+
 static void add_port_sec_flows(const struct shash *binding_lports,
const struct sbrec_chassis *,
struct ovn_desired_flow_table *);
@@ -223,7 +232,7 @@ lflow_handle_changed_flows(struct lflow_ctx_in *l_ctx_in,
 UUIDSET_INITIALIZER(_remove_nodes);
 SBREC_LOGICAL_FLOW_TABLE_FOR_EACH_TRACKED (lflow,
l_ctx_in->logical_flow_table) {
-if (uuidset_find(l_ctx_out->lflows_processed, >header_.uuid)) {
+if (uuidset_find(l_ctx_out->objs_processed, >header_.uuid)) {
 VLOG_DBG("lflow "UUID_FMT"has been processed, skip.",
  UUID_ARGS(>header_.uuid));
 continue;
@@ -253,14 +262,14 @@ lflow_handle_changed_flows(struct lflow_ctx_in *l_ctx_in,
  UUID_ARGS(>header_.uuid));
 
 /* For the extra lflows that need to be reprocessed because of the
- * flood remove, remove it from lflows_processed. */
+ * flood remove, remove it from objs_processed. */
 struct uuidset_node *unode =
-uuidset_find(l_ctx_out->lflows_processed,
+uuidset_find(l_ctx_out->objs_processed,
  >header_.uuid);
 if (unode) {
 VLOG_DBG("lflow "UUID_FMT"has been processed, now reprocess.",
  UUID_ARGS(>header_.uuid));
-uuidset_delete(l_ctx_out->lflows_processed, unode);
+uuidset_delete(l_ctx_out->objs_processed, unode);
 }
 
 consider_logical_flow(lflow, false, l_ctx_in, l_ctx_out);
@@ -677,7 +686,7 @@ lflow_handle_addr_set_update(const char *as_name,
 struct 

[ovs-dev] [PATCH ovn 5/5] tutorial: Add scripts to simulate node-port ovn-k8s services.

2022-11-04 Thread Dumitru Ceara
In a sandbox run:

$ ./ovn-lb-benchmark.sh

to simulate an ovn-k8s-like topology with N nodes and VIPS NodePort services
applied to all nodes.  Each service has BACKENDS backends.

If USE_TEMPLATES is "yes" then the configuration will be optimized to use
Chassis_Template_Vars.  Otherwise it will create N LBs per service, one
for every node.

Signed-off-by: Dumitru Ceara 
---
 tutorial/automake.mk |4 +
 tutorial/ovn-gen-lb-template-vars.py |  116 ++
 tutorial/ovn-lb-benchmark.sh |  110 
 3 files changed, 229 insertions(+), 1 deletion(-)
 create mode 100755 tutorial/ovn-gen-lb-template-vars.py
 create mode 100755 tutorial/ovn-lb-benchmark.sh

diff --git a/tutorial/automake.mk b/tutorial/automake.mk
index 046962c000..171da8de66 100644
--- a/tutorial/automake.mk
+++ b/tutorial/automake.mk
@@ -1,6 +1,8 @@
 EXTRA_DIST += \
tutorial/ovs-sandbox \
-   tutorial/ovn-setup.sh
+   tutorial/ovn-setup.sh \
+   tutorial/ovn-lb-benchmark.sh \
+   tutorial/ovn-gen-lb-template-vars.py
 sandbox: all
cd $(srcdir)/tutorial && MAKE=$(MAKE) HAVE_OPENSSL=$(HAVE_OPENSSL) \
./ovs-sandbox -b $(abs_builddir) --ovs-src $(ovs_srcdir) 
--ovs-build $(ovs_builddir) $(SANDBOXFLAGS)
diff --git a/tutorial/ovn-gen-lb-template-vars.py 
b/tutorial/ovn-gen-lb-template-vars.py
new file mode 100755
index 00..fe7e6b93f6
--- /dev/null
+++ b/tutorial/ovn-gen-lb-template-vars.py
@@ -0,0 +1,116 @@
+import getopt
+import os
+import re
+import sys
+import uuid
+
+import ovs.db.idl
+import ovs.db.schema
+import ovs.db.types
+import ovs.ovsuuid
+import ovs.poller
+import ovs.stream
+import ovs.util
+import ovs.vlog
+from ovs.db import data
+from ovs.db import error
+from ovs.db.idl import _row_to_uuid as row_to_uuid
+from ovs.fatal_signal import signal_alarm
+
+vlog = ovs.vlog.Vlog("template-lb-stress")
+vlog.set_levels_from_string("console:info")
+vlog.init(None)
+
+SCHEMA = '../ovn-nb.ovsschema'
+
+
+def add_chassis_template_vars(idl, n, n_vips, n_backends):
+for i in range(1, n + 1):
+print(f'ADDING LBs for node {i}')
+txn = ovs.db.idl.Transaction(idl)
+tv = txn.insert(idl.tables["Chassis_Template_Var"])
+tv.chassis = f'chassis-{i}'
+tv.setkey('variables', 'vip', f'42.42.42.{i}')
+
+for j in range(1, n_vips + 1):
+backends = ''
+for k in range(0, n_backends):
+j1 = j // 250
+j2 = j % 250
+backends = f'42.{k}.{j1}.{j2}:{j},{backends}'
+tv.setkey('variables', f'backends{j}', backends)
+status = txn.commit_block()
+sys.stdout.write(
+f'commit status = 
{ovs.db.idl.Transaction.status_to_string(status)}\n'
+)
+
+
+def run(remote, n, n_vips, n_backends):
+schema_helper = ovs.db.idl.SchemaHelper(SCHEMA)
+schema_helper.register_all()
+idl = ovs.db.idl.Idl(remote, schema_helper, leader_only=False)
+
+seqno = 0
+
+error, stream = ovs.stream.Stream.open_block(
+ovs.stream.Stream.open(remote), 2000
+)
+if error:
+sys.stderr.write(f'failed to connect to \"{remote}\"')
+sys.exit(1)
+
+if not stream:
+sys.stderr.write(f'failed to connect to \"{remote}\"')
+sys.exit(1)
+rpc = ovs.jsonrpc.Connection(stream)
+
+while idl.change_seqno == seqno and not idl.run():
+rpc.run()
+
+poller = ovs.poller.Poller()
+idl.wait(poller)
+rpc.wait(poller)
+poller.block()
+
+add_chassis_template_vars(idl, n, n_vips, n_backends)
+
+
+def main(argv):
+try:
+options, args = getopt.gnu_getopt(
+argv[1:], 'n:v:b:r:', ['vips', 'backends', 'remote']
+)
+except getopt.GetoptError as geo:
+sys.stderr.write(f'{ovs.util.PROGRAM_NAME}: {geo.msg}\n')
+sys.exit(1)
+
+n = None
+vips = None
+backends = None
+remote = None
+for key, value in options:
+if key == '-n':
+n = int(value)
+elif key in ['-v', '--vips']:
+vips = int(value)
+elif key in ['-b', '--backends']:
+backends = int(value)
+elif key in ['-r', '--remote']:
+remote = value
+else:
+sys.stderr.write(f'{ovs.util.PROGRAM_NAME}: unknown input args')
+sys.exit(1)
+
+if not n or not vips or not backends:
+sys.stderr.write(f'{ovs.util.PROGRAM_NAME}: invalid input args')
+sys.exit(1)
+
+run(remote, n, vips, backends)
+
+
+if __name__ == '__main__':
+try:
+main(sys.argv)
+except error.Error as e:
+sys.stderr.write(f'{e}\n')
+sys.exit(1)
diff --git a/tutorial/ovn-lb-benchmark.sh b/tutorial/ovn-lb-benchmark.sh
new file mode 100755
index 00..6e8129ab97
--- /dev/null
+++ b/tutorial/ovn-lb-benchmark.sh
@@ -0,0 +1,110 @@
+#!/bin/bash
+
+nrtr=$1
+nlb=$2
+nbackends=$3

[ovs-dev] [PATCH ovn 3/5] controller: Add support for templated actions and matches.

2022-11-04 Thread Dumitru Ceara
Expand SB.Template_Var records in two stages:
1. first expand them to local values in match/action strings
2. then reparse the expanded strings

When a lflow references a Chassis_Template_Var, also track that
reference (similar to the references maintained for multicast groups,
address sets, port_groups and port bindings).

Signed-off-by: Dumitru Ceara 
---
V2:
- Fix GCC build due to missing newline.
- Handle SB table rename Template_Var -> Chassis_Template_Var.
- Address Han's comments:
  - Add new function to parse lflow actions.
  - Move xstrdup inside lexer_parse_template_string() and execute it only
if needed.
  - Match template vars only by chassis name.
  - Change local_templates table to a plain smap.
  - Some indentation updates.
  - Use NULL template variable change handlers for chassis/Open_vSwitch
changes.
  - Added more tests.
- Fix tracking of template references in lflow actions.
---
 controller/lflow.c  |  136 ++--
 controller/lflow.h  |1 
 controller/ofctrl.c |   11 +-
 controller/ofctrl.h |3 
 controller/ovn-controller.c |  289 +++
 include/ovn/expr.h  |4 -
 include/ovn/lex.h   |   15 ++
 lib/actions.c   |9 +
 lib/expr.c  |   18 ++-
 lib/lex.c   |   57 
 lib/objdep.c|1 
 lib/objdep.h|1 
 tests/ovn-controller.at |   50 +++
 tests/ovn.at|   80 
 tests/test-ovn.c|   17 ++-
 utilities/ovn-trace.c   |   33 -
 16 files changed, 658 insertions(+), 67 deletions(-)

diff --git a/controller/lflow.c b/controller/lflow.c
index d4434bdee8..fc4371d0df 100644
--- a/controller/lflow.c
+++ b/controller/lflow.c
@@ -81,6 +81,8 @@ convert_match_to_expr(const struct sbrec_logical_flow *,
   const struct local_datapath *ldp,
   struct expr **prereqs, const struct shash *addr_sets,
   const struct shash *port_groups,
+  const struct smap *template_vars,
+  struct sset *template_vars_ref,
   struct objdep_mgr *, bool *pg_addr_set_ref);
 static void
 add_matches_to_flow_table(const struct sbrec_logical_flow *,
@@ -297,6 +299,43 @@ as_info_from_expr_const(const char *as_name, const union 
expr_constant *c,
 return true;
 }
 
+static bool
+lflow_parse_actions(const struct sbrec_logical_flow *lflow,
+const struct lflow_ctx_in *l_ctx_in,
+struct sset *template_vars_ref,
+struct ofpbuf *ovnacts_out,
+struct expr **prereqs_out)
+{
+bool ingress = !strcmp(lflow->pipeline, "ingress");
+struct ovnact_parse_params pp = {
+.symtab = ,
+.dhcp_opts = l_ctx_in->dhcp_opts,
+.dhcpv6_opts = l_ctx_in->dhcpv6_opts,
+.nd_ra_opts = l_ctx_in->nd_ra_opts,
+.controller_event_opts = l_ctx_in->controller_event_opts,
+
+.pipeline = ingress ? OVNACT_P_INGRESS : OVNACT_P_EGRESS,
+.n_tables = LOG_PIPELINE_LEN,
+.cur_ltable = lflow->table_id,
+};
+
+char *actions_expanded_s = NULL;
+const char *actions_s =
+lexer_parse_template_string(lflow->actions, l_ctx_in->template_vars,
+template_vars_ref, _expanded_s);
+char *error = ovnacts_parse_string(actions_s, ,
+   ovnacts_out, prereqs_out);
+free(actions_expanded_s);
+if (error) {
+static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
+VLOG_WARN_RL(, "error parsing actions \"%s\": %s",
+ lflow->actions, error);
+free(error);
+return false;
+}
+return true;
+}
+
 /* Parses the lflow regarding the changed address set 'as_name', and generates
  * ovs flows for the newly added addresses in 'as_diff_added' only. It is
  * similar to consider_logical_flow__, with the below differences:
@@ -347,27 +386,14 @@ consider_lflow_for_added_as_ips__(
 
 uint64_t ovnacts_stub[1024 / 8];
 struct ofpbuf ovnacts = OFPBUF_STUB_INITIALIZER(ovnacts_stub);
-struct ovnact_parse_params pp = {
-.symtab = ,
-.dhcp_opts = l_ctx_in->dhcp_opts,
-.dhcpv6_opts = l_ctx_in->dhcpv6_opts,
-.nd_ra_opts = l_ctx_in->nd_ra_opts,
-.controller_event_opts = l_ctx_in->controller_event_opts,
-.pipeline = ingress ? OVNACT_P_INGRESS : OVNACT_P_EGRESS,
-.n_tables = LOG_PIPELINE_LEN,
-.cur_ltable = lflow->table_id,
-};
+struct sset template_vars_ref = SSET_INITIALIZER(_vars_ref);
 struct expr *prereqs = NULL;
-char *error;
 
-error = ovnacts_parse_string(lflow->actions, , , );
-if (error) {
-static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
-VLOG_WARN_RL(, "error parsing actions \"%s\": %s",
-

[ovs-dev] [PATCH ovn 2/5] Add NB and SB Template_Var tables.

2022-11-04 Thread Dumitru Ceara
Propagate the contents of the NB table to the Southbound.

Signed-off-by: Dumitru Ceara 
---
Note:
- ovn-trace doesn't support template variables (yet).

V2:
- Fixed TEMPLATE_VAR_TABLE_INITIALIZER definition so that GCC doesn't
  complain anymore.
- Addressed Han's comments:
  - Rename tables to Chassis_Template_Var.
  - Fix man page.
  - Simplify function prototypes.
- Changed schema as suggested by Ilya.
---
 northd/automake.mk   |4 ++
 northd/en-northd.c   |4 ++
 northd/inc-proc-northd.c |8 -
 northd/northd.c  |   41 +
 northd/northd.h  |4 ++
 northd/template-var.c|   74 ++
 northd/template-var.h|   58 
 ovn-nb.ovsschema |   17 +--
 ovn-nb.xml   |   29 ++
 ovn-sb.ovsschema |   12 ++-
 ovn-sb.xml   |   15 +
 tests/ovn-northd.at  |   35 ++
 utilities/ovn-nbctl.c|3 ++
 utilities/ovn-sbctl.c|3 ++
 14 files changed, 299 insertions(+), 8 deletions(-)
 create mode 100644 northd/template-var.c
 create mode 100644 northd/template-var.h

diff --git a/northd/automake.mk b/northd/automake.mk
index 81582867dc..31134bc329 100644
--- a/northd/automake.mk
+++ b/northd/automake.mk
@@ -13,7 +13,9 @@ northd_ovn_northd_SOURCES = \
northd/inc-proc-northd.c \
northd/inc-proc-northd.h \
northd/ipam.c \
-   northd/ipam.h
+   northd/ipam.h \
+   northd/template-var.c \
+   northd/template-var.h
 northd_ovn_northd_LDADD = \
lib/libovn.la \
$(OVSDB_LIBDIR)/libovsdb.la \
diff --git a/northd/en-northd.c b/northd/en-northd.c
index 7fe83db642..030ee25d8f 100644
--- a/northd/en-northd.c
+++ b/northd/en-northd.c
@@ -80,6 +80,8 @@ void en_northd_run(struct engine_node *node, void *data)
 EN_OVSDB_GET(engine_get_input("NB_acl", node));
 input_data.nbrec_static_mac_binding_table =
 EN_OVSDB_GET(engine_get_input("NB_static_mac_binding", node));
+input_data.nbrec_chassis_template_var_table =
+EN_OVSDB_GET(engine_get_input("NB_chassis_template_var", node));
 
 input_data.sbrec_sb_global_table =
 EN_OVSDB_GET(engine_get_input("SB_sb_global", node));
@@ -113,6 +115,8 @@ void en_northd_run(struct engine_node *node, void *data)
 EN_OVSDB_GET(engine_get_input("SB_chassis_private", node));
 input_data.sbrec_static_mac_binding_table =
 EN_OVSDB_GET(engine_get_input("SB_static_mac_binding", node));
+input_data.sbrec_chassis_template_var_table =
+EN_OVSDB_GET(engine_get_input("SB_chassis_template_var", node));
 
 northd_run(_data, data,
eng_ctx->ovnnb_idl_txn,
diff --git a/northd/inc-proc-northd.c b/northd/inc-proc-northd.c
index 54e0ad3b05..da791f035d 100644
--- a/northd/inc-proc-northd.c
+++ b/northd/inc-proc-northd.c
@@ -64,7 +64,8 @@ VLOG_DEFINE_THIS_MODULE(inc_proc_northd);
 NB_NODE(ha_chassis_group, "ha_chassis_group") \
 NB_NODE(ha_chassis, "ha_chassis") \
 NB_NODE(bfd, "bfd") \
-NB_NODE(static_mac_binding, "static_mac_binding")
+NB_NODE(static_mac_binding, "static_mac_binding") \
+NB_NODE(chassis_template_var, "chassis_template_var")
 
 enum nb_engine_node {
 #define NB_NODE(NAME, NAME_STR) NB_##NAME,
@@ -114,7 +115,8 @@ VLOG_DEFINE_THIS_MODULE(inc_proc_northd);
 SB_NODE(load_balancer, "load_balancer") \
 SB_NODE(bfd, "bfd") \
 SB_NODE(fdb, "fdb") \
-SB_NODE(static_mac_binding, "static_mac_binding")
+SB_NODE(static_mac_binding, "static_mac_binding") \
+SB_NODE(chassis_template_var, "chassis_template_var")
 
 enum sb_engine_node {
 #define SB_NODE(NAME, NAME_STR) SB_##NAME,
@@ -186,6 +188,7 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
 engine_add_input(_northd, _nb_ha_chassis_group, NULL);
 engine_add_input(_northd, _nb_ha_chassis, NULL);
 engine_add_input(_northd, _nb_static_mac_binding, NULL);
+engine_add_input(_northd, _nb_chassis_template_var, NULL);
 
 engine_add_input(_northd, _sb_sb_global, NULL);
 engine_add_input(_northd, _sb_chassis, NULL);
@@ -215,6 +218,7 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
 engine_add_input(_northd, _sb_load_balancer, NULL);
 engine_add_input(_northd, _sb_fdb, NULL);
 engine_add_input(_northd, _sb_static_mac_binding, NULL);
+engine_add_input(_northd, _sb_chassis_template_var, NULL);
 engine_add_input(_mac_binding_aging, _nb_nb_global, NULL);
 engine_add_input(_mac_binding_aging, _sb_mac_binding, NULL);
 engine_add_input(_mac_binding_aging, _northd, NULL);
diff --git a/northd/northd.c b/northd/northd.c
index b7388afc58..170b4f95c8 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -51,6 +51,7 @@
 #include "lib/stopwatch-names.h"
 #include "stream.h"
 #include "timeval.h"
+#include "template-var.h"
 #include "util.h"
 #include "uuid.h"
 #include "ovs-thread.h"
@@ 

[ovs-dev] [PATCH ovn 1/5] lflow: Factor out the lflow reference handling code into a new module.

2022-11-04 Thread Dumitru Ceara
This makes it easier to have an overview of what the code does and at the
same time it allows multiple users to define and manage
"resource <-> object" dependencies.

Acked-by: Han Zhou 
Signed-off-by: Dumitru Ceara 
---
V2:
- Addressed Mark's comments:
  - Fixed typos in comments in objdep.h.
  - Made objdep_change_handler return bool (handled successfully or not).
  - Reverted some unrelated style changes.
- Fixed cast style.
- Addressed Han's comments:
  - Removed superfluous 'type' argument name in prototypes.
- Added Han's ack.
---
 controller/lflow.c  |  330 ++-
 controller/lflow.h  |   67 +
 controller/ovn-controller.c |   55 +--
 lib/automake.mk |2 
 lib/objdep.c|  260 ++
 lib/objdep.h|  122 
 6 files changed, 508 insertions(+), 328 deletions(-)
 create mode 100644 lib/objdep.c
 create mode 100644 lib/objdep.h

diff --git a/controller/lflow.c b/controller/lflow.c
index cc0f31db06..d4434bdee8 100644
--- a/controller/lflow.c
+++ b/controller/lflow.c
@@ -61,7 +61,7 @@ struct lookup_port_aux {
 struct ovsdb_idl_index *sbrec_port_binding_by_name;
 const struct sbrec_datapath_binding *dp;
 const struct sbrec_logical_flow *lflow;
-struct lflow_resource_ref *lfrr;
+struct objdep_mgr *deps_mgr;
 const struct hmap *chassis_tunnels;
 };
 
@@ -72,8 +72,8 @@ struct condition_aux {
 const struct sset *active_tunnels;
 const struct sbrec_logical_flow *lflow;
 /* Resource reference to store the port name referenced
- * in is_chassis_resident() to the logical flow. */
-struct lflow_resource_ref *lfrr;
+ * in is_chassis_resident() to the object (logical flow). */
+struct objdep_mgr *deps_mgr;
 };
 
 static struct expr *
@@ -81,7 +81,7 @@ convert_match_to_expr(const struct sbrec_logical_flow *,
   const struct local_datapath *ldp,
   struct expr **prereqs, const struct shash *addr_sets,
   const struct shash *port_groups,
-  struct lflow_resource_ref *, bool *pg_addr_set_ref);
+  struct objdep_mgr *, bool *pg_addr_set_ref);
 static void
 add_matches_to_flow_table(const struct sbrec_logical_flow *,
   const struct local_datapath *,
@@ -94,17 +94,6 @@ consider_logical_flow(const struct sbrec_logical_flow *lflow,
   bool is_recompute,
   struct lflow_ctx_in *l_ctx_in,
   struct lflow_ctx_out *l_ctx_out);
-static void lflow_resource_add(struct lflow_resource_ref *, enum ref_type,
-   const char *ref_name, const struct uuid *,
-   size_t ref_count);
-static struct ref_lflow_node *ref_lflow_lookup(struct hmap *ref_lflow_table,
-   enum ref_type,
-   const char *ref_name);
-static struct lflow_ref_node *lflow_ref_lookup(struct hmap *lflow_ref_table,
-   const struct uuid *lflow_uuid);
-static void ref_lflow_node_destroy(struct ref_lflow_node *);
-static void lflow_resource_destroy_lflow(struct lflow_resource_ref *,
- const struct uuid *lflow_uuid);
 
 static void add_port_sec_flows(const struct shash *binding_lports,
const struct sbrec_chassis *,
@@ -125,8 +114,8 @@ lookup_port_cb(const void *aux_, const char *port_name, 
unsigned int *portp)
 /* Store the name that used to lookup the lport to lflow reference, so that
  * in the future when the lport's port binding changes, the logical flow
  * that references this lport can be reprocessed. */
-lflow_resource_add(aux->lfrr, REF_TYPE_PORTBINDING, port_name,
-   >lflow->header_.uuid, 0);
+objdep_mgr_add(aux->deps_mgr, OBJDEP_TYPE_PORTBINDING, port_name,
+   >lflow->header_.uuid);
 
 const struct sbrec_port_binding *pb
 = lport_lookup_by_name(aux->sbrec_port_binding_by_name, port_name);
@@ -141,8 +130,8 @@ lookup_port_cb(const void *aux_, const char *port_name, 
unsigned int *portp)
  * this multicast group can be reprocessed. */
 struct ds mg_key = DS_EMPTY_INITIALIZER;
 get_mc_group_key(port_name, aux->dp->tunnel_key, _key);
-lflow_resource_add(aux->lfrr, REF_TYPE_MC_GROUP, ds_cstr(_key),
-   >lflow->header_.uuid, 0);
+objdep_mgr_add(aux->deps_mgr, OBJDEP_TYPE_MC_GROUP, ds_cstr(_key),
+   >lflow->header_.uuid);
 ds_destroy(_key);
 
 const struct sbrec_multicast_group *mg = mcgroup_lookup_by_dp_name(
@@ -180,11 +169,11 @@ is_chassis_resident_cb(const void *c_aux_, const char 
*port_name)
 {
 const struct condition_aux *c_aux = c_aux_;
 
-/* Store the port name that used to lookup the lport 

[ovs-dev] [PATCH ovn 0/5] Add OVN component templates.

2022-11-04 Thread Dumitru Ceara
Sometimes network components are compute node-specific.  Sometimes such
components are replicated, almost identically, for multiple nodes
in the cluster.

One such example is the case of Kubernetes NodePort services which
translate (in the ovn-kubernetes case) to Load_Balancer
objects being applied to each and every node's logical gateway router.
These load balancers are almost identical, the main difference being
the fact that they use different VIPs (the node's IP).

With the current OVN load balancer design, this becomes a problem at
scale because the number of load balancers that must be configured is
N x M (N nodes times M services).

This series proposes a new concept in OVN: virtual network component
templates.  The goal of the templates is to help reduce resource
consumption in the OVN central components in specific cases like the one
described above.

To achieve that, the CMS will instead configure a "templated" load
balancer for every service and apply that single template record to
the cluster-wide load balancer group.  This template is then
instantiated differently on different compute nodes.  This translation
is controlled through per-chassis "template variables" configured by
the CMS in the new NB.Template_Var table.

Patch 5/5 introduces a synthetic benchmark simulating what an OpenShift
router (using NodePort services) scale test would do.  The benchmark
allows us to compare the "standard" (no-template) configuration
against the configuration that uses component templates:

+--+---+---++--+-+---+---+
|  |   |   | NB | SB   |
 |  northd   |ovn-controller |
+--+---+---++--+-+---+---+
| Template | Nodes | NodePort Services | Size(MB) | RSS(MB) | Size(MB) | 
RSS(MB) | loop(sec) | RSS (MB) | recompute(sec) |
|  |   |(5 backends)   |  | |  |
 |   |  ||
+--+---+---+--+-+--+-+---+--++
| No   |  60   |   1000| 25   |116  |   118|   589  
 |2.70   |463   |  0.20  |
| Yes  |  60   |   1000|  6   | 25  | 8|46  
 |0.07   | 44   |  0.20  |
+--+---+---+--+-+--+-+---+--++
| No   | 120   |   2000| 67   |865  |   471|  9000  
 |   15.60   |   1016   |  0.40  |
| Yes  | 120   |   2000| 23   | 96  |28|   225  
 |0.22   | 83   |  0.40  |
+--+---+---+--+-+--+-+---+--++
| Yes  | 120   |   1   |118   |440  |   136|   668  
 |0.72   |311   |  1.77  |
+--+---+---+--+-+--+-+---+--+-
| Yes  | 250   |   1   |244   |870  |   263|  1502  
 |1.26   |318   |  1.87  |
+-

For the N=120 S=2000 case:
- NB size reduced by ~65%
- NB RSS  reduced by ~90%
- SB size reduced by ~95%
- SB RSS  reduced by ~98%
- ovn-northd loop time reduced by ~98%
- ovn-controller RSS reduced by ~92%

Dumitru Ceara (5):
  lflow: Factor out the lflow reference handling code into a new module.
  Add NB and SB Template_Var tables.
  controller: Add support for templated actions and matches.
  lb: Support using templates.
  tutorial: Add scripts to simulate node-port ovn-k8s services.


 controller/lflow.c   | 244 ++
 controller/lflow.h   |   8 +-
 controller/ofctrl.c  |  11 +-
 controller/ofctrl.h  |   3 +-
 controller/ovn-controller.c  | 350 +++-
 include/ovn/expr.h   |   4 +-
 include/ovn/lex.h|  15 +-
 lib/actions.c|   9 +-
 lib/expr.c   |  18 +-
 lib/lb.c | 457 +++
 lib/lb.h |  40 ++-
 lib/lex.c|  57 
 lib/objdep.c |   1 +
 lib/objdep.h |   1 +
 lib/ovn-util.c   |   3 -
 northd/automake.mk   |   4 +-
 northd/en-northd.c   |   4 +
 northd/inc-proc-northd.c |   8 +-
 northd/northd.c  | 130 +---
 northd/northd.h  

[ovs-dev] [PATCH v3] ovs-thread: Detect changes in number of cpus

2022-11-04 Thread Adrian Moreno
Currently, things like the number of handler and revalidator threads are
calculated based on the number of available CPUs. However, this number
is considered static and only calculated once, hence ignoring events
such as cpus being hotplugged, switched on/off or affinity mask
changing.

On the other hand, checking the number of available CPUs multiple times
per second seems like an overkill.
Affinity should not change that often and, even if it does, the impact
of destroying and recreating all the threads so often is probably a
price too expensive to pay.

This patch makes the number of cpus be calculated every 5 seconds
which seems a reasonable middle point.
It generates an impact in the main loop duration of <1% and a worst-case
scenario impact in throughput of < 5% [1].

As a result of these changes (assuming the patch is backported):
- >=2.16: a change in the cpu affinity reflects on the number of threads
  in (at most) 5 seconds.
- < 2.16: a change in the cpu affinity will be reflected on
  the number of threads the next time there is a call to
  bridge_reconfigure() (e.g: on the next DB change), and 5 seconds
  have passed.

The difference in behavior is because on older versions the thread
number calculation was done on bridge reconfiguration while newer
versions moved this logic down to the dpif layer and is run on
dpif->run() stage.
Considering it has not been a huge problem up to today and that the
cpu change would be reflected sooner or later (e.g the user could
force a recalculation with a simple ovs-vsctl command), I think it
might be OK to leave like that.

[1] Tested in the worst-case scenario of disabling the kernel cache
(other_config:flow-size=0), modifying ovs-vswitchd's affinity so the
number of handlers go up and down every 5 seconds and calculated the
difference in netperf's ops/sec.

Fixes: be15ec48d766 ("lib: Use a more accurate value for CPU count 
(sched_getaffinity).")
Cc: david.march...@redhat.com
Signed-off-by: Adrian Moreno 
---
 lib/ovs-thread.c | 61 +---
 1 file changed, 37 insertions(+), 24 deletions(-)

diff --git a/lib/ovs-thread.c b/lib/ovs-thread.c
index 78ed3e970..d1deb9c52 100644
--- a/lib/ovs-thread.c
+++ b/lib/ovs-thread.c
@@ -31,6 +31,7 @@
 #include "openvswitch/poll-loop.h"
 #include "seq.h"
 #include "socket-util.h"
+#include "timeval.h"
 #include "util.h"
 
 #ifdef __CHECKER__
@@ -627,42 +628,54 @@ ovs_thread_stats_next_bucket(const struct ovsthread_stats 
*stats, size_t i)
 }
 
 
-/* Returns the total number of cores available to this process, or 0 if the
- * number cannot be determined. */
-int
-count_cpu_cores(void)
+static int
+count_cpu_cores__(void)
 {
-static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
-static long int n_cores;
+long int n_cores;
 
-if (ovsthread_once_start()) {
 #ifndef _WIN32
-n_cores = sysconf(_SC_NPROCESSORS_ONLN);
+n_cores = sysconf(_SC_NPROCESSORS_ONLN);
+#else
+SYSTEM_INFO sysinfo;
+GetSystemInfo();
+n_cores = sysinfo.dwNumberOfProcessors;
+#endif
 #ifdef __linux__
-if (n_cores > 0) {
-cpu_set_t *set = CPU_ALLOC(n_cores);
+if (n_cores > 0) {
+cpu_set_t *set = CPU_ALLOC(n_cores);
 
-if (set) {
-size_t size = CPU_ALLOC_SIZE(n_cores);
+if (set) {
+size_t size = CPU_ALLOC_SIZE(n_cores);
 
-if (!sched_getaffinity(0, size, set)) {
-n_cores = CPU_COUNT_S(size, set);
-}
-CPU_FREE(set);
+if (!sched_getaffinity(0, size, set)) {
+n_cores = CPU_COUNT_S(size, set);
 }
+CPU_FREE(set);
 }
-#endif
-#else
-SYSTEM_INFO sysinfo;
-GetSystemInfo();
-n_cores = sysinfo.dwNumberOfProcessors;
-#endif
-ovsthread_once_done();
 }
-
+#endif
 return n_cores > 0 ? n_cores : 0;
 }
 
+/* It's unlikely that the available cpus change several times per second and
+ * even if it does, it's not needed (or desired) to react to such changes so
+ * quickly.*/
+#define COUNT_CPU_UPDATE_TIME_MS 5000
+/* Returns the current total number of cores available to this process, or 0
+ * if the number cannot be determined.
+ * It is assumed that this function is only called from the main thread.*/
+int count_cpu_cores(void) {
+static long long int last_updated = 0;
+long long int now = time_msec();
+static int cpu_cores;
+
+if (now - last_updated >= COUNT_CPU_UPDATE_TIME_MS) {
+last_updated = now;
+cpu_cores = count_cpu_cores__();
+}
+return cpu_cores;
+}
+
 /* Returns the total number of cores on the system, or 0 if the
  * number cannot be determined. */
 int
-- 
2.37.3

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] ovs-thread: Detect changes in number of cpus

2022-11-04 Thread Adrian Moreno



On 11/4/22 21:47, Mike Pattrick wrote:

On Fri, Nov 4, 2022 at 11:45 AM Adrian Moreno  wrote:


Currently, things like the number of handler and revalidator threads are
calculated based on the number of available CPUs. However, this number
is considered static and only calculated once, hence ignoring events
such as cpus being hotplugged, switched on/off or affinity mask
changing.

On the other hand, checking the number of available CPUs multiple times
per second seems like an overkill.
Affinity should not change that often and, even if it does, the impact
of destroying and recreating all the threads so often is probably a
price too expensive to pay.

This patch makes the number of cpus be calculated every 5 seconds
which seems a reasonable middle point.
It generates an impact in the main loop duration of <1% and a worst-case
scenario impact in throughput of < 5% [1].

As a result of these changes (assuming the patch is backported):
- >=2.16: a change in the cpu affinity reflects on the number of threads
   in (at most) 5 seconds.
- < 2.16: a change in the cpu affinity will be reflected on
   the number of threads the next time there is a call to
   bridge_reconfigure() (e.g: on the next DB change), and 5 seconds
   have passed.

The difference in behavior is because on older versions the thread
number calculation was done on bridge reconfiguration while newer
versions moved this logic down to the dpif layer and is run on
dpif->run() stage.
Considering it has not been a huge problem up to today and that the
cpu change would be reflected sooner or later (e.g the user could
force a recalculation with a simple ovs-vsctl command), I think it
might be OK to leave like that.

[1] Tested in the worst-case scenario of disabling the kernel cache
(other_config:flow-size=0), modifying ovs-vswitchd's affinity so the
number of handlers go up and down every 5 seconds and calculated the
difference in netperf's ops/sec.

Fixes: be15ec48d766 ("lib: Use a more accurate value for CPU count 
(sched_getaffinity).")
Cc: david.march...@redhat.com
Signed-off-by: Adrian Moreno 
---
  lib/ovs-thread.c | 61 +---
  1 file changed, 37 insertions(+), 24 deletions(-)

diff --git a/lib/ovs-thread.c b/lib/ovs-thread.c
index 78ed3e970..4709f7ead 100644
--- a/lib/ovs-thread.c
+++ b/lib/ovs-thread.c
@@ -31,6 +31,7 @@
  #include "openvswitch/poll-loop.h"
  #include "seq.h"
  #include "socket-util.h"
+#include "timeval.h"
  #include "util.h"

  #ifdef __CHECKER__
@@ -627,42 +628,54 @@ ovs_thread_stats_next_bucket(const struct ovsthread_stats 
*stats, size_t i)
  }


-/* Returns the total number of cores available to this process, or 0 if the
- * number cannot be determined. */
-int
-count_cpu_cores(void)
+static int
+count_cpu_cores__(void)
  {
-static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
-static long int n_cores;
+long int n_cores;

-if (ovsthread_once_start()) {
  #ifndef _WIN32
-n_cores = sysconf(_SC_NPROCESSORS_ONLN);
+n_cores = sysconf(_SC_NPROCESSORS_ONLN);
+#else
+SYSTEM_INFO sysinfo;
+GetSystemInfo();
+n_cores = sysinfo.dwNumberOfProcessors;
+#endif
  #ifdef __linux__
-if (n_cores > 0) {
-cpu_set_t *set = CPU_ALLOC(n_cores);
+if (n_cores > 0) {
+cpu_set_t *set = CPU_ALLOC(n_cores);

-if (set) {
-size_t size = CPU_ALLOC_SIZE(n_cores);
+if (set) {
+size_t size = CPU_ALLOC_SIZE(n_cores);

-if (!sched_getaffinity(0, size, set)) {
-n_cores = CPU_COUNT_S(size, set);
-}
-CPU_FREE(set);
+if (!sched_getaffinity(0, size, set)) {
+n_cores = CPU_COUNT_S(size, set);
  }
+CPU_FREE(set);
  }
-#endif
-#else
-SYSTEM_INFO sysinfo;
-GetSystemInfo();
-n_cores = sysinfo.dwNumberOfProcessors;
-#endif
-ovsthread_once_done();
  }
-
+#endif
  return n_cores > 0 ? n_cores : 0;
  }

+/* It's unlikely that the available cpus change several times per second and
+ * even if it does, it's not needed (or desired) to react to such changes so
+ * quickly.*/
+#define COUNT_CPU_UPDATE_TIME_MS 5000
+/* Returns the current total number of cores available to this process, or 0
+ * if the number cannot be determined.
+ * It is assumed that this function is only called from the main thread.*/
+int count_cpu_cores(void) {
+static int cpu_cores;
+static long long int last_updated = 0;
+long long int now = time_msec();


Very minor, but this should probably be:

+static long long int last_updated = 0;
+long long int now = time_msec();
+static int cpu_cores;

Otherwise, looks good!


I just noticed I missed the version on the patch header so I'll send a new patch 
with the right version and addressing your comment.





+
+if (now - last_updated >= COUNT_CPU_UPDATE_TIME_MS) {
+last_updated = 

Re: [ovs-dev] [PATCH ovn] ci: Update jobs to use numbers instead of test flags

2022-11-04 Thread Han Zhou
On Wed, Nov 2, 2022 at 7:46 AM Dumitru Ceara  wrote:
>
> On 11/2/22 15:35, Ales Musil wrote:
> > To prevent some jobs not running after CI scripts updates
> > use numbers instead of the test flags. This still allows us
> > to use parallelization, but without worrying about skipping
> > some tests by mistake.
> >
> > For "test" suites use 3 parts, with 1500 tests in mind.
> > That should give us additional space for future tests.
> > Currently, there are ~1200 tests. For sanitizers use 5 parts
> > instead, as they are slower in general.
> >
> > For "system-test" use 3 parts, with 300 tests in mind.
> > Currently, there are ~200 tests.
> >
> > In the end this patch reduces the number of jobs by 8 to 20,
> > which is ok as there is a limit of 20 running in parallel [0].
> >
> > [0]
https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration#usage-limits
> > Signed-off-by: Ales Musil 
> > ---
>
> Thanks, Ales, for the patch!
>
> It makes it harder to skip tests by accident in CI which is great.
>
> But there's still the downside that maintainers need to pay a bit more
> attention so that the last run "1001-" or "201-" doesn't take way longer
> than the rest.

I agree with this, but maybe not a big deal. So I am ok with it, too.
>
> I'm OK with that but I wonder what Han, Mark and Numan think about this.
>
> If people are OK with it I can apply and backport the patch (assuming
> the CI is green [0]).

Backporting may be a problem if the older branch doesn't have test 1200-.
Otherwise should be good.

Thanks,
Han

>
> Regards,
> Dumitru
>
> [0] https://github.com/ovsrobot/ovn/actions/runs/3378478500
>
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 2/2] ovsdb/transaction.c: Fix weak reference leak.

2022-11-04 Thread Han Zhou
On Fri, Nov 4, 2022 at 10:22 AM Ilya Maximets  wrote:
>
> On 11/2/22 05:09, Han Zhou wrote:
> > When a row is deleted, if the row has weak references to other rows, the
> > weak reference nodes attached to the destination rows (through
> > weak->dst_node hmap) are not destroyed.
> >
> > Deleting weak references is properly handled when a row is modified. The
> > removed references are taken care by:
> > 1. assess_weak_refs() figures out the deleted references from the row
> >and add them to txn_row->deleted_refs.
> > 2. before commit, in ovsdb_txn_update_weak_refs() it finds the
> >destination row for each item in txn_row->deleted_refs (from step 1),
> >and destroy the corresponding weak references of the destination
row.
> >
> > However, when the row is deleted, the step 1 in assess_weak_refs() is
> > missing. It directly returns without adding the deleted references to
> > txn_row->deleted_refs. So, the destination nodes will keep those weak
> > references although the source side of the references are already
> > deleted.  When such rows that originate weak references are created
> > and deleted, more and more such useless weak reference structures
> > accumulate in the memory, and can stay there until the destination rows
> > are deleted. It is possible that the destination row is never deleted,
> > and in such case the ovsdb-server memory keeps growing (although it is
> > not strictly memory leak, because the structures are still referenced).
> >
> > This problem has an impact to applications like OVN SB DB - the memory
> > grows very fast in long-running deployments and finally causes OOM.
> >
> > This patch fixes it by generating deleted_refs for deleted rows in
> > assess_weak_refs().
> >
> > Fixes: 4dbff9f0a685 ("ovsdb: transaction: Incremental reassessment of
weak refs.")
> > Signed-off-by: Han Zhou 
> > ---
> >  ovsdb/transaction.c | 11 +--
> >  1 file changed, 9 insertions(+), 2 deletions(-)
>
> Hi, Han.
>
> I reproduced the issue by continuously adding and removing logical
> switches in OVN setup that already has some other logical switches
> pre-created.  Number of weak reference objects grows without bound.
>
> The patch fixes the problem and looks good to me.
>
> I also ran some of our scale tests with ovn-heater and observed
> a bit lower memory consumption overall with the fix applied (These
> tests do not really remove a lot of resources).
>
> Good catch!  Thanks!
>
> I slightly re-named the 'area' part of the patch subject just to
> make it look similar to other commits in that area.
>
> With that, applied and backported down to 2.17.
>
> Best regards, Ilya Maximets.

Thanks Ilya!
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] ovs-thread: Detect changes in number of cpus

2022-11-04 Thread Mike Pattrick
On Fri, Nov 4, 2022 at 11:45 AM Adrian Moreno  wrote:
>
> Currently, things like the number of handler and revalidator threads are
> calculated based on the number of available CPUs. However, this number
> is considered static and only calculated once, hence ignoring events
> such as cpus being hotplugged, switched on/off or affinity mask
> changing.
>
> On the other hand, checking the number of available CPUs multiple times
> per second seems like an overkill.
> Affinity should not change that often and, even if it does, the impact
> of destroying and recreating all the threads so often is probably a
> price too expensive to pay.
>
> This patch makes the number of cpus be calculated every 5 seconds
> which seems a reasonable middle point.
> It generates an impact in the main loop duration of <1% and a worst-case
> scenario impact in throughput of < 5% [1].
>
> As a result of these changes (assuming the patch is backported):
> - >=2.16: a change in the cpu affinity reflects on the number of threads
>   in (at most) 5 seconds.
> - < 2.16: a change in the cpu affinity will be reflected on
>   the number of threads the next time there is a call to
>   bridge_reconfigure() (e.g: on the next DB change), and 5 seconds
>   have passed.
>
> The difference in behavior is because on older versions the thread
> number calculation was done on bridge reconfiguration while newer
> versions moved this logic down to the dpif layer and is run on
> dpif->run() stage.
> Considering it has not been a huge problem up to today and that the
> cpu change would be reflected sooner or later (e.g the user could
> force a recalculation with a simple ovs-vsctl command), I think it
> might be OK to leave like that.
>
> [1] Tested in the worst-case scenario of disabling the kernel cache
> (other_config:flow-size=0), modifying ovs-vswitchd's affinity so the
> number of handlers go up and down every 5 seconds and calculated the
> difference in netperf's ops/sec.
>
> Fixes: be15ec48d766 ("lib: Use a more accurate value for CPU count 
> (sched_getaffinity).")
> Cc: david.march...@redhat.com
> Signed-off-by: Adrian Moreno 
> ---
>  lib/ovs-thread.c | 61 +---
>  1 file changed, 37 insertions(+), 24 deletions(-)
>
> diff --git a/lib/ovs-thread.c b/lib/ovs-thread.c
> index 78ed3e970..4709f7ead 100644
> --- a/lib/ovs-thread.c
> +++ b/lib/ovs-thread.c
> @@ -31,6 +31,7 @@
>  #include "openvswitch/poll-loop.h"
>  #include "seq.h"
>  #include "socket-util.h"
> +#include "timeval.h"
>  #include "util.h"
>
>  #ifdef __CHECKER__
> @@ -627,42 +628,54 @@ ovs_thread_stats_next_bucket(const struct 
> ovsthread_stats *stats, size_t i)
>  }
>
>
> -/* Returns the total number of cores available to this process, or 0 if the
> - * number cannot be determined. */
> -int
> -count_cpu_cores(void)
> +static int
> +count_cpu_cores__(void)
>  {
> -static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
> -static long int n_cores;
> +long int n_cores;
>
> -if (ovsthread_once_start()) {
>  #ifndef _WIN32
> -n_cores = sysconf(_SC_NPROCESSORS_ONLN);
> +n_cores = sysconf(_SC_NPROCESSORS_ONLN);
> +#else
> +SYSTEM_INFO sysinfo;
> +GetSystemInfo();
> +n_cores = sysinfo.dwNumberOfProcessors;
> +#endif
>  #ifdef __linux__
> -if (n_cores > 0) {
> -cpu_set_t *set = CPU_ALLOC(n_cores);
> +if (n_cores > 0) {
> +cpu_set_t *set = CPU_ALLOC(n_cores);
>
> -if (set) {
> -size_t size = CPU_ALLOC_SIZE(n_cores);
> +if (set) {
> +size_t size = CPU_ALLOC_SIZE(n_cores);
>
> -if (!sched_getaffinity(0, size, set)) {
> -n_cores = CPU_COUNT_S(size, set);
> -}
> -CPU_FREE(set);
> +if (!sched_getaffinity(0, size, set)) {
> +n_cores = CPU_COUNT_S(size, set);
>  }
> +CPU_FREE(set);
>  }
> -#endif
> -#else
> -SYSTEM_INFO sysinfo;
> -GetSystemInfo();
> -n_cores = sysinfo.dwNumberOfProcessors;
> -#endif
> -ovsthread_once_done();
>  }
> -
> +#endif
>  return n_cores > 0 ? n_cores : 0;
>  }
>
> +/* It's unlikely that the available cpus change several times per second and
> + * even if it does, it's not needed (or desired) to react to such changes so
> + * quickly.*/
> +#define COUNT_CPU_UPDATE_TIME_MS 5000
> +/* Returns the current total number of cores available to this process, or 0
> + * if the number cannot be determined.
> + * It is assumed that this function is only called from the main thread.*/
> +int count_cpu_cores(void) {
> +static int cpu_cores;
> +static long long int last_updated = 0;
> +long long int now = time_msec();

Very minor, but this should probably be:

+static long long int last_updated = 0;
+long long int now = time_msec();
+static int cpu_cores;

Otherwise, looks good!

> +
> +if (now - last_updated >= 

Re: [ovs-dev] [OVN v13] OVN - Add Support for Remote Port Mirroring

2022-11-04 Thread 0-day Robot
Bleep bloop.  Greetings Abhiram R N, I am a robot and I have tried out your 
patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


checkpatch:
WARNING: Line lacks whitespace around operator
#2593 FILE: utilities/ovn-nbctl.c:275:
  mirror-add NAME TYPE INDEX FILTER IP\n\

WARNING: Line lacks whitespace around operator
#2602 FILE: utilities/ovn-nbctl.c:284:
  mirror-del [NAME] remove mirrors\n\

WARNING: Line lacks whitespace around operator
#2603 FILE: utilities/ovn-nbctl.c:285:
  mirror-list   print mirrors\n\

WARNING: Line lacks whitespace around operator
#2612 FILE: utilities/ovn-nbctl.c:327:
  lsp-attach-mirror PORT MIRROR   attach source PORT to MIRROR\n\

WARNING: Line lacks whitespace around operator
#2613 FILE: utilities/ovn-nbctl.c:328:
  lsp-detach-mirror PORT MIRROR   detach source PORT from MIRROR\n\

Lines checked: 3012, Warnings: 5, Errors: 0


Please check this out.  If you feel there has been an error, please email 
acon...@redhat.com

Thanks,
0-day Robot
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [OVN v13] OVN - Add Support for Remote Port Mirroring

2022-11-04 Thread Abhiram R N
Mirror creation just creates the mirror. The lsp-attach-mirror
triggers the sequence to create Mirror in OVS DB on compute node.
OVS already supports Port Mirroring.

Note: This is targeted to mirror to destinations anywhere outside the
cluster where the analyser resides and it need not be an OVN node.

Example commands are as below:

Mirror creation
ovn-nbctl mirror-add mirror1 gre 0 from-lport 10.10.10.2

Attach a logical port to the mirror.
ovn-nbctl lsp-attach-mirror sw0-port1 mirror1

Detach a source from Mirror
ovn-nbctl lsp-detach-mirror sw0-port1 mirror1

Mirror deletion
ovn-nbctl mirror-del mirror1

Co-authored-by: Veda Barrenkala 
Signed-off-by: Veda Barrenkala 
Signed-off-by: Abhiram R N 
---
v12 --> V13: Made each of bulk test cases(in ovn.at) as separate
 test to make it pass consistently.
V11 --> V12: Minor fix in ovn.at to solve intermittent failures

V10 --> V11: Addressed review comments from V10 by Ihar
   i) Expanded bulk updates test cases in ovn.at
  Overall below cases are covered
  a) Attaches multiple mirrors (new)
  b) Equal detaches and attaches (same as V10)
  c) Detaches more than attaches (new)
  d) Attaches more than detaches (new)
  e) Detaches all (new)
  ii) Addressed the detach all case in mirror.c
 iii) Minor correction in NEWS
  iv) Added invalid mirror attach case in ovn-nbctl.at

Files modified (V10 --> V11):
Code --> mirror.c
Test --> ovn.at, ovn-nbctl.at
Misc --> NEWS

Ihar,
Regarding mirror_delete function param delete_all it is wrt the
port binding and if a port binding is removed we delete all its
attachment. Already that use case is covered in ovn.at.
Having said that the detaches all had issue in mirror_delete which
I have addressed. With all the above cases added now in bulk updates
hope it should give good assurance.

 NEWS|   1 +
 controller/automake.mk  |   4 +-
 controller/mirror.c | 538 +
 controller/mirror.h |  53 +++
 controller/ovn-controller.c | 266 ++--
 northd/en-northd.c  |   4 +
 northd/inc-proc-northd.c|   4 +
 northd/northd.c | 172 
 northd/northd.h |   2 +
 ovn-nb.ovsschema|  31 +-
 ovn-nb.xml  |  63 +++
 ovn-sb.ovsschema|  26 +-
 ovn-sb.xml  |  50 +++
 tests/ovn-nbctl.at  | 120 ++
 tests/ovn-northd.at | 102 +
 tests/ovn.at| 778 
 utilities/ovn-nbctl.c   | 357 +
 utilities/ovn-sbctl.c   |   4 +
 18 files changed, 2547 insertions(+), 28 deletions(-)
 create mode 100644 controller/mirror.c
 create mode 100644 controller/mirror.h

diff --git a/NEWS b/NEWS
index 224a7b83e..84b22abdb 100644
--- a/NEWS
+++ b/NEWS
@@ -25,6 +25,7 @@ OVN v22.09.0 - 16 Sep 2022
 any of LR's LRP IP, there is no need to create SNAT entry.  Now such
 traffic destined to LRP IP is not dropped.
   - Bump python version required for building OVN to 3.6.
+  - Added Support for Remote Port Mirroring.
 
 OVN v22.06.0 - 03 Jun 2022
 --
diff --git a/controller/automake.mk b/controller/automake.mk
index c2ab1bbe6..334672b4d 100644
--- a/controller/automake.mk
+++ b/controller/automake.mk
@@ -41,7 +41,9 @@ controller_ovn_controller_SOURCES = \
controller/ovsport.h \
controller/ovsport.c \
controller/vif-plug.h \
-   controller/vif-plug.c
+   controller/vif-plug.c \
+   controller/mirror.h \
+   controller/mirror.c
 
 controller_ovn_controller_LDADD = lib/libovn.la $(OVS_LIBDIR)/libopenvswitch.la
 man_MANS += controller/ovn-controller.8
diff --git a/controller/mirror.c b/controller/mirror.c
new file mode 100644
index 0..11f2b63a6
--- /dev/null
+++ b/controller/mirror.c
@@ -0,0 +1,538 @@
+/* Copyright (c) 2022 Red Hat, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include 
+#include 
+
+/* library headers */
+#include "lib/sset.h"
+#include "lib/util.h"
+
+/* OVS includes. */
+#include "lib/vswitch-idl.h"
+#include "openvswitch/vlog.h"
+
+/* OVN includes. */
+#include "binding.h"
+#include "lib/ovn-sb-idl.h"
+#include "mirror.h"
+
+VLOG_DEFINE_THIS_MODULE(port_mirror);
+
+/* Static function declarations */
+
+static const struct ovsrec_port *
+get_port_for_iface(const struct 

Re: [ovs-dev] [PATCH 2/2] ovsdb/transaction.c: Fix weak reference leak.

2022-11-04 Thread Ilya Maximets
On 11/2/22 05:09, Han Zhou wrote:
> When a row is deleted, if the row has weak references to other rows, the
> weak reference nodes attached to the destination rows (through
> weak->dst_node hmap) are not destroyed.
> 
> Deleting weak references is properly handled when a row is modified. The
> removed references are taken care by:
> 1. assess_weak_refs() figures out the deleted references from the row
>and add them to txn_row->deleted_refs.
> 2. before commit, in ovsdb_txn_update_weak_refs() it finds the
>destination row for each item in txn_row->deleted_refs (from step 1),
>and destroy the corresponding weak references of the destination row.
> 
> However, when the row is deleted, the step 1 in assess_weak_refs() is
> missing. It directly returns without adding the deleted references to
> txn_row->deleted_refs. So, the destination nodes will keep those weak
> references although the source side of the references is already
> deleted.  When such rows that originate weak references are created
> and deleted, more and more such useless weak reference structures
> accumulate in the memory, and can stay there until the destination rows
> are deleted. It is possible that the destination row is never deleted,
> and in such case the ovsdb-server memory keeps growing (although it is
> not strictly memory leak, because the structures are still referenced).
> 
> This problem has an impact to applications like OVN SB DB - the memory
> grows very fast in long-running deployments and finally causes OOM.
> 
> This patch fixes it by generating deleted_refs for deleted rows in
> assess_weak_refs().
> 
> Fixes: 4dbff9f0a685 ("ovsdb: transaction: Incremental reassessment of weak 
> refs.")
> Signed-off-by: Han Zhou 
> ---
>  ovsdb/transaction.c | 11 +--
>  1 file changed, 9 insertions(+), 2 deletions(-)

Hi, Han.

I reproduced the issue by continuously adding and removing logical
switches in OVN setup that already has some other logical switches
pre-created.  Number of weak reference objects grows without bound.

The patch fixes the problem and looks good to me.

I also ran some of our scale tests with ovn-heater and observed
a bit lower memory consumption overall with the fix applied (These
tests do not really remove a lot of resources).

Good catch!  Thanks!

I slightly re-named the 'area' part of the patch subject just to
make it look similar to other commits in that area.

With that, applied and backported down to 2.17.

Best regards, Ilya Maximets.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn v5 3/3] northd: add drop sampling

2022-11-04 Thread 0-day Robot
Bleep bloop.  Greetings Adrian Moreno, I am a robot and I have tried out your 
patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


checkpatch:
ERROR: Inappropriate spacing around cast
#77 FILE: controller/ovn-controller.c:3200:
(struct sbrec_sb_global_table *)EN_OVSDB_GET(

ERROR: Inappropriate spacing around cast
#107 FILE: controller/ovn-controller.c:3439:
(struct sbrec_sb_global_table *)EN_OVSDB_GET(

Lines checked: 1023, Warnings: 0, Errors: 2


Please check this out.  If you feel there has been an error, please email 
acon...@redhat.com

Thanks,
0-day Robot
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn v4 2/3] northd: make default drops explicit

2022-11-04 Thread Adrian Moreno



On 11/1/22 18:08, Numan Siddique wrote:

On Mon, Oct 17, 2022 at 9:15 AM Adrian Moreno  wrote:


By default, traffic that doesn't match any configured flow will be dropped.
But having that behavior implicit makes those drops more difficult to
visualize.

Make default drops explicit both as default logical flows and as default
openflow flows (e.g: for physical tables).

Signed-off-by: Adrian Moreno 


Thanks for the patch.

Can you please update the documentation in ovn-northd.8.xml about the
newly added flows ?

Also the same applies for patch 3.



Thanks Numan. I've sent v5 with some more documentation.


Thanks
Numan


---
  controller/physical.c |  45 
  northd/northd.c   |  34 +-
  tests/ovn-northd.at   |  84 ++
  tests/ovn.at  | 256 --
  4 files changed, 383 insertions(+), 36 deletions(-)

diff --git a/controller/physical.c b/controller/physical.c
index 705146316..415d16b76 100644
--- a/controller/physical.c
+++ b/controller/physical.c
@@ -833,6 +833,17 @@ put_zones_ofpacts(const struct zone_ids *zone_ids, struct 
ofpbuf *ofpacts_p)
  }
  }

+static void
+add_default_drop_flow(uint8_t table_id,
+  struct ovn_desired_flow_table *flow_table)
+{
+struct match match = MATCH_CATCHALL_INITIALIZER;
+struct ofpbuf ofpacts;
+ofpbuf_init(, 0);
+ofctrl_add_flow(flow_table, table_id, 0, 0, ,
+, hc_uuid);
+}
+
  static void
  put_local_common_flows(uint32_t dp_key,
 const struct sbrec_port_binding *pb,
@@ -2114,6 +2125,13 @@ physical_run(struct physical_ctx *p_ctx,
  }
  }

+/* Table 0, priority 0.
+ * ==
+ *
+ * Drop packets tha do not match any tunnel in_port.
+ */
+add_default_drop_flow(OFTABLE_PHY_TO_LOG, flow_table);
+
  /* Table 37, priority 150.
   * ===
   *
@@ -2159,6 +2177,13 @@ physical_run(struct physical_ctx *p_ctx,
  ofctrl_add_flow(flow_table, OFTABLE_REMOTE_OUTPUT, 0, 0, ,
  , hc_uuid);

+/* Table 38, priority 0.
+ * ==
+ *
+ * Drop packets that do not match previous flows.
+ */
+add_default_drop_flow(OFTABLE_LOCAL_OUTPUT, flow_table);
+
  /* Table 39, Priority 0.
   * ===
   *
@@ -2185,5 +2210,25 @@ physical_run(struct physical_ctx *p_ctx,
  ofctrl_add_flow(flow_table, OFTABLE_SAVE_INPORT, 0, 0, ,
  , hc_uuid);

+/* Table 65, priority 0.
+ * ==
+ *
+ * Drop packets that do not match previous flows.
+ */
+add_default_drop_flow(OFTABLE_LOG_TO_PHY, flow_table);
+
+/* Table 68, priority 0.
+ * ==
+ *
+ * Drop packets that do not match previous flows.
+ */
+add_default_drop_flow(OFTABLE_CHK_LB_HAIRPIN, flow_table);
+
+/* Table 70, priority 0.
+ * ==
+ *
+ * Drop packets that do not match previous flows.
+ */
+add_default_drop_flow(OFTABLE_CT_SNAT_HAIRPIN, flow_table);
  ofpbuf_uninit();
  }
diff --git a/northd/northd.c b/northd/northd.c
index 6771ccce5..ce4f0af56 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -5164,6 +5164,16 @@ ovn_lflow_add_at(struct hmap *lflow_map, struct 
ovn_datapath *od,
 io_port, ctrl_meter, stage_hint, where, hash);
  }

+static void
+__ovn_lflow_add_default_drop(struct hmap *lflow_map,
+ struct ovn_datapath *od,
+ enum ovn_stage stage,
+ const char *where)
+{
+ovn_lflow_add_at(lflow_map, od, stage, 0, "1", "drop;",
+ NULL, NULL, NULL, where );
+}
+
  /* Adds a row with the specified contents to the Logical_Flow table. */
  #define ovn_lflow_add_with_hint__(LFLOW_MAP, OD, STAGE, PRIORITY, MATCH, \
ACTIONS, IN_OUT_PORT, CTRL_METER, \
@@ -5176,6 +5186,10 @@ ovn_lflow_add_at(struct hmap *lflow_map, struct 
ovn_datapath *od,
  ovn_lflow_add_at(LFLOW_MAP, OD, STAGE, PRIORITY, MATCH, ACTIONS, \
   NULL, NULL, STAGE_HINT, OVS_SOURCE_LOCATOR)

+#define ovn_lflow_add_default_drop(LFLOW_MAP, OD, STAGE)\
+__ovn_lflow_add_default_drop(LFLOW_MAP, OD, STAGE, OVS_SOURCE_LOCATOR)
+
+
  /* This macro is similar to ovn_lflow_add_with_hint, except that it requires
   * the IN_OUT_PORT argument, which tells the lport name that appears in the
   * MATCH, which helps ovn-controller to bypass lflows parsing when the lport 
is
@@ -10983,6 +10997,9 @@ build_adm_ctrl_flows_for_lrouter(
   * Broadcast/multicast source address is invalid. */
  ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 100,
"vlan.present || eth.src[40]", "drop;");
+
+/* Default action for L2 security is to drop. */
+

[ovs-dev] [PATCH ovn v5 3/3] northd: add drop sampling

2022-11-04 Thread Adrian Moreno
Two new options are added to NB_Global table that enable drop
sampling by specifying the collector_set_id and the obs_domain_id of
the sample actions added to all drop flows.

For drops coming from an lflow, the sample has the following fields:
- obs_domain_id (32-bit): obs_domain_id << 24 | datapath_key
  - 8 most significant bits: the obs_domain_id specified in the
NB_Global options.
  - 24 least significant bits: the datapath key.
- obs_point_id: the cookie (first 32-bits of the lflow's UUID).

For drops that are inserted by ovn-controller without any associated
lflow, the sample will have the following fields:
- obs_domain_id (32-bit): obs_domain_id << 24
  - 8 most significant bits: the obs_domain_id specified in the
NB_Global options.
  - 24 least significant bits: 0.
- obs_point_id: The openflow table number.

Adding this configuration is not enough to make OVS sample drops. The
appropriate IPFIX configuration needs to be added to those chassis that
you wish to sample from. See man(5) ovs-vswitchd.conf for more details.

Signed-off-by: Adrian Moreno 
---
 NEWS|  2 +
 controller/ovn-controller.c | 44 +
 controller/physical.c   | 44 +
 controller/physical.h   |  6 +++
 northd/automake.mk  |  2 +
 northd/debug.c  | 98 +
 northd/debug.h  | 30 
 northd/northd.c | 77 -
 northd/ovn-northd.8.xml | 26 ++
 ovn-nb.xml  | 28 +++
 ovn-sb.xml  | 29 +++
 tests/ovn.at| 67 -
 12 files changed, 384 insertions(+), 69 deletions(-)
 create mode 100644 northd/debug.c
 create mode 100644 northd/debug.h

diff --git a/NEWS b/NEWS
index 224a7b83e..6c4573b50 100644
--- a/NEWS
+++ b/NEWS
@@ -1,5 +1,7 @@
 Post v22.09.0
 -
+  - ovn-northd: Add configuration knobs to enable drop sampling using OVS's
+per-flow IPFIX sampling.
 
 OVN v22.09.0 - 16 Sep 2022
 --
diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index 8895c7a2b..686d5fa86 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -3150,6 +3150,8 @@ lflow_output_sb_meter_handler(struct engine_node *node, 
void *data)
 struct ed_type_pflow_output {
 /* Desired physical flows. */
 struct ovn_desired_flow_table flow_table;
+/* Drop debugging options. */
+struct physical_debug debug;
 };
 
 static void init_physical_ctx(struct engine_node *node,
@@ -3194,6 +3196,12 @@ static void init_physical_ctx(struct engine_node *node,
 chassis = chassis_lookup_by_name(sbrec_chassis_by_name, chassis_id);
 }
 
+struct sbrec_sb_global_table *sb_global_table =
+(struct sbrec_sb_global_table *)EN_OVSDB_GET(
+engine_get_input("SB_sb_global", node));
+const struct sbrec_sb_global *sb_global =
+sbrec_sb_global_table_first(sb_global_table);
+
 ovs_assert(br_int && chassis);
 
 struct ed_type_ct_zones *ct_zones_data =
@@ -3215,6 +3223,13 @@ static void init_physical_ctx(struct engine_node *node,
 p_ctx->local_bindings = _data->lbinding_data.bindings;
 p_ctx->patch_ofports = _vif_data->patch_ofports;
 p_ctx->chassis_tunnels = _vif_data->chassis_tunnels;
+p_ctx->debug.collector_set_id = smap_get_uint(_global->options,
+  "debug_drop_collector_set",
+  0);
+
+p_ctx->debug.obs_domain_id = smap_get_uint(_global->options,
+   "debug_drop_domain_id",
+   0);
 }
 
 static void *
@@ -3417,6 +3432,33 @@ pflow_output_activated_ports_handler(struct engine_node 
*node, void *data)
 return true;
 }
 
+static bool
+pflow_output_sb_sb_global_handler(struct engine_node *node, void *data)
+{
+struct sbrec_sb_global_table *sb_global_table =
+(struct sbrec_sb_global_table *)EN_OVSDB_GET(
+engine_get_input("SB_sb_global", node));
+const struct sbrec_sb_global *sb_global =
+sbrec_sb_global_table_first(sb_global_table);
+
+struct ed_type_pflow_output *pfo = data;
+
+uint32_t collector_set_id = smap_get_uint(_global->options,
+  "debug_drop_collector_set",
+  0);
+uint32_t obs_domain_id = smap_get_uint(_global->options,
+   "debug_drop_domain_id",
+   0);
+
+if (pfo->debug.collector_set_id != collector_set_id ||
+pfo->debug.obs_domain_id != obs_domain_id) {
+engine_set_node_state(node, EN_UPDATED);
+pfo->debug.collector_set_id = collector_set_id;
+pfo->debug.obs_domain_id = obs_domain_id;
+}
+return true;
+}
+
 static void *
 

[ovs-dev] [PATCH ovn v5 2/3] northd: make default drops explicit

2022-11-04 Thread Adrian Moreno
By default, traffic that doesn't match any configured flow will be dropped.
But having that behavior implicit makes those drops more difficult to
visualize.

Make default drops explicit both as default logical flows and as default
openflow flows (e.g: for physical tables).

Signed-off-by: Adrian Moreno 
---
 controller/physical.c   |  45 +++
 northd/northd.c |  34 +-
 northd/ovn-northd.8.xml |  40 ++-
 tests/ovn-northd.at |  84 +
 tests/ovn.at| 256 +++-
 5 files changed, 421 insertions(+), 38 deletions(-)

diff --git a/controller/physical.c b/controller/physical.c
index 705146316..415d16b76 100644
--- a/controller/physical.c
+++ b/controller/physical.c
@@ -833,6 +833,17 @@ put_zones_ofpacts(const struct zone_ids *zone_ids, struct 
ofpbuf *ofpacts_p)
 }
 }
 
+static void
+add_default_drop_flow(uint8_t table_id,
+  struct ovn_desired_flow_table *flow_table)
+{
+struct match match = MATCH_CATCHALL_INITIALIZER;
+struct ofpbuf ofpacts;
+ofpbuf_init(, 0);
+ofctrl_add_flow(flow_table, table_id, 0, 0, ,
+, hc_uuid);
+}
+
 static void
 put_local_common_flows(uint32_t dp_key,
const struct sbrec_port_binding *pb,
@@ -2114,6 +2125,13 @@ physical_run(struct physical_ctx *p_ctx,
 }
 }
 
+/* Table 0, priority 0.
+ * ==
+ *
+ * Drop packets tha do not match any tunnel in_port.
+ */
+add_default_drop_flow(OFTABLE_PHY_TO_LOG, flow_table);
+
 /* Table 37, priority 150.
  * ===
  *
@@ -2159,6 +2177,13 @@ physical_run(struct physical_ctx *p_ctx,
 ofctrl_add_flow(flow_table, OFTABLE_REMOTE_OUTPUT, 0, 0, ,
 , hc_uuid);
 
+/* Table 38, priority 0.
+ * ==
+ *
+ * Drop packets that do not match previous flows.
+ */
+add_default_drop_flow(OFTABLE_LOCAL_OUTPUT, flow_table);
+
 /* Table 39, Priority 0.
  * ===
  *
@@ -2185,5 +2210,25 @@ physical_run(struct physical_ctx *p_ctx,
 ofctrl_add_flow(flow_table, OFTABLE_SAVE_INPORT, 0, 0, ,
 , hc_uuid);
 
+/* Table 65, priority 0.
+ * ==
+ *
+ * Drop packets that do not match previous flows.
+ */
+add_default_drop_flow(OFTABLE_LOG_TO_PHY, flow_table);
+
+/* Table 68, priority 0.
+ * ==
+ *
+ * Drop packets that do not match previous flows.
+ */
+add_default_drop_flow(OFTABLE_CHK_LB_HAIRPIN, flow_table);
+
+/* Table 70, priority 0.
+ * ==
+ *
+ * Drop packets that do not match previous flows.
+ */
+add_default_drop_flow(OFTABLE_CT_SNAT_HAIRPIN, flow_table);
 ofpbuf_uninit();
 }
diff --git a/northd/northd.c b/northd/northd.c
index b7388afc5..4b1829d37 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -5155,6 +5155,16 @@ ovn_lflow_add_at(struct hmap *lflow_map, struct 
ovn_datapath *od,
io_port, ctrl_meter, stage_hint, where, hash);
 }
 
+static void
+__ovn_lflow_add_default_drop(struct hmap *lflow_map,
+ struct ovn_datapath *od,
+ enum ovn_stage stage,
+ const char *where)
+{
+ovn_lflow_add_at(lflow_map, od, stage, 0, "1", "drop;",
+ NULL, NULL, NULL, where );
+}
+
 /* Adds a row with the specified contents to the Logical_Flow table. */
 #define ovn_lflow_add_with_hint__(LFLOW_MAP, OD, STAGE, PRIORITY, MATCH, \
   ACTIONS, IN_OUT_PORT, CTRL_METER, \
@@ -5167,6 +5177,10 @@ ovn_lflow_add_at(struct hmap *lflow_map, struct 
ovn_datapath *od,
 ovn_lflow_add_at(LFLOW_MAP, OD, STAGE, PRIORITY, MATCH, ACTIONS, \
  NULL, NULL, STAGE_HINT, OVS_SOURCE_LOCATOR)
 
+#define ovn_lflow_add_default_drop(LFLOW_MAP, OD, STAGE)\
+__ovn_lflow_add_default_drop(LFLOW_MAP, OD, STAGE, OVS_SOURCE_LOCATOR)
+
+
 /* This macro is similar to ovn_lflow_add_with_hint, except that it requires
  * the IN_OUT_PORT argument, which tells the lport name that appears in the
  * MATCH, which helps ovn-controller to bypass lflows parsing when the lport is
@@ -10974,6 +10988,9 @@ build_adm_ctrl_flows_for_lrouter(
  * Broadcast/multicast source address is invalid. */
 ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 100,
   "vlan.present || eth.src[40]", "drop;");
+
+/* Default action for L2 security is to drop. */
+ovn_lflow_add_default_drop(lflows, od, S_ROUTER_IN_ADMISSION);
 }
 }
 
@@ -11215,6 +11232,8 @@ build_neigh_learning_flows_for_lrouter(
   "nd_ns", "put_nd(inport, ip6.src, nd.sll); next;",
   copp_meter_get(COPP_ND_NS, od->nbr->copp,
  meter_groups));

[ovs-dev] [PATCH ovn v5 1/3] actions: add sample action

2022-11-04 Thread Adrian Moreno
sample ovn action encodes into the OFPACT_SAMPLE ovs action.

OVN action allows the following parameters:

- obs_domain_id: 8-bit integer that identifies the sampling application.
  This value will be combined with the datapath's tunnel_id to form the
  final observation_domain_id that will be used in the OVS action as:
ObservationDomainID = obs_domain_id << 24 | (dp_key & 0xFFFFFF)

- obs_point_id: a 32-bit integer or the $cookie macro that will be
  expanded into the first 32 bits of the lflow's UUID.

- probability: a 16-bit integer that specifies the sampling probability.
  Specifying 0 has no effect and 65535 means sampling all packets.

- collector_set: the 32-bit id that has to be configured in OVS's
  Flow_Sample_Collector_Set table in order to configure IPFIX sampling.

Signed-off-by: Adrian Moreno 
---
 controller/lflow.c|   1 +
 include/ovn/actions.h |  16 ++
 lib/actions.c | 120 ++
 ovn-sb.xml|  52 ++
 tests/ovn.at  |  28 ++
 tests/test-ovn.c  |   3 ++
 utilities/ovn-trace.c |   2 +
 7 files changed, 222 insertions(+)

diff --git a/controller/lflow.c b/controller/lflow.c
index cc0f31db0..ad316c17f 100644
--- a/controller/lflow.c
+++ b/controller/lflow.c
@@ -1007,6 +1007,7 @@ add_matches_to_flow_table(const struct sbrec_logical_flow 
*lflow,
 .group_table = l_ctx_out->group_table,
 .meter_table = l_ctx_out->meter_table,
 .lflow_uuid = lflow->header_.uuid,
+.dp_key = ldp->datapath->tunnel_key,
 
 .pipeline = ingress ? OVNACT_P_INGRESS : OVNACT_P_EGRESS,
 .ingress_ptable = OFTABLE_LOG_INGRESS_PIPELINE,
diff --git a/include/ovn/actions.h b/include/ovn/actions.h
index d7ee84dac..009487cfc 100644
--- a/include/ovn/actions.h
+++ b/include/ovn/actions.h
@@ -121,6 +121,7 @@ struct ovn_extend_table;
 OVNACT(COMMIT_ECMP_NH,ovnact_commit_ecmp_nh)  \
 OVNACT(CHK_ECMP_NH_MAC,   ovnact_result)  \
 OVNACT(CHK_ECMP_NH,   ovnact_result)  \
+OVNACT(SAMPLE,ovnact_sample)  \
 
 /* enum ovnact_type, with a member OVNACT_ for each action. */
 enum OVS_PACKED_ENUM ovnact_type {
@@ -456,6 +457,18 @@ struct ovnact_lookup_fdb {
 struct expr_field dst; /* 1-bit destination field. */
 };
 
+/* OVNACT_SAMPLE */
+struct ovnact_sample {
+struct ovnact ovnact;
+uint16_t probability;   /* probability over UINT16_MAX. */
+uint8_t obs_domain_id;  /* most significant byte of the
+   observation domain id. The other 24 bits
+   will come from the datapath's tunnel key. */
+uint32_t collector_set_id;  /* colector_set_id. */
+uint32_t obs_point_id;  /* observation point id. */
+bool use_cookie;/* use cookie as obs_point_id */
+};
+
 /* OVNACT_COMMIT_ECMP_NH. */
 struct ovnact_commit_ecmp_nh {
 struct ovnact ovnact;
@@ -785,6 +798,9 @@ struct ovnact_encode_params {
 /* The logical flow uuid that drove this action. */
 struct uuid lflow_uuid;
 
+/* The datapath key. */
+uint32_t dp_key;
+
 /* OVN maps each logical flow table (ltable), one-to-one, onto a physical
  * OpenFlow flow table (ptable).  A number of parameters describe this
  * mapping and data related to flow tables:
diff --git a/lib/actions.c b/lib/actions.c
index adbb42db4..cef626f84 100644
--- a/lib/actions.c
+++ b/lib/actions.c
@@ -4279,6 +4279,124 @@ encode_CHECK_OUT_PORT_SEC(const struct ovnact_result 
*dl,
MLF_CHECK_PORT_SEC_BIT, ofpacts);
 }
 
+static void
+format_SAMPLE(const struct ovnact_sample *sample, struct ds *s)
+{
+ds_put_format(s, "sample(probability=%"PRId16, sample->probability);
+
+ds_put_format(s, ",collector_set=%"PRId32, sample->collector_set_id);
+ds_put_format(s, ",obs_domain=%"PRId8, sample->obs_domain_id);
+if (sample->use_cookie) {
+ds_put_cstr(s, ",obs_point=$cookie");
+} else {
+ds_put_format(s, ",obs_point=%"PRId32, sample->obs_point_id);
+}
+ds_put_format(s, ");");
+}
+
+static void
+encode_SAMPLE(const struct ovnact_sample *sample,
+  const struct ovnact_encode_params *ep,
+  struct ofpbuf *ofpacts)
+{
+struct ofpact_sample *os = ofpact_put_SAMPLE(ofpacts);
+os->probability = sample->probability;
+os->collector_set_id = sample->collector_set_id;
+os->obs_domain_id =
+(sample->obs_domain_id << 24) | (ep->dp_key & 0xFF);
+
+if (sample->use_cookie) {
+os->obs_point_id = ep->lflow_uuid.parts[0];
+} else {
+os->obs_point_id = sample->obs_point_id;
+}
+os->sampling_port = OFPP_NONE;
+}
+
+static void
+parse_sample_arg(struct action_context *ctx, struct ovnact_sample *sample)
+{
+if (lexer_match_id(ctx->lexer, "probability")) {
+if (!lexer_force_match(ctx->lexer, LEX_T_EQUALS)) {
+return;
+}
+if 

[ovs-dev] [PATCH ovn v5 0/3] Add ovn drop debugging

2022-11-04 Thread Adrian Moreno
Very often when troubleshooting networking issues in an OVN cluster one
would like to know if any packet (or a specific one) is being dropped by
OVN.

Currently, this cannot be known because of two main reasons:

1 - Implicit drops: Some tables do not have a default action
(priority=0, match=1). In this case, a packet that does not match any
rule will be silently dropped.

2 - Even on explicit drops, we only know a packet was dropped. We lack
information about that packet.

In order to improve this, this series introduces a two-fold solution:

- First, make all drops explicit:
   - northd adds a default (match = "1") "drop;" action to those tables
   that currently lack one.
   - ovn-controller adds an explicit drop action on those tables that are not
   associated with logical flows (i.e: physical-to-logical mappings).

- Secondly, allow sampling of all drops. By introducing a new OVN
  action: "sample" (equivalent to OVS's), OVN can make OVS sample the
  packets as they are dropped. In order to be able to correlate those
  samples back to what exact rule generated them, the user specifies
  an 8-bit observation_domain_id. Based on that, the samples contain
  the following fields:
  - obs_domain_id:
 - 8 most significant bits = the provided observation_domain_id.
     - 24 least significant bits = the datapath's tunnel key if the
   drop comes from a lflow or zero otherwise.
  - obs_point_id: the first 32-bits of the lflow's UUID (i.e: the
cookie) if the drop comes from an lflow or the table number
otherwise.

Based on the above changes in the flows, all of which are optional,
users can collect IPFIX samples of the packets that are dropped by OVN
which contain header information useful for debugging.

* Note on observation_domain_ids:
By allowing the user to specify only the 8 most significant bits of the
obs_domain_id and having OVN combine it with the datapath's tunnel key,
OVN could be extended to support more than one "sampling" application.
For instance, ACL sampling could be developed in the future and, by
specifying a different observation_domain_id, it could co-exist with the
drop sampling mode implemented in the current series while still
allowing to uniquely identify the flow that created the sample.

* Notes on testing and usage:
Any IPFIX collector that parses ObservationPointID and
ObservationDomainID fields can be used. For instance, nfdump 1.7
supports these fields. Example of how to capture and analyze
drops:
# Enable debug sampling:
$ ovn-nbctl set NB_Global . options:debug_drop_collector_set=1 
options:debug_drop_domain_id=1
# Start nfcapd:
nfcapd -p 2055 -l nfcap &
# Configure sampling on the OVS you want to inspect:
$ ovs-vsctl --id=@br get Bridge br-int -- --id=@i create IPFIX
targets=\"172.18.0.1:2055\" --  create Flow_Sample_Collector_Set
bridge=@br id=1
# Inspect samples and figure out what LogicalFlow caused them:
$ nfdump -r nfcap -o fmt:'%line %odid %opid'
Date first seen Duration Proto  Src IP Addr:Port
Dst IP Addr:Port   PacketsBytes Flows obsDomainID   obsPointID
1970-01-01 01:09:36.000 00:00:00.000 UDP 172.18.0.1:49230 ->
239.255.255.250:190012 6356 1 0x00109 0x00d8dd23c7
1970-01-01 01:01:34.000 00:00:00.000 UDP 172.18.0.1:5353  ->
224.0.0.251:5353   16589257 1 0x00109 0x00d8dd23c7
[...]
$ ovn-sbctl list Logical_Flow | grep -A 11 d8dd23c7
_uuid   : d8dd23c7-1451-4ea3-add7-8d68b4be4691
actions :
"sample(probability=65535,collector_set=1,obs_domain=1,obs_point=$cookie);
/* drop */"
controller_meter: []
external_ids: {source="northd.c:12504",
stage-name=lr_in_ip_input}
logical_datapath: []
logical_dp_group: 0dc1b195-c647-4277-aea0-0bad5e896f51
match   : "ip4.mcast || ip6.mcast"
pipeline: ingress
priority: 82
table_id: 3
tags: {}
hash: 0

V4 -> V5: Added documentation
V3 -> V4: Make explicit drops the default behavior.
V2 -> V3: Fix rebase problem on unit test
V1 -> V2
- Rebased and Addressed Mark's comments.
- Added NEWS section.


Adrian Moreno (3):
  actions: add sample action
  northd: make default drops explicit
  northd: add drop sampling

 NEWS|   2 +
 controller/lflow.c  |   1 +
 controller/ovn-controller.c |  44 ++
 controller/physical.c   |  77 -
 controller/physical.h   |   6 +
 include/ovn/actions.h   |  16 ++
 lib/actions.c   | 120 ++
 northd/automake.mk  |   2 +
 northd/debug.c  |  98 
 northd/debug.h  |  30 
 northd/northd.c | 109 -
 northd/ovn-northd.8.xml |  66 +++-
 ovn-nb.xml  |  28 
 ovn-sb.xml  |  81 ++
 tests/ovn-northd.at |  84 ++
 tests/ovn.at| 303 
 

[ovs-dev] [PATCH] ovs-thread: Detect changes in number of cpus

2022-11-04 Thread Adrian Moreno
Currently, things like the number of handler and revalidator threads are
calculated based on the number of available CPUs. However, this number
is considered static and only calculated once, hence ignoring events
such as cpus being hotplugged, switched on/off or affinity mask
changing.

On the other hand, checking the number of available CPUs multiple times
per second seems like an overkill.
Affinity should not change that often and, even if it does, the impact
of destroying and recreating all the threads so often is probably a
price too expensive to pay.

This patch makes the number of cpus be recalculated every 5 seconds
which seems a reasonable middle point.
It generates an impact in the main loop duration of <1% and a worst-case
scenario impact in throughput of < 5% [1].

As a result of these changes (assuming the patch is backported):
- >=2.16: a change in the cpu affinity reflects on the number of threads
  in (at most) 5 seconds.
- < 2.16: a change in the cpu affinity will be reflected on
  the number of threads the next time there is a call to
  bridge_reconfigure() (e.g: on the next DB change), and 5 seconds
  have passed.

The difference in behavior is because on older versions the thread
number calculation was done on bridge reconfiguration while newer
versions moved this logic down to the dpif layer and is run on
dpif->run() stage.
Considering it has not been a huge problem up to today and that the
cpu change would be reflected sooner or later (e.g the user could
force a recalculation with a simple ovs-vsctl command), I think it
might be OK to leave like that.

[1] Tested in the worst-case scenario of disabling the kernel cache
(other_config:flow-size=0), modifying ovs-vswitchd's affinity so the
number of handlers goes up and down every 5 seconds and calculated the
difference in netperf's ops/sec.

Fixes: be15ec48d766 ("lib: Use a more accurate value for CPU count 
(sched_getaffinity).")
Cc: david.march...@redhat.com
Signed-off-by: Adrian Moreno 
---
 lib/ovs-thread.c | 61 +---
 1 file changed, 37 insertions(+), 24 deletions(-)

diff --git a/lib/ovs-thread.c b/lib/ovs-thread.c
index 78ed3e970..4709f7ead 100644
--- a/lib/ovs-thread.c
+++ b/lib/ovs-thread.c
@@ -31,6 +31,7 @@
 #include "openvswitch/poll-loop.h"
 #include "seq.h"
 #include "socket-util.h"
+#include "timeval.h"
 #include "util.h"
 
 #ifdef __CHECKER__
@@ -627,42 +628,54 @@ ovs_thread_stats_next_bucket(const struct ovsthread_stats 
*stats, size_t i)
 }
 
 
-/* Returns the total number of cores available to this process, or 0 if the
- * number cannot be determined. */
-int
-count_cpu_cores(void)
+static int
+count_cpu_cores__(void)
 {
-static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
-static long int n_cores;
+long int n_cores;
 
-if (ovsthread_once_start()) {
 #ifndef _WIN32
-n_cores = sysconf(_SC_NPROCESSORS_ONLN);
+n_cores = sysconf(_SC_NPROCESSORS_ONLN);
+#else
+SYSTEM_INFO sysinfo;
+GetSystemInfo();
+n_cores = sysinfo.dwNumberOfProcessors;
+#endif
 #ifdef __linux__
-if (n_cores > 0) {
-cpu_set_t *set = CPU_ALLOC(n_cores);
+if (n_cores > 0) {
+cpu_set_t *set = CPU_ALLOC(n_cores);
 
-if (set) {
-size_t size = CPU_ALLOC_SIZE(n_cores);
+if (set) {
+size_t size = CPU_ALLOC_SIZE(n_cores);
 
-if (!sched_getaffinity(0, size, set)) {
-n_cores = CPU_COUNT_S(size, set);
-}
-CPU_FREE(set);
+if (!sched_getaffinity(0, size, set)) {
+n_cores = CPU_COUNT_S(size, set);
 }
+CPU_FREE(set);
 }
-#endif
-#else
-SYSTEM_INFO sysinfo;
-GetSystemInfo();
-n_cores = sysinfo.dwNumberOfProcessors;
-#endif
-ovsthread_once_done();
 }
-
+#endif
 return n_cores > 0 ? n_cores : 0;
 }
 
+/* It's unlikely that the available cpus change several times per second and
+ * even if it does, it's not needed (or desired) to react to such changes so
+ * quickly.*/
+#define COUNT_CPU_UPDATE_TIME_MS 5000
+/* Returns the current total number of cores available to this process, or 0
+ * if the number cannot be determined.
+ * It is assumed that this function is only called from the main thread.*/
+int count_cpu_cores(void) {
+static int cpu_cores;
+static long long int last_updated = 0;
+long long int now = time_msec();
+
+if (now - last_updated >= COUNT_CPU_UPDATE_TIME_MS) {
+last_updated = now;
+cpu_cores = count_cpu_cores__();
+}
+return cpu_cores;
+}
+
 /* Returns the total number of cores on the system, or 0 if the
  * number cannot be determined. */
 int
-- 
2.37.3

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH] rculist: Fix iteration macros.

2022-11-04 Thread Ilya Maximets
Some macros for rculist have no users and there are no unit tests
specific to that library as well, so broken code wasn't spotted
while updating to multi-variable iterators.

Fixing multiple problems like missing commas, parentheses, incorrect
variable and macro names.

Fixes: d293965d7b06 ("rculist: use multi-variable helpers for loop macros.")
Reported-by: Subrata Nath 
Co-authored-by: Dumitru Ceara 
Signed-off-by: Dumitru Ceara 
Signed-off-by: Ilya Maximets 
---
 lib/rculist.h | 18 +-
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/lib/rculist.h b/lib/rculist.h
index c0d77acf9..9bb8cbf3e 100644
--- a/lib/rculist.h
+++ b/lib/rculist.h
@@ -380,18 +380,18 @@ rculist_is_singleton_protected(const struct rculist *list)
 #define RCULIST_FOR_EACH_REVERSE_PROTECTED(ITER, MEMBER, RCULIST) \
 for (INIT_MULTIVAR(ITER, MEMBER, (RCULIST)->prev, struct rculist);\
  CONDITION_MULTIVAR(ITER, MEMBER, ITER_VAR(ITER) != (RCULIST));   \
- UPDATE_MULTIVAR(ITER, ITER_VAR(VAR).prev))
+ UPDATE_MULTIVAR(ITER, ITER_VAR(ITER)->prev))
 
 #define RCULIST_FOR_EACH_REVERSE_PROTECTED_CONTINUE(ITER, MEMBER, RCULIST)\
 for (INIT_MULTIVAR(ITER, MEMBER, (ITER)->MEMBER.prev, struct rculist);\
  CONDITION_MULTIVAR(ITER, MEMBER, ITER_VAR(ITER) != (RCULIST));   \
- UPDATE_MULTIVAR(ITER, ITER_VAR(VAR).prev))
+ UPDATE_MULTIVAR(ITER, ITER_VAR(ITER)->prev))
 
 #define RCULIST_FOR_EACH_PROTECTED(ITER, MEMBER, RCULIST) \
 for (INIT_MULTIVAR(ITER, MEMBER, rculist_next_protected(RCULIST), \
struct rculist);   \
  CONDITION_MULTIVAR(ITER, MEMBER, ITER_VAR(ITER) != (RCULIST));   \
- UPDATE_MULTIVAR(ITER, rculist_next_protected(ITER_VAR(ITER)))\
+ UPDATE_MULTIVAR(ITER, rculist_next_protected(ITER_VAR(ITER))))   \
 
 #define RCULIST_FOR_EACH_SAFE_SHORT_PROTECTED(ITER, MEMBER, RCULIST)  \
 for (INIT_MULTIVAR_SAFE_SHORT(ITER, MEMBER,   \
@@ -399,18 +399,18 @@ rculist_is_singleton_protected(const struct rculist *list)
   struct rculist);\
  CONDITION_MULTIVAR_SAFE_SHORT(ITER, MEMBER,  \
ITER_VAR(ITER) != (RCULIST),   \
- ITER_NEXT_VAR(ITER) = rculist_next_protected(ITER_VAR(VAR)));\
-UPDATE_MULTIVAR_SHORT(ITER))
+ ITER_NEXT_VAR(ITER) = rculist_next_protected(ITER_VAR(ITER)));   \
+UPDATE_MULTIVAR_SAFE_SHORT(ITER))
 
 #define RCULIST_FOR_EACH_SAFE_LONG_PROTECTED(ITER, NEXT, MEMBER, RCULIST) \
 for (INIT_MULTIVAR_SAFE_LONG(ITER, NEXT, MEMBER,  \
- rculist_next_protected(RCULIST)  \
+ rculist_next_protected(RCULIST), \
  struct rculist); \
- CONDITION_MULTIVAR_SAFE_LONG(VAR, NEXT, MEMBER   \
+ CONDITION_MULTIVAR_SAFE_LONG(ITER, NEXT, MEMBER, \
   ITER_VAR(ITER) != (RCULIST),\
- ITER_VAR(NEXT) = rculist_next_protected(ITER_VAR(VAR)),  \
+ ITER_VAR(NEXT) = rculist_next_protected(ITER_VAR(ITER)), \
   ITER_VAR(NEXT) != (RCULIST));   \
-UPDATE_MULTIVAR_LONG(ITER))
+UPDATE_MULTIVAR_SAFE_LONG(ITER, NEXT))
 
 #define RCULIST_FOR_EACH_SAFE_PROTECTED(...)  \
 OVERLOAD_SAFE_MACRO(RCULIST_FOR_EACH_SAFE_LONG_PROTECTED, \
-- 
2.37.3

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn] northd: Refactor build_lrouter_nat_flows_for_lb function

2022-11-04 Thread Ales Musil
On Fri, Nov 4, 2022 at 2:57 PM Mark Michelson  wrote:

> Hi Ales,
>

Hi Mark,
thank you for the review.


>
> On 10/20/22 10:14, Ales Musil wrote:
> > To avoid make it easier to add flow to this stage refactor
> > the function, this has also the benefit that we should
> > see fewer allocations due to rearrange how we create flows
> > and how do we manipulate with match string.
>
> This commit message was kind of hard to follow. I think it could be
> rewritten as:
>
> "To make it easier to add flows to this stage, refactor the function.
> This also has the benefit that we should see fewer allocations due to
> rearranging how we create flows and how we manipulate the match string."
>

I am not sure what was going through my mind, but I am completely fine with
changing the
commit message to your suggestion. So if there is anything that will need
to be changed I'll update it in v2.
If not, perhaps the maintainer that will apply could change it.


>
> Other than that, I like that you restricted some of the more unorthodox
> cases of dynamic string use to within small functions. It helps to
> preserve the performance improvements while not adding any
> unnecessary "surprises" for most developers.
>
> Acked-by: Mark Michelson 
>
> >
> > Signed-off-by: Ales Musil 
> > ---
> >   northd/northd.c | 377 ++--
> >   1 file changed, 173 insertions(+), 204 deletions(-)
> >
> > diff --git a/northd/northd.c b/northd/northd.c
> > index 6771ccce5..42b9d6272 100644
> > --- a/northd/northd.c
> > +++ b/northd/northd.c
> > @@ -9923,50 +9923,125 @@ get_force_snat_ip(struct ovn_datapath *od,
> const char *key_type,
> >   return true;
> >   }
> >
> > +#define LROUTER_NAT_LB_FLOW_INIT(MATCH, ACTION, PRIO) \
> > +(struct lrouter_nat_lb_flow)  \
> > +{ .action = (ACTION), .lflow_ref = NULL,  \
> > +  .hash = ovn_logical_flow_hash(  \
> > +  ovn_stage_get_table(S_ROUTER_IN_DNAT),  \
> > +  ovn_stage_get_pipeline(S_ROUTER_IN_DNAT),   \
> > +  (PRIO), ds_cstr(MATCH), (ACTION)) }
> > +
> > +enum lrouter_nat_lb_flow_type {
> > +LROUTER_NAT_LB_FLOW_NORMAL = 0,
> > +LROUTER_NAT_LB_FLOW_SKIP_SNAT,
> > +LROUTER_NAT_LB_FLOW_FORCE_SNAT,
> > +LROUTER_NAT_LB_FLOW_MAX,
> > +};
> > +
> > +struct lrouter_nat_lb_flow {
> > +char *action;
> > +struct ovn_lflow *lflow_ref;
> > +
> > +uint32_t hash;
> > +};
> > +
> > +struct lrouter_nat_lb_flows_ctx {
> > +struct lrouter_nat_lb_flow new[LROUTER_NAT_LB_FLOW_MAX];
> > +struct lrouter_nat_lb_flow est[LROUTER_NAT_LB_FLOW_MAX];
> > +
> > +struct ds *new_match;
> > +struct ds *est_match;
> > +struct ds *undnat_match;
> > +
> > +struct ovn_lb_vip *lb_vip;
> > +struct ovn_northd_lb *lb;
> > +bool reject;
> > +
> > +int prio;
> > +
> > +struct hmap *lflows;
> > +const struct shash *meter_groups;
> > +};
> > +
> >   static void
> > -build_gw_lrouter_nat_flows_for_lb(struct ovn_northd_lb *lb,
> > -  struct ovn_datapath **dplist, int
> n_dplist,
> > -  bool reject, char *new_match,
> > -  char *new_action, char *est_match,
> > -  char *est_action, struct hmap *lflows,
> > -  int prio, const struct shash
> *meter_groups)
> > +build_distr_lrouter_nat_flows_for_lb(struct lrouter_nat_lb_flows_ctx
> *ctx,
> > + enum lrouter_nat_lb_flow_type type,
> > + struct ovn_datapath *od)
> >   {
> > -if (!n_dplist) {
> > +char *gw_action = od->is_gw_router ? "ct_dnat;" :
> "ct_dnat_in_czone;";
> > +/* Store the match lengths, so we can reuse the ds buffer. */
> > +size_t new_match_len = ctx->new_match->length;
> > +size_t est_match_len = ctx->est_match->length;
> > +size_t undnat_match_len = ctx->undnat_match->length;
> > +
> > +
> > +const char *meter = NULL;
> > +
> > +if (ctx->reject) {
> > +meter = copp_meter_get(COPP_REJECT, od->nbr->copp,
> ctx->meter_groups);
> > +}
> > +
> > +if (ctx->lb_vip->n_backends || !ctx->lb_vip->empty_backend_rej) {
> > +ds_put_format(ctx->new_match, " && is_chassis_resident(%s)",
> > +  od->l3dgw_ports[0]->cr_port->json_key);
> > +ds_put_format(ctx->est_match, " && is_chassis_resident(%s)",
> > +  od->l3dgw_ports[0]->cr_port->json_key);
> > +}
> > +
> > +ovn_lflow_add_with_hint__(ctx->lflows, od, S_ROUTER_IN_DNAT,
> ctx->prio,
> > +  ds_cstr(ctx->new_match),
> ctx->new[type].action,
> > +  NULL, meter, &ctx->lb->nlb->header_);
> > +ovn_lflow_add_with_hint(ctx->lflows, od, S_ROUTER_IN_DNAT,
> ctx->prio,
> > +ds_cstr(ctx->est_match),
> ctx->est[type].action,
> > +   

Re: [ovs-dev] [PATCH ovn] northd: Refactor build_lrouter_nat_flows_for_lb function

2022-11-04 Thread Mark Michelson

Hi Ales,

On 10/20/22 10:14, Ales Musil wrote:

To avoid make it easier to add flow to this stage refactor
the function, this has also the benefit that we should
see fewer allocations due to rearrange how we create flows
and how do we manipulate with match string.


This commit message was kind of hard to follow. I think it could be 
rewritten as:


"To make it easier to add flows to this stage, refactor the function. 
This also has the benefit that we should see fewer allocations due to 
rearranging how we create flows and how we manipulate the match string."


Other than that, I like that you restricted some of the more unorthodox 
cases of dynamic string use to within small functions. It helps to 
preserve the performance improvements while not adding any 
unnecessary "surprises" for most developers.


Acked-by: Mark Michelson 



Signed-off-by: Ales Musil 
---
  northd/northd.c | 377 ++--
  1 file changed, 173 insertions(+), 204 deletions(-)

diff --git a/northd/northd.c b/northd/northd.c
index 6771ccce5..42b9d6272 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -9923,50 +9923,125 @@ get_force_snat_ip(struct ovn_datapath *od, const char 
*key_type,
  return true;
  }
  
+#define LROUTER_NAT_LB_FLOW_INIT(MATCH, ACTION, PRIO) \

+(struct lrouter_nat_lb_flow)  \
+{ .action = (ACTION), .lflow_ref = NULL,  \
+  .hash = ovn_logical_flow_hash(  \
+  ovn_stage_get_table(S_ROUTER_IN_DNAT),  \
+  ovn_stage_get_pipeline(S_ROUTER_IN_DNAT),   \
+  (PRIO), ds_cstr(MATCH), (ACTION)) }
+
+enum lrouter_nat_lb_flow_type {
+LROUTER_NAT_LB_FLOW_NORMAL = 0,
+LROUTER_NAT_LB_FLOW_SKIP_SNAT,
+LROUTER_NAT_LB_FLOW_FORCE_SNAT,
+LROUTER_NAT_LB_FLOW_MAX,
+};
+
+struct lrouter_nat_lb_flow {
+char *action;
+struct ovn_lflow *lflow_ref;
+
+uint32_t hash;
+};
+
+struct lrouter_nat_lb_flows_ctx {
+struct lrouter_nat_lb_flow new[LROUTER_NAT_LB_FLOW_MAX];
+struct lrouter_nat_lb_flow est[LROUTER_NAT_LB_FLOW_MAX];
+
+struct ds *new_match;
+struct ds *est_match;
+struct ds *undnat_match;
+
+struct ovn_lb_vip *lb_vip;
+struct ovn_northd_lb *lb;
+bool reject;
+
+int prio;
+
+struct hmap *lflows;
+const struct shash *meter_groups;
+};
+
  static void
-build_gw_lrouter_nat_flows_for_lb(struct ovn_northd_lb *lb,
-  struct ovn_datapath **dplist, int n_dplist,
-  bool reject, char *new_match,
-  char *new_action, char *est_match,
-  char *est_action, struct hmap *lflows,
-  int prio, const struct shash *meter_groups)
+build_distr_lrouter_nat_flows_for_lb(struct lrouter_nat_lb_flows_ctx *ctx,
+ enum lrouter_nat_lb_flow_type type,
+ struct ovn_datapath *od)
  {
-if (!n_dplist) {
+char *gw_action = od->is_gw_router ? "ct_dnat;" : "ct_dnat_in_czone;";
+/* Store the match lengths, so we can reuse the ds buffer. */
+size_t new_match_len = ctx->new_match->length;
+size_t est_match_len = ctx->est_match->length;
+size_t undnat_match_len = ctx->undnat_match->length;
+
+
+const char *meter = NULL;
+
+if (ctx->reject) {
+meter = copp_meter_get(COPP_REJECT, od->nbr->copp, ctx->meter_groups);
+}
+
+if (ctx->lb_vip->n_backends || !ctx->lb_vip->empty_backend_rej) {
+ds_put_format(ctx->new_match, " && is_chassis_resident(%s)",
+  od->l3dgw_ports[0]->cr_port->json_key);
+ds_put_format(ctx->est_match, " && is_chassis_resident(%s)",
+  od->l3dgw_ports[0]->cr_port->json_key);
+}
+
+ovn_lflow_add_with_hint__(ctx->lflows, od, S_ROUTER_IN_DNAT, ctx->prio,
+  ds_cstr(ctx->new_match), ctx->new[type].action,
+  NULL, meter, &ctx->lb->nlb->header_);
+ovn_lflow_add_with_hint(ctx->lflows, od, S_ROUTER_IN_DNAT, ctx->prio,
+ds_cstr(ctx->est_match), ctx->est[type].action,
+&ctx->lb->nlb->header_);
+
+ds_truncate(ctx->new_match, new_match_len);
+ds_truncate(ctx->est_match, est_match_len);
+
+if (!ctx->lb_vip->n_backends) {
  return;
  }
  
-struct ovn_lflow *lflow_ref_new = NULL, *lflow_ref_est = NULL;

-uint32_t hash_new = ovn_logical_flow_hash(
-ovn_stage_get_table(S_ROUTER_IN_DNAT),
-ovn_stage_get_pipeline(S_ROUTER_IN_DNAT),
-prio, new_match, new_action);
-uint32_t hash_est = ovn_logical_flow_hash(
-ovn_stage_get_table(S_ROUTER_IN_DNAT),
-ovn_stage_get_pipeline(S_ROUTER_IN_DNAT),
-prio, est_match, est_action);
+char *action = type == LROUTER_NAT_LB_FLOW_NORMAL
+   ? gw_action : 

[ovs-dev] [PATCH] flow: Consistent VXLAN UDP src ports for fragmented packets

2022-11-04 Thread Hemanth Aramadaka via dev
Issue:

The src-port for UDP is based on RSS hash in the packet metadata.
In case of packets coming from VM it will be 5-tuple, if available,
otherwise just IP addresses. If the VM fragments a large IP packet
and sends the fragments to ovs, only the first fragment will contain
the L4 header. Therefore, the first fragment and subsequent fragments
get different UDP src ports in the outgoing VXLAN header. This can
lead to fragment re-ordering in the fabric as packet will take
different paths.

Fix:

Intention of this is to avoid fragment packets taking different paths.
For example, due to presence of firewalls, fragment packets will take
different paths and will get dropped. To avoid this we ignore the L4
header during hash calculation only in the case of fragmented packets.

Signed-off-by: Hemanth Aramadaka 
---
 lib/flow.c | 20 +---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/lib/flow.c b/lib/flow.c
index c3a3aa3ce..20cca5937 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -1018,7 +1018,9 @@ miniflow_extract(struct dp_packet *packet, struct 
miniflow *dst)
 miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
 miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
 if (dl_type == htons(ETH_TYPE_IP)) {
-dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
+if (!(nw_frag & FLOW_NW_FRAG_MASK)) {
+dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
+}
 } else if (dl_type == htons(ETH_TYPE_IPV6)) {
 dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
 }
@@ -1033,7 +1035,9 @@ miniflow_extract(struct dp_packet *packet, struct 
miniflow *dst)
 miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
 miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
 if (dl_type == htons(ETH_TYPE_IP)) {
-dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
+if (!(nw_frag & FLOW_NW_FRAG_MASK)) {
+dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
+}
 } else if (dl_type == htons(ETH_TYPE_IPV6)) {
 dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
 }
@@ -2248,7 +2252,7 @@ miniflow_hash_5tuple(const struct miniflow *flow, 
uint32_t basis)
 
 if (flow) {
 ovs_be16 dl_type = MINIFLOW_GET_BE16(flow, dl_type);
-uint8_t nw_proto;
+uint8_t nw_proto, nw_frag;
 
 if (dl_type == htons(ETH_TYPE_IPV6)) {
 struct flowmap map = FLOWMAP_EMPTY_INITIALIZER;
@@ -2270,6 +2274,11 @@ miniflow_hash_5tuple(const struct miniflow *flow, 
uint32_t basis)
 
 nw_proto = MINIFLOW_GET_U8(flow, nw_proto);
 hash = hash_add(hash, nw_proto);
+
+nw_frag = MINIFLOW_GET_U8(flow, nw_frag);
+if (nw_frag & FLOW_NW_FRAG_MASK) {
+goto out;
+}
 if (nw_proto != IPPROTO_TCP && nw_proto != IPPROTO_UDP
 && nw_proto != IPPROTO_SCTP && nw_proto != IPPROTO_ICMP
 && nw_proto != IPPROTO_ICMPV6) {
@@ -2292,6 +2301,7 @@ flow_hash_5tuple(const struct flow *flow, uint32_t basis)
 {
 BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42);
 uint32_t hash = basis;
+uint8_t nw_frag;
 
 if (flow) {
 
@@ -2312,6 +2322,10 @@ flow_hash_5tuple(const struct flow *flow, uint32_t basis)
 }
 
 hash = hash_add(hash, flow->nw_proto);
+nw_frag = MINIFLOW_GET_U8(flow, nw_frag);
+if (nw_frag & FLOW_NW_FRAG_MASK) {
+goto out;
+}
 if (flow->nw_proto != IPPROTO_TCP && flow->nw_proto != IPPROTO_UDP
 && flow->nw_proto != IPPROTO_SCTP && flow->nw_proto != IPPROTO_ICMP
 && flow->nw_proto != IPPROTO_ICMPV6) {
-- 
2.34.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] flow: Consistent VXLAN UDP src ports for fragmented packets

2022-11-04 Thread Ilya Maximets
On 11/4/22 13:58, Hemanth Aramadaka via dev wrote:
> Issue:
> 
> The src-port for UDP is based on RSS hash in the packet metadata.
> In case of packets coming from VM it will be 5-tuple, if available,
> otherwise just IP addresses.If the VM fragments a large IP packet
> and sends the fragments to ovs, only the first fragment will contain
> the L4 header. Therefore, the first fragment and subsequent fragments
> get different UDP src ports in the outgoing VXLAN header.This can
> lead to fragment re-ordering in the fabric as packet will take
> different paths.
> 
> Fix:
> 
> Intention of this is to avoid fragment packets taking different paths.
> For example, due to presence of firewalls, fragment packets will take
> different paths and will get dropped.To avoid this we ignore the L4
> header during hash calculation only in the case of fragmented packets.
> 
> Signed-off-by: Hemanth Aramadaka 
> ---
>  lib/flow.c | 17 ++---
>  1 file changed, 14 insertions(+), 3 deletions(-)
> 
> diff --git a/lib/flow.c b/lib/flow.c
> index c3a3aa3ce..e8a2dc74e 100644
> --- a/lib/flow.c
> +++ b/lib/flow.c
> @@ -1018,7 +1018,9 @@ miniflow_extract(struct dp_packet *packet, struct 
> miniflow *dst)
>  miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
>  miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
>  if (dl_type == htons(ETH_TYPE_IP)) {
> -dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
> +if (!(nw_frag & FLOW_NW_FRAG_MASK)) {
> +dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
> +}
>  } else if (dl_type == htons(ETH_TYPE_IPV6)) {
>  dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
>  }
> @@ -1033,7 +1035,9 @@ miniflow_extract(struct dp_packet *packet, struct 
> miniflow *dst)
>  miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
>  miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
>  if (dl_type == htons(ETH_TYPE_IP)) {
> -dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
> +if (!(nw_frag & FLOW_NW_FRAG_MASK)) {
> +dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
> +}
>  } else if (dl_type == htons(ETH_TYPE_IPV6)) {
>  dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
>  }
> @@ -2248,7 +2252,7 @@ miniflow_hash_5tuple(const struct miniflow *flow, 
> uint32_t basis)
>  
>  if (flow) {
>  ovs_be16 dl_type = MINIFLOW_GET_BE16(flow, dl_type);
> -uint8_t nw_proto;
> +uint8_t nw_proto, nw_frag = 0;
>  
>  if (dl_type == htons(ETH_TYPE_IPV6)) {
>  struct flowmap map = FLOWMAP_EMPTY_INITIALIZER;
> @@ -2270,6 +2274,9 @@ miniflow_hash_5tuple(const struct miniflow *flow, 
> uint32_t basis)
>  
>  nw_proto = MINIFLOW_GET_U8(flow, nw_proto);
>  hash = hash_add(hash, nw_proto);
> +if (nw_frag & FLOW_NW_FRAG_MASK) {

Now the nw_frag is initialized, but it is always zero here, so that
code makes no sense.  You need to read the value from the miniflow.

Also, please, add the version number to the next patch, i.e. the subject
prefix for the next patch should be '[PATCH v5]'.

Best regards, Ilya Maximets.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] flow: Consistent VXLAN UDP src ports for fragmented packets

2022-11-04 Thread Ilya Maximets
On 11/4/22 14:03, Hemanth Aramadaka wrote:
> Hi ,
> 
> Thanks for the review and comments. I have addressed the comments and raised 
> the review request again. 
> 
> Ran these tests (tunnel_push_pop - packet_out and tunnel_push_pop - 
> packet_out debug_slow   ). I am attaching the test results as well here.  

Hi, sorry for not being clear, but I was talking about
creating a new test, not just running existing ones.
Existing tests do not cover the use case you're trying
to fix.

Best regards, Ilya Maximets.

> 
> 
> R620-10-CSSCI-5:/home/sdn/zarahem/ovs # make check TESTSUITEFLAGS='795'
> make  check-am
> make[1]: Entering directory '/home/sdn/zarahem/ovs'
> make  utilities/ovs-appctl-bashcomp.bash utilities/ovs-vsctl-bashcomp.bash 
> tests/atlocal tests/testpki-cacert.pem tests/testpki-cert.pem 
> tests/testpki-privkey.pem tests/testpki-req.pem tests/testpki-cert2.pem 
> tests/testpki-privkey2.pem tests/testpki-req2.pem
> make[2]: Entering directory '/home/sdn/zarahem/ovs'
> make[2]: Nothing to be done for 'utilities/ovs-appctl-bashcomp.bash'.
> make[2]: Nothing to be done for 'utilities/ovs-vsctl-bashcomp.bash'.
> make[2]: 'tests/atlocal' is up to date.
> make[2]: 'tests/testpki-cacert.pem' is up to date.
> make[2]: 'tests/testpki-cert.pem' is up to date.
> make[2]: 'tests/testpki-privkey.pem' is up to date.
> make[2]: 'tests/testpki-req.pem' is up to date.
> make[2]: 'tests/testpki-cert2.pem' is up to date.
> make[2]: 'tests/testpki-privkey2.pem' is up to date.
> make[2]: 'tests/testpki-req2.pem' is up to date.
> make[2]: Leaving directory '/home/sdn/zarahem/ovs'
> make  check-local
> make[2]: Entering directory '/home/sdn/zarahem/ovs'
> set /bin/sh './tests/testsuite' -C tests 
> AUTOTEST_PATH=utilities:vswitchd:ovsdb:vtep:tests:ipsec::; \
> "$@" 795 || \
> (test -z "$(find /home/sdn/zarahem/ovs/tests/testsuite.dir -name 'asan.*')" 
> && \
>  test -z "$(find /home/sdn/zarahem/ovs/tests/testsuite.dir -name 'ubsan.*')" 
> && \
>  test X'' = Xyes && "$@" --recheck)
> Illegal "police"
> ## -- ##
> ## openvswitch 3.0.90 test suite. ##
> ## -- ##
> 795: tunnel_push_pop - packet_outok
> 
> ## - ##
> ## Test results. ##
> ## - ##
> 
> 1 test was successful.
> make[2]: Leaving directory '/home/sdn/zarahem/ovs'
> make[1]: Leaving directory '/home/sdn/zarahem/ovs'
> R620-10-CSSCI-5:/home/sdn/zarahem/ovs # make check TESTSUITEFLAGS='796'
> make  check-am
> make[1]: Entering directory '/home/sdn/zarahem/ovs'
> make  utilities/ovs-appctl-bashcomp.bash utilities/ovs-vsctl-bashcomp.bash 
> tests/atlocal tests/testpki-cacert.pem tests/testpki-cert.pem 
> tests/testpki-privkey.pem tests/testpki-req.pem tests/testpki-cert2.pem 
> tests/testpki-privkey2.pem tests/testpki-req2.pem
> make[2]: Entering directory '/home/sdn/zarahem/ovs'
> make[2]: Nothing to be done for 'utilities/ovs-appctl-bashcomp.bash'.
> make[2]: Nothing to be done for 'utilities/ovs-vsctl-bashcomp.bash'.
> make[2]: 'tests/atlocal' is up to date.
> make[2]: 'tests/testpki-cacert.pem' is up to date.
> make[2]: 'tests/testpki-cert.pem' is up to date.
> make[2]: 'tests/testpki-privkey.pem' is up to date.
> make[2]: 'tests/testpki-req.pem' is up to date.
> make[2]: 'tests/testpki-cert2.pem' is up to date.
> make[2]: 'tests/testpki-privkey2.pem' is up to date.
> make[2]: 'tests/testpki-req2.pem' is up to date.
> make[2]: Leaving directory '/home/sdn/zarahem/ovs'
> make  check-local
> make[2]: Entering directory '/home/sdn/zarahem/ovs'
> set /bin/sh './tests/testsuite' -C tests 
> AUTOTEST_PATH=utilities:vswitchd:ovsdb:vtep:tests:ipsec::; \
> "$@" 796 || \
> (test -z "$(find /home/sdn/zarahem/ovs/tests/testsuite.dir -name 'asan.*')" 
> && \
>  test -z "$(find /home/sdn/zarahem/ovs/tests/testsuite.dir -name 'ubsan.*')" 
> && \
>  test X'' = Xyes && "$@" --recheck)
> Illegal "police"
> ## -- ##
> ## openvswitch 3.0.90 test suite. ##
> ## -- ##
> 796: tunnel_push_pop - packet_out debug_slow ok
> 
> ## - ##
> ## Test results. ##
> ## - ##
> 
> 1 test was successful.
> make[2]: Leaving directory '/home/sdn/zarahem/ovs'
> make[1]: Leaving directory '/home/sdn/zarahem/ovs'
> 
> 
> Thanks,
> Hemanth.
> 
> -Original Message-
> From: Ilya Maximets  
> Sent: 04 November 2022 03:56
> To: Hemanth Aramadaka ; ovs-dev@openvswitch.org
> Cc: i.maxim...@ovn.org
> Subject: Re: [ovs-dev] [PATCH] flow: Consistent VXLAN UDP src ports for 
> fragmented packets
> 
> On 11/3/22 13:11, Hemanth Aramadaka via dev wrote:
>> Issue:
>>
>> The src-port for UDP is based on RSS hash in the packet metadata.
>> In case of packets coming from VM it will be 5-tuple, if available, 
>> otherwise just IP addresses.If the VM fragments a large IP packet and 
>> sends the fragments to ovs, only the first fragment will contain the 
>> L4 header. Therefore, the first fragment and subsequent fragments get 
>> 

[ovs-dev] [PATCH ovn] binding: add the capability to apply QoS for lsp

2022-11-04 Thread Lorenzo Bianconi
Introduce the capability to apply QoS rules for logical switch ports
claimed by ovn-controller. Rely on shash instead of sset for
egress_ifaces.

Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2129742
Signed-off-by: Lorenzo Bianconi 
---
 controller/binding.c| 78 +++--
 controller/binding.h|  2 +-
 controller/ovn-controller.c |  9 ++---
 tests/system-ovn.at | 27 +
 4 files changed, 89 insertions(+), 27 deletions(-)

diff --git a/controller/binding.c b/controller/binding.c
index c3d2b2e42..6e596e6ca 100644
--- a/controller/binding.c
+++ b/controller/binding.c
@@ -115,6 +115,7 @@ struct qos_queue {
 uint32_t min_rate;
 uint32_t max_rate;
 uint32_t burst;
+char *port_name;
 };
 
 void
@@ -147,6 +148,8 @@ static void update_lport_tracking(const struct 
sbrec_port_binding *pb,
   struct hmap *tracked_dp_bindings,
   bool claimed);
 
+static bool is_lport_vif(const struct sbrec_port_binding *pb);
+
 static void
 get_qos_params(const struct sbrec_port_binding *pb, struct hmap *queue_map)
 {
@@ -166,6 +169,7 @@ get_qos_params(const struct sbrec_port_binding *pb, struct 
hmap *queue_map)
 node->max_rate = max_rate;
 node->burst = burst;
 node->queue_id = queue_id;
+node->port_name = xstrdup(pb->logical_port);
 }
 
 static const struct ovsrec_qos *
@@ -191,7 +195,7 @@ static bool
 set_noop_qos(struct ovsdb_idl_txn *ovs_idl_txn,
  const struct ovsrec_port_table *port_table,
  const struct ovsrec_qos_table *qos_table,
- struct sset *egress_ifaces)
+ struct shash *egress_ifaces)
 {
 if (!ovs_idl_txn) {
 return false;
@@ -206,11 +210,11 @@ set_noop_qos(struct ovsdb_idl_txn *ovs_idl_txn,
 size_t count = 0;
 
 OVSREC_PORT_TABLE_FOR_EACH (port, port_table) {
-if (sset_contains(egress_ifaces, port->name)) {
+if (shash_find(egress_ifaces, port->name)) {
 ovsrec_port_set_qos(port, noop_qos);
 count++;
 }
-if (sset_count(egress_ifaces) == count) {
+if (shash_count(egress_ifaces) == count) {
 break;
 }
 }
@@ -229,9 +233,10 @@ set_qos_type(struct netdev *netdev, const char *type)
 }
 
 static void
-setup_qos(const char *egress_iface, struct hmap *queue_map)
+setup_qos(struct shash_node *entry, struct hmap *queue_map)
 {
 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5);
+const char *egress_iface = entry->name;
 struct netdev *netdev_phy;
 
 if (!egress_iface) {
@@ -331,6 +336,10 @@ setup_qos(const char *egress_iface, struct hmap *queue_map)
 continue;
 }
 
+if (strcmp(sb_info->port_name, entry->data)) {
+continue;
+}
+
 smap_clear(&queue_details);
 smap_add_format(&queue_details, "min-rate", "%d", sb_info->min_rate);
 smap_add_format(&queue_details, "max-rate", "%d", sb_info->max_rate);
@@ -352,6 +361,7 @@ destroy_qos_map(struct hmap *qos_map)
 {
 struct qos_queue *qos_queue;
 HMAP_FOR_EACH_POP (qos_queue, node, qos_map) {
+free(qos_queue->port_name);
 free(qos_queue);
 }
 
@@ -397,7 +407,7 @@ sbrec_get_port_encap(const struct sbrec_chassis 
*chassis_rec,
 static void
 add_localnet_egress_interface_mappings(
 const struct sbrec_port_binding *port_binding,
-struct shash *bridge_mappings, struct sset *egress_ifaces)
+struct shash *bridge_mappings, struct shash *egress_ifaces)
 {
 const char *network = smap_get(&port_binding->options, "network_name");
 if (!network) {
@@ -422,7 +432,8 @@ add_localnet_egress_interface_mappings(
 if (!is_egress_iface) {
 continue;
 }
-sset_add(egress_ifaces, iface_rec->name);
+shash_add(egress_ifaces, iface_rec->name,
+  port_binding->logical_port);
 }
 }
 }
@@ -467,7 +478,7 @@ update_ld_multichassis_ports(const struct 
sbrec_port_binding *binding_rec,
 static void
 update_ld_localnet_port(const struct sbrec_port_binding *binding_rec,
 struct shash *bridge_mappings,
-struct sset *egress_ifaces,
+struct shash *egress_ifaces,
 struct hmap *local_datapaths)
 {
 /* Ignore localnet ports for unplugged networks. */
@@ -1512,6 +1523,28 @@ consider_vif_lport(const struct sbrec_port_binding *pb,
 b_lport = local_binding_add_lport(binding_lports, lbinding, pb,
   LP_VIF);
 }
+
+for (size_t i = 0; i < b_ctx_in->br_int->n_ports; i++) {
+const struct ovsrec_port *port_rec = b_ctx_in->br_int->ports[i];
+if (!strcmp(port_rec->name, b_ctx_in->br_int->name)) {
+continue;
+}
+
+for (size_t j = 0; j < port_rec->n_interfaces; j++) {
+ 

[ovs-dev] [PATCH v3 3/3] dpif-netlink: Remove redundant null assignment

2022-11-04 Thread Roi Dayan via dev
The assignment of the features pointer is not doing
anything and can be removed.

CC: Justin Pettit 
Signed-off-by: Roi Dayan 
---

Notes:
v3:
- fix get meter features commit to just remove redundant null assignment.

v2:
- move memset from wrapper call

 lib/dpif-netlink.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
index a620a6ec52dd..026b0daa8d83 100644
--- a/lib/dpif-netlink.c
+++ b/lib/dpif-netlink.c
@@ -4105,7 +4105,6 @@ dpif_netlink_meter_get_features(const struct dpif *dpif_,
 struct ofputil_meter_features *features)
 {
 if (probe_broken_meters(CONST_CAST(struct dpif *, dpif_))) {
-features = NULL;
 return;
 }
 
-- 
2.38.0

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v3 2/3] tc: Fix coverity dereference null return value

2022-11-04 Thread Roi Dayan via dev
CID 550702 (#1 of 1): Dereference null return value (NULL_RETURNS)
7. dereference: Dereferencing a pointer that might be NULL ex_type when calling 
nl_attr_get_u16.

Signed-off-by: Roi Dayan 
---
 lib/tc.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/lib/tc.c b/lib/tc.c
index 0a6bc5ecc0b0..9f2eea4f0780 100644
--- a/lib/tc.c
+++ b/lib/tc.c
@@ -1087,6 +1087,10 @@ nl_parse_act_pedit(struct nlattr *options, struct 
tc_flower *flower)
 }
 
 ex_type = nl_attr_find_nested(nla, TCA_PEDIT_KEY_EX_HTYPE);
+if (!ex_type) {
+return EOPNOTSUPP;
+}
+
 type = nl_attr_get_u16(ex_type);
 
 err = csum_update_flag(flower, type);
-- 
2.38.0

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v3 1/3] tc: Pass tun_metadata by reference

2022-11-04 Thread Roi Dayan via dev
Fix coverity big parameter passed by value

CID 549858 (#1 of 1): Big parameter passed by value (PASS_BY_VALUE)
pass_by_value: Passing parameter metadata of type struct tun_metadata (size 272 
bytes) by value,
which exceeds the medium threshold of 256 bytes

Signed-off-by: Roi Dayan 
---
 lib/tc.c | 20 ++--
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/lib/tc.c b/lib/tc.c
index 4d7de8adde45..0a6bc5ecc0b0 100644
--- a/lib/tc.c
+++ b/lib/tc.c
@@ -2496,13 +2496,13 @@ nl_msg_put_act_tunnel_key_release(struct ofpbuf 
*request)
 
 static void
 nl_msg_put_act_tunnel_geneve_option(struct ofpbuf *request,
-struct tun_metadata tun_metadata)
+struct tun_metadata *tun_metadata)
 {
 const struct geneve_opt *opt;
 size_t outer, inner;
 int len, cnt = 0;
 
-len = tun_metadata.present.len;
+len = tun_metadata->present.len;
 if (!len) {
 return;
 }
@@ -2510,7 +2510,7 @@ nl_msg_put_act_tunnel_geneve_option(struct ofpbuf 
*request,
 outer = nl_msg_start_nested(request, TCA_TUNNEL_KEY_ENC_OPTS);
 
 while (len) {
-opt = _metadata.opts.gnv[cnt];
+opt = _metadata->opts.gnv[cnt];
 inner = nl_msg_start_nested(request, TCA_TUNNEL_KEY_ENC_OPTS_GENEVE);
 
 nl_msg_put_be16(request, TCA_TUNNEL_KEY_ENC_OPT_GENEVE_CLASS,
@@ -2534,7 +2534,7 @@ nl_msg_put_act_tunnel_key_set(struct ofpbuf *request, 
bool id_present,
   ovs_be32 ipv4_dst, struct in6_addr *ipv6_src,
   struct in6_addr *ipv6_dst,
   ovs_be16 tp_dst, uint8_t tos, uint8_t ttl,
-  struct tun_metadata tun_metadata,
+  struct tun_metadata *tun_metadata,
   uint8_t no_csum, uint32_t action_pc)
 {
 size_t offset;
@@ -3202,7 +3202,7 @@ nl_msg_put_flower_acts(struct ofpbuf *request, struct 
tc_flower *flower)
   action->encap.tp_dst,
   action->encap.tos,
   action->encap.ttl,
-  action->encap.data,
+  &action->encap.data,
   action->encap.no_csum,
   action_pc);
 nl_msg_put_act_flags(request);
@@ -3374,20 +3374,20 @@ nl_msg_put_masked_value(struct ofpbuf *request, 
uint16_t type,
 
 static void
 nl_msg_put_flower_tunnel_opts(struct ofpbuf *request, uint16_t type,
-  struct tun_metadata metadata)
+  struct tun_metadata *metadata)
 {
 struct geneve_opt *opt;
 size_t outer, inner;
 int len, cnt = 0;
 
-len = metadata.present.len;
+len = metadata->present.len;
 if (!len) {
 return;
 }
 
 outer = nl_msg_start_nested(request, type);
 while (len) {
-opt = &metadata.opts.gnv[cnt];
+opt = &metadata->opts.gnv[cnt];
 inner = nl_msg_start_nested(request, TCA_FLOWER_KEY_ENC_OPTS_GENEVE);
 
 nl_msg_put_be16(request, TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS,
@@ -3464,9 +3464,9 @@ nl_msg_put_flower_tunnel(struct ofpbuf *request, struct 
tc_flower *flower)
 nl_msg_put_be32(request, TCA_FLOWER_KEY_ENC_KEY_ID, id);
 }
 nl_msg_put_flower_tunnel_opts(request, TCA_FLOWER_KEY_ENC_OPTS,
-  flower->key.tunnel.metadata);
+  &flower->key.tunnel.metadata);
 nl_msg_put_flower_tunnel_opts(request, TCA_FLOWER_KEY_ENC_OPTS_MASK,
-  flower->mask.tunnel.metadata);
+  &flower->mask.tunnel.metadata);
 }
 
 #define FLOWER_PUT_MASKED_VALUE(member, type) \
-- 
2.38.0

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v3 0/3] Small fixes found with coverity tool

2022-11-04 Thread Roi Dayan via dev
Hi,

The following are fixes to small issues found with coverity tool.

Thanks,
Roi


Roi Dayan (3):
  tc: Pass tun_metadata by reference
  tc: Fix coverity dereference null return value
  dpif-netlink: Remove redundant null assignment

 lib/dpif-netlink.c |  1 -
 lib/tc.c   | 24 ++--
 2 files changed, 14 insertions(+), 11 deletions(-)

-- 
2.38.0

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] flow: Consistent VXLAN UDP src ports for fragmented packets

2022-11-04 Thread Hemanth Aramadaka via dev
Hi ,

Thanks for the review and comments. I have addressed the comments and raised 
the review request again. 

Ran these tests (tunnel_push_pop - packet_out and tunnel_push_pop - packet_out 
debug_slow   ). I am attaching the test results as well here.  


R620-10-CSSCI-5:/home/sdn/zarahem/ovs # make check TESTSUITEFLAGS='795'
make  check-am
make[1]: Entering directory '/home/sdn/zarahem/ovs'
make  utilities/ovs-appctl-bashcomp.bash utilities/ovs-vsctl-bashcomp.bash 
tests/atlocal tests/testpki-cacert.pem tests/testpki-cert.pem 
tests/testpki-privkey.pem tests/testpki-req.pem tests/testpki-cert2.pem 
tests/testpki-privkey2.pem tests/testpki-req2.pem
make[2]: Entering directory '/home/sdn/zarahem/ovs'
make[2]: Nothing to be done for 'utilities/ovs-appctl-bashcomp.bash'.
make[2]: Nothing to be done for 'utilities/ovs-vsctl-bashcomp.bash'.
make[2]: 'tests/atlocal' is up to date.
make[2]: 'tests/testpki-cacert.pem' is up to date.
make[2]: 'tests/testpki-cert.pem' is up to date.
make[2]: 'tests/testpki-privkey.pem' is up to date.
make[2]: 'tests/testpki-req.pem' is up to date.
make[2]: 'tests/testpki-cert2.pem' is up to date.
make[2]: 'tests/testpki-privkey2.pem' is up to date.
make[2]: 'tests/testpki-req2.pem' is up to date.
make[2]: Leaving directory '/home/sdn/zarahem/ovs'
make  check-local
make[2]: Entering directory '/home/sdn/zarahem/ovs'
set /bin/sh './tests/testsuite' -C tests 
AUTOTEST_PATH=utilities:vswitchd:ovsdb:vtep:tests:ipsec::; \
"$@" 795 || \
(test -z "$(find /home/sdn/zarahem/ovs/tests/testsuite.dir -name 'asan.*')" && \
 test -z "$(find /home/sdn/zarahem/ovs/tests/testsuite.dir -name 'ubsan.*')" && 
\
 test X'' = Xyes && "$@" --recheck)
Illegal "police"
## -- ##
## openvswitch 3.0.90 test suite. ##
## -- ##
795: tunnel_push_pop - packet_outok

## - ##
## Test results. ##
## - ##

1 test was successful.
make[2]: Leaving directory '/home/sdn/zarahem/ovs'
make[1]: Leaving directory '/home/sdn/zarahem/ovs'
R620-10-CSSCI-5:/home/sdn/zarahem/ovs # make check TESTSUITEFLAGS='796'
make  check-am
make[1]: Entering directory '/home/sdn/zarahem/ovs'
make  utilities/ovs-appctl-bashcomp.bash utilities/ovs-vsctl-bashcomp.bash 
tests/atlocal tests/testpki-cacert.pem tests/testpki-cert.pem 
tests/testpki-privkey.pem tests/testpki-req.pem tests/testpki-cert2.pem 
tests/testpki-privkey2.pem tests/testpki-req2.pem
make[2]: Entering directory '/home/sdn/zarahem/ovs'
make[2]: Nothing to be done for 'utilities/ovs-appctl-bashcomp.bash'.
make[2]: Nothing to be done for 'utilities/ovs-vsctl-bashcomp.bash'.
make[2]: 'tests/atlocal' is up to date.
make[2]: 'tests/testpki-cacert.pem' is up to date.
make[2]: 'tests/testpki-cert.pem' is up to date.
make[2]: 'tests/testpki-privkey.pem' is up to date.
make[2]: 'tests/testpki-req.pem' is up to date.
make[2]: 'tests/testpki-cert2.pem' is up to date.
make[2]: 'tests/testpki-privkey2.pem' is up to date.
make[2]: 'tests/testpki-req2.pem' is up to date.
make[2]: Leaving directory '/home/sdn/zarahem/ovs'
make  check-local
make[2]: Entering directory '/home/sdn/zarahem/ovs'
set /bin/sh './tests/testsuite' -C tests 
AUTOTEST_PATH=utilities:vswitchd:ovsdb:vtep:tests:ipsec::; \
"$@" 796 || \
(test -z "$(find /home/sdn/zarahem/ovs/tests/testsuite.dir -name 'asan.*')" && \
 test -z "$(find /home/sdn/zarahem/ovs/tests/testsuite.dir -name 'ubsan.*')" && 
\
 test X'' = Xyes && "$@" --recheck)
Illegal "police"
## -- ##
## openvswitch 3.0.90 test suite. ##
## -- ##
796: tunnel_push_pop - packet_out debug_slow ok

## - ##
## Test results. ##
## - ##

1 test was successful.
make[2]: Leaving directory '/home/sdn/zarahem/ovs'
make[1]: Leaving directory '/home/sdn/zarahem/ovs'


Thanks,
Hemanth.

-Original Message-
From: Ilya Maximets  
Sent: 04 November 2022 03:56
To: Hemanth Aramadaka ; ovs-dev@openvswitch.org
Cc: i.maxim...@ovn.org
Subject: Re: [ovs-dev] [PATCH] flow: Consistent VXLAN UDP src ports for 
fragmented packets

On 11/3/22 13:11, Hemanth Aramadaka via dev wrote:
> Issue:
> 
> The src-port for UDP is based on RSS hash in the packet metadata.
> In case of packets coming from VM it will be 5-tuple, if available,
> otherwise just IP addresses. If the VM fragments a large IP packet and
> sends the fragments to ovs, only the first fragment will contain the
> L4 header. Therefore, the first fragment and subsequent fragments get
> different UDP src ports in the outgoing VXLAN header. This can lead to
> fragment re-ordering in the fabric as packet will take different
> paths.
> 
> Fix:
> 
> Intention of this is to avoid fragment packets taking different paths.
> For example, due to presence of firewalls, fragment packets will take
> different paths and will get dropped. To avoid this we ignore the L4
> header during hash calculation only in the case of fragmented packets.
> 
> Signed-off-by: 

Re: [ovs-dev] [PATCH v2 3/3] dpif-netlink: Move meter features reset from wrapper

2022-11-04 Thread Roi Dayan via dev



On 04/11/2022 0:48, Justin Pettit wrote:
> 
>> On Nov 3, 2022, at 3:38 PM, Ilya Maximets  wrote:
>>
>> On 11/3/22 09:47, Roi Dayan wrote:
>>>
>>> diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
>>> index a620a6ec52dd..2bdd2137af36 100644
>>> --- a/lib/dpif-netlink.c
>>> +++ b/lib/dpif-netlink.c
>>> @@ -4105,7 +4105,7 @@ dpif_netlink_meter_get_features(const struct dpif 
>>> *dpif_,
>>> struct ofputil_meter_features *features)
>>> {
>>> if (probe_broken_meters(CONST_CAST(struct dpif *, dpif_))) {
>>> -features = NULL;
>>> +memset(features, 0, sizeof *features);
>>> return;
>>> }
>>>
>>> diff --git a/lib/dpif.c b/lib/dpif.c
>>> index 40f5fe44606e..f00aeea37428 100644
>>> --- a/lib/dpif.c
>>> +++ b/lib/dpif.c
>>> @@ -1935,7 +1935,6 @@ void
>>> dpif_meter_get_features(const struct dpif *dpif,
>>> struct ofputil_meter_features *features)
>>> {
>>> -memset(features, 0, sizeof *features);
>>
>> I'm not sure this is correct, because we should still clear them
>> even if the dpif_class doesn't have the callback.
>>
>> All providers seem to implement this callback, but not clearing
>> features doesn't seem correct from the API clarity point of view.
>>
>> Just returning without clearing the structure in
>> dpif_netlink_meter_get_features() might be a better option.
>> Clearing in both places is also fine as it's not a performance
>> critical code path.
> 
> Agreed.  This is the point I was trying to make in my original feedback: I 
> think the patch can just remove the "features = NULL;" line in 
> dpif_netlink_meter_get_features() and leave everything else as-is.  So the 
> patch essentially becomes, since dpif_meter_get_features() already cleared 
> out 'features':
> 
> -=-=-=-=-=-=-=-
> --- a/lib/dpif-netlink.c
> +++ b/lib/dpif-netlink.c
> @@ -4105,7 +4105,6 @@ dpif_netlink_meter_get_features(const struct dpif 
> *dpif_,
>struct ofputil_meter_features *features)
> {
>if (probe_broken_meters(CONST_CAST(struct dpif *, dpif_))) {
> -features = NULL;
>return;
>}
> -=-=-=-=-=-=-=-
> 
> --Justin
> 
> 

ok i see. in ct get features it's different as the function is not
void and returns not supported if callback doesn't exist.
i'll fix the commit as suggested. thanks.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH] flow: Consistent VXLAN UDP src ports for fragmented packets

2022-11-04 Thread Hemanth Aramadaka via dev
Issue:

The src-port for UDP is based on RSS hash in the packet metadata.
In case of packets coming from VM it will be 5-tuple, if available,
otherwise just IP addresses. If the VM fragments a large IP packet
and sends the fragments to ovs, only the first fragment will contain
the L4 header. Therefore, the first fragment and subsequent fragments
get different UDP src ports in the outgoing VXLAN header. This can
lead to fragment re-ordering in the fabric as packet will take
different paths.

Fix:

Intention of this is to avoid fragment packets taking different paths.
For example, due to presence of firewalls, fragment packets will take
different paths and will get dropped. To avoid this we ignore the L4
header during hash calculation only in the case of fragmented packets.

Signed-off-by: Hemanth Aramadaka 
---
 lib/flow.c | 17 ++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/lib/flow.c b/lib/flow.c
index c3a3aa3ce..e8a2dc74e 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -1018,7 +1018,9 @@ miniflow_extract(struct dp_packet *packet, struct 
miniflow *dst)
 miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
 miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
 if (dl_type == htons(ETH_TYPE_IP)) {
-dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
+if (!(nw_frag & FLOW_NW_FRAG_MASK)) {
+dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
+}
 } else if (dl_type == htons(ETH_TYPE_IPV6)) {
 dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
 }
@@ -1033,7 +1035,9 @@ miniflow_extract(struct dp_packet *packet, struct 
miniflow *dst)
 miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
 miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
 if (dl_type == htons(ETH_TYPE_IP)) {
-dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
+if (!(nw_frag & FLOW_NW_FRAG_MASK)) {
+dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
+}
 } else if (dl_type == htons(ETH_TYPE_IPV6)) {
 dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
 }
@@ -2248,7 +2252,7 @@ miniflow_hash_5tuple(const struct miniflow *flow, 
uint32_t basis)
 
 if (flow) {
 ovs_be16 dl_type = MINIFLOW_GET_BE16(flow, dl_type);
-uint8_t nw_proto;
+uint8_t nw_proto, nw_frag = 0;
 
 if (dl_type == htons(ETH_TYPE_IPV6)) {
 struct flowmap map = FLOWMAP_EMPTY_INITIALIZER;
@@ -2270,6 +2274,9 @@ miniflow_hash_5tuple(const struct miniflow *flow, 
uint32_t basis)
 
 nw_proto = MINIFLOW_GET_U8(flow, nw_proto);
 hash = hash_add(hash, nw_proto);
+if (nw_frag & FLOW_NW_FRAG_MASK) {
+goto out;
+}
 if (nw_proto != IPPROTO_TCP && nw_proto != IPPROTO_UDP
 && nw_proto != IPPROTO_SCTP && nw_proto != IPPROTO_ICMP
 && nw_proto != IPPROTO_ICMPV6) {
@@ -2292,6 +2299,7 @@ flow_hash_5tuple(const struct flow *flow, uint32_t basis)
 {
 BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42);
 uint32_t hash = basis;
+uint8_t nw_frag = 0;
 
 if (flow) {
 
@@ -2312,6 +2320,9 @@ flow_hash_5tuple(const struct flow *flow, uint32_t basis)
 }
 
 hash = hash_add(hash, flow->nw_proto);
+if (nw_frag & FLOW_NW_FRAG_MASK) {
+goto out;
+}
 if (flow->nw_proto != IPPROTO_TCP && flow->nw_proto != IPPROTO_UDP
 && flow->nw_proto != IPPROTO_SCTP && flow->nw_proto != IPPROTO_ICMP
 && flow->nw_proto != IPPROTO_ICMPV6) {
-- 
2.34.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn v3 4/4] pinctrl: Send RARPs for external ipv6 interfaces

2022-11-04 Thread 0-day Robot
Bleep bloop.  Greetings Felix Hüttner, I am a robot and I have tried out your 
patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


checkpatch:
ERROR: Author Felix Hüttner  needs to sign off.
WARNING: Unexpected sign-offs from developers who are not authors or co-authors 
or committers: Felix Huettner 
Lines checked: 179, Warnings: 1, Errors: 1


Please check this out.  If you feel there has been an error, please email 
acon...@redhat.com

Thanks,
0-day Robot
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn v3 3/4] ovn-macros: support ipv6 in ovn_attach

2022-11-04 Thread 0-day Robot
Bleep bloop.  Greetings Felix Hüttner, I am a robot and I have tried out your 
patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


checkpatch:
ERROR: Author Felix Hüttner  needs to sign off.
WARNING: Unexpected sign-offs from developers who are not authors or co-authors 
or committers: Felix Huettner 
Lines checked: 53, Warnings: 1, Errors: 1


Please check this out.  If you feel there has been an error, please email 
acon...@redhat.com

Thanks,
0-day Robot
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn v3 2/4] northd: handle own rarps like garps

2022-11-04 Thread 0-day Robot
Bleep bloop.  Greetings Felix Hüttner, I am a robot and I have tried out your 
patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


checkpatch:
ERROR: Author Felix Hüttner  needs to sign off.
WARNING: Unexpected sign-offs from developers who are not authors or co-authors 
or committers: Felix Huettner 
Lines checked: 190, Warnings: 1, Errors: 1


Please check this out.  If you feel there has been an error, please email 
acon...@redhat.com

Thanks,
0-day Robot
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn v3 1/4] logical-fields: add rarp fields

2022-11-04 Thread 0-day Robot
Bleep bloop.  Greetings Felix Hüttner, I am a robot and I have tried out your 
patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


checkpatch:
ERROR: Author Felix Hüttner  needs to sign off.
WARNING: Unexpected sign-offs from developers who are not authors or co-authors 
or committers: Felix Huettner 
WARNING: Line is 125 characters long (recommended limit is 79)
#58 FILE: ovn-sb.xml:1055:
rarp.op rarp.spa rarp.tpa 
rarp.sha rarp.tha

Lines checked: 73, Warnings: 2, Errors: 1


Please check this out.  If you feel there has been an error, please email 
acon...@redhat.com

Thanks,
0-day Robot
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn branch-22.03 8/8] northd: Properly check the wakeup time in MAC banding aging

2022-11-04 Thread 0-day Robot
Bleep bloop.  Greetings Ales Musil, I am a robot and I have tried out your 
patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


checkpatch:
WARNING: Unexpected sign-offs from developers who are not authors or co-authors 
or committers: Han Zhou 
Lines checked: 50, Warnings: 1, Errors: 0


Please check this out.  If you feel there has been an error, please email 
acon...@redhat.com

Thanks,
0-day Robot
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn branch-22.03 7/8] northd: Increase the MAC binding removal delay

2022-11-04 Thread 0-day Robot
Bleep bloop.  Greetings Ales Musil, I am a robot and I have tried out your 
patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


checkpatch:
WARNING: Unexpected sign-offs from developers who are not authors or co-authors 
or committers: Mark Michelson 
Lines checked: 44, Warnings: 1, Errors: 0


Please check this out.  If you feel there has been an error, please email 
acon...@redhat.com

Thanks,
0-day Robot
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn branch-22.03 5/8] northd: Add config to limit bulk removal of MAC binding

2022-11-04 Thread 0-day Robot
Bleep bloop.  Greetings Ales Musil, I am a robot and I have tried out your 
patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


checkpatch:
WARNING: Line is 84 characters long (recommended limit is 79)
#128 FILE: ovn-nb.xml:166:
  type='{"type": "integer", "minInteger": 0, "maxInteger": 
4294967295}'>

Lines checked: 141, Warnings: 1, Errors: 0


Please check this out.  If you feel there has been an error, please email 
acon...@redhat.com

Thanks,
0-day Robot
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn branch-22.03 4/8] northd: Add MAC binding aging mechanism

2022-11-04 Thread 0-day Robot
Bleep bloop.  Greetings Ales Musil, I am a robot and I have tried out your 
patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


checkpatch:
WARNING: Comment with 'xxx' marker
#99 FILE: northd/inc-proc-northd.c:222:
/* XXX: The "en_mac_binding_aging" should be separate "root" node

WARNING: Line is 84 characters long (recommended limit is 79)
#340 FILE: ovn-nb.xml:2352:
  type='{"type": "integer", "minInteger": 0, "maxInteger": 
4294967295}'>

Lines checked: 472, Warnings: 2, Errors: 0


Please check this out.  If you feel there has been an error, please email 
acon...@redhat.com

Thanks,
0-day Robot
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH ovn v3 4/4] pinctrl: Send RARPs for external ipv6 interfaces

2022-11-04 Thread Felix Hüttner via dev
Previously garps/rarps were only sent for NAT IPs if these had an
ipv4 address attached. For lsp's on gateway routers that do not have
an ipv4 address assigned (e.g. if they are ipv6 only) no rarps were
sent out.

This causes traffic outages when changing the priority of a gateway
chassis as the physical switches do not get the information where the
mac address now resides. To fix this, we send out rarps with just the mac
address of the interface and no ip address.

This change has been tested in an environment with 600 logical routers
on a single ipv6 external network.

Acked-by: Numan Siddique 
Signed-off-by: Felix Huettner 
---
 controller/pinctrl.c | 23 +
 tests/ovn.at | 80 +---
 2 files changed, 99 insertions(+), 4 deletions(-)

diff --git a/controller/pinctrl.c b/controller/pinctrl.c
index 8859cb080..767fa02d8 100644
--- a/controller/pinctrl.c
+++ b/controller/pinctrl.c
@@ -4512,6 +4512,24 @@ send_garp_rarp_update(struct ovsdb_idl_txn 
*ovnsb_idl_txn,
 }
 free(name);
 }
+/*
+ * Send RARPs even if we do not have a ipv4 address as it e.g.
+ * happens on ipv6 only ports.
+ */
+if (laddrs->n_ipv4_addrs == 0) {
+char *name = xasprintf("%s-noip",
+   binding_rec->logical_port);
+garp_rarp = shash_find_data(&send_garp_rarp_data, name);
+if (garp_rarp) {
+garp_rarp->dp_key = binding_rec->datapath->tunnel_key;
+garp_rarp->port_key = binding_rec->tunnel_key;
+} else {
+add_garp_rarp(name, laddrs->ea,
+  0, binding_rec->datapath->tunnel_key,
+  binding_rec->tunnel_key);
+}
+free(name);
+}
 destroy_lport_addresses(laddrs);
 free(laddrs);
 }
@@ -5824,6 +5842,11 @@ consider_nat_address(struct ovsdb_idl_index 
*sbrec_port_binding_by_name,
 sset_add(nat_address_keys, name);
 free(name);
 }
+if (laddrs->n_ipv4_addrs == 0) {
+char *name = xasprintf("%s-noip", pb->logical_port);
+sset_add(nat_address_keys, name);
+free(name);
+}
 shash_add(nat_addresses, pb->logical_port, laddrs);
 }

diff --git a/tests/ovn.at b/tests/ovn.at
index 184fc0fdd..6552681bd 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -9017,6 +9017,76 @@ OVN_CLEANUP([hv1])
 AT_CLEANUP
 ])

+OVN_FOR_EACH_NORTHD([
+AT_SETUP([send reverse arp for router without ipv4 address])
+ovn_start
+# Create logical switch
+ovn-nbctl ls-add ls0
+# Create gateway router
+ovn-nbctl create Logical_Router name=lr0 options:chassis=hv1
+# Add router port to gateway router
+ovn-nbctl lrp-add lr0 lrp0 f0:00:00:00:00:01 fd12:3456:789a:1::1/64
+ovn-nbctl lsp-add ls0 lrp0-rp -- set Logical_Switch_Port lrp0-rp \
+type=router options:router-port=lrp0 addresses='"f0:00:00:00:00:01"'
+# Add nat-address option
+ovn-nbctl lsp-set-options lrp0-rp router-port=lrp0 nat-addresses="router"
+
+net_add n1
+sim_add hv1
+as hv1
+ovs-vsctl \
+-- add-br br-phys \
+-- add-br br-eth0
+
+ovn_attach n1 br-phys fd12:3456:789a:1::1 64
+
+AT_CHECK([ovs-vsctl set Open_vSwitch . 
external-ids:ovn-bridge-mappings=physnet1:br-eth0])
+AT_CHECK([ovs-vsctl add-port br-eth0 snoopvif -- set Interface snoopvif 
options:tx_pcap=hv1/snoopvif-tx.pcap options:rxq_pcap=hv1/snoopvif-rx.pcap])
+
+# Create a localnet port.
+AT_CHECK([ovn-nbctl lsp-add ls0 ln_port])
+AT_CHECK([ovn-nbctl lsp-set-addresses ln_port unknown])
+AT_CHECK([ovn-nbctl lsp-set-type ln_port localnet])
+AT_CHECK([ovn-nbctl lsp-set-options ln_port network_name=physnet1])
+
+# Wait until the patch ports are created to connect br-int to br-eth0
+OVS_WAIT_UNTIL([test 1 = `ovs-vsctl show | \
+grep "Port patch-br-int-to-ln_port" | wc -l`])
+
+ovn-sbctl list port_binding lrp0-rp
+echo "*"
+ovn-nbctl list logical_switch_port lrp0-rp
+ovn-nbctl list logical_router_port lrp0
+ovn-nbctl show
+# Wait for packet to be received.
+OVS_WAIT_UNTIL([test `wc -c < "hv1/snoopvif-tx.pcap"` -ge 50])
+$PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv1/snoopvif-tx.pcap  | sort | 
uniq > packets
+expected="f00180350001080006040003f001f001"
+echo $expected > expout
+AT_CHECK([sort packets], [0], [expout])
+
+# Temporarily remove nat-addresses option to avoid race conditions
+# due to GARP backoff
+ovn-nbctl lsp-set-options lrp0-rp router-port=lrp0 nat-addresses=""
+
+as hv1 reset_pcap_file snoopvif hv1/snoopvif
+
+# Re-add nat-addresses option
+ovn-nbctl lsp-set-options lrp0-rp router-port=lrp0 nat-addresses="router" 
exclude-lb-vips-from-garp="true"
+
+# Wait for packets to be received.
+OVS_WAIT_UNTIL([test `wc -c < "hv1/snoopvif-tx.pcap"` -ge 50])
+

[ovs-dev] [PATCH ovn v3 2/4] northd: handle own rarps like garps

2022-11-04 Thread Felix Hüttner via dev
Previously graceful rarps sent from ovn-controller were handled as
normal packets and flooded to other routers. As the other routers should
already have that information, we can skip flooding (just like it is done
for GARPs already) and thereby mitigate ovs refusing to send the packet
because of too many resubmits.

This change has been tested in combination with the previous one in the
series and works well in environments which contain an external ipv6
network with 600 ovn logical routers.

Acked-by: Numan Siddique 
Signed-off-by: Felix Huettner 
---
 northd/northd.c | 11 ++-
 northd/ovn-northd.8.xml |  4 ++--
 tests/ovn-northd.at | 18 +-
 tests/ovn.at|  8 +++-
 4 files changed, 24 insertions(+), 17 deletions(-)

diff --git a/northd/northd.c b/northd/northd.c
index b7388afc5..e1f3bace8 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -7336,8 +7336,8 @@ build_lrouter_groups(struct hmap *ports, struct ovs_list 
*lr_list)
 }

 /*
- * Ingress table 24: Flows that flood self originated ARP/ND packets in the
- * switching domain.
+ * Ingress table 24: Flows that flood self originated ARP/RARP/ND packets in
+ * the switching domain.
  */
 static void
 build_lswitch_rport_arp_req_self_orig_flow(struct ovn_port *op,
@@ -7369,7 +7369,7 @@ build_lswitch_rport_arp_req_self_orig_flow(struct 
ovn_port *op,
 sset_add(&all_eth_addrs, nat->external_mac);
 }

-/* Self originated ARP requests/ND need to be flooded to the L2 domain
+/* Self originated ARP requests/RARP/ND need to be flooded to the L2 domain
  * (except on router ports).  Determine that packets are self originated
  * by also matching on source MAC. Matching on ingress port is not
  * reliable in case this is a VLAN-backed network.
@@ -7385,7 +7385,8 @@ build_lswitch_rport_arp_req_self_orig_flow(struct 
ovn_port *op,
 ds_chomp(&eth_src, ',');
 ds_put_cstr(&eth_src, "}");

-ds_put_format(&match, "eth.src == %s && (arp.op == 1 || nd_ns)",
+ds_put_format(&match,
+  "eth.src == %s && (arp.op == 1 || rarp.op == 3 || nd_ns)",
   ds_cstr(&eth_src));
 ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, priority, ds_cstr(&match),
   "outport = \""MC_FLOOD_L2"\"; output;");
@@ -7581,7 +7582,7 @@ build_lswitch_rport_arp_req_flows(struct ovn_port *op,
 lflows, stage_hint);
 }

-/* Self originated ARP requests/ND need to be flooded as usual.
+/* Self originated ARP requests/RARP/ND need to be flooded as usual.
  *
  * However, if the switch doesn't have any non-router ports we shouldn't
  * even try to flood.
diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
index a70f2e678..051f3dc6e 100644
--- a/northd/ovn-northd.8.xml
+++ b/northd/ovn-northd.8.xml
@@ -1723,8 +1723,8 @@ output;

   
 Priority-75 flows for each port connected to a logical router
-matching self originated ARP request/ND packets.  These packets
-are flooded to the MC_FLOOD_L2 which contains all
+matching self originated ARP request/RARP request/ND packets.  These
+packets are flooded to the MC_FLOOD_L2 which contains all
 non-router logical ports.
   

diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
index 4f399eccb..e849afd85 100644
--- a/tests/ovn-northd.at
+++ b/tests/ovn-northd.at
@@ -4759,7 +4759,7 @@ AT_CHECK([grep "ls_in_l2_lkup" ls1_lflows | sed 
's/table=../table=??/' | sort],
   table=??(ls_in_l2_lkup  ), priority=50   , match=(eth.dst == 
00:00:00:00:01:01), action=(outport = "ls1-ro1"; output;)
   table=??(ls_in_l2_lkup  ), priority=50   , match=(eth.dst == 
00:00:00:00:01:02), action=(outport = "vm1"; output;)
   table=??(ls_in_l2_lkup  ), priority=70   , match=(eth.mcast), 
action=(outport = "_MC_flood"; output;)
-  table=??(ls_in_l2_lkup  ), priority=75   , match=(eth.src == 
{00:00:00:00:01:01} && (arp.op == 1 || nd_ns)), action=(outport = 
"_MC_flood_l2"; output;)
+  table=??(ls_in_l2_lkup  ), priority=75   , match=(eth.src == 
{00:00:00:00:01:01} && (arp.op == 1 || rarp.op == 3 || nd_ns)), action=(outport 
= "_MC_flood_l2"; output;)
   table=??(ls_in_l2_lkup  ), priority=80   , match=(flags[[1]] == 0 && 
arp.op == 1 && arp.tpa == 192.168.1.1), action=(clone {outport = "ls1-ro1"; 
output; }; outport = "_MC_flood_l2"; output;)
   table=??(ls_in_l2_lkup  ), priority=80   , match=(flags[[1]] == 0 && 
nd_ns && nd.target == fe80::200:ff:fe00:101), action=(clone {outport = 
"ls1-ro1"; output; }; outport = "_MC_flood_l2"; output;)
 ])
@@ -4771,7 +4771,7 @@ AT_CHECK([grep "ls_in_l2_lkup" ls2_lflows | sed 
's/table=../table=??/' | sort],
   table=??(ls_in_l2_lkup  ), priority=50   , match=(eth.dst == 
00:00:00:00:02:01), action=(outport = "ls2-ro2"; output;)
   table=??(ls_in_l2_lkup  ), priority=50   , match=(eth.dst == 
00:00:00:00:02:02), action=(outport = "vm2"; output;)
   table=??(ls_in_l2_lkup  ), priority=70   , 

[ovs-dev] [PATCH ovn v3 3/4] ovn-macros: support ipv6 in ovn_attach

2022-11-04 Thread Felix Hüttner via dev
in order to easily add future ipv6 test cases the common `ovn_attach`
function should also support ipv6 addresses.

Acked-by: Numan Siddique 
Signed-off-by: Felix Huettner 
---
 tests/ovn-macros.at | 11 ---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/tests/ovn-macros.at b/tests/ovn-macros.at
index b234019a9..ee6e09d39 100644
--- a/tests/ovn-macros.at
+++ b/tests/ovn-macros.at
@@ -288,14 +288,19 @@ net_attach () {
 || return 1
 }

-# ovn_az_attach AZ NETWORK BRIDGE IP [MASKLEN]
+# ovn_az_attach AZ NETWORK BRIDGE IP [MASKLEN] [ENCAP]
 ovn_az_attach() {
 local az=$1 net=$2 bridge=$3 ip=$4 masklen=${5-24} encap=${6-geneve,vxlan}
 net_attach $net $bridge || return 1

 mac=`ovs-vsctl get Interface $bridge mac_in_use | sed s/\"//g`
 arp_table="$arp_table $sandbox,$bridge,$ip,$mac"
-ovs-appctl netdev-dummy/ip4addr $bridge $ip/$masklen >/dev/null || return 1
+if test -z $(echo $ip | sed '/:/d'); then
+ipversion="6"
+else
+ipversion="4"
+fi
+ovs-appctl netdev-dummy/ip${ipversion}addr $bridge $ip/$masklen >/dev/null 
|| return 1
 ovs-appctl ovs/route/add $ip/$masklen $bridge >/dev/null || return 1

 local ovn_remote
@@ -329,7 +334,7 @@ ovn_az_attach() {
 start_daemon ovn-controller --enable-dummy-vif-plug || return 1
 }

-# ovn_attach NETWORK BRIDGE IP [MASKLEN]
+# ovn_attach NETWORK BRIDGE IP [MASKLEN] [ENCAP]
 #
 # First, this command attaches BRIDGE to interconnection network NETWORK, just
 # like "net_attach NETWORK BRIDGE".  Second, it configures (simulated) IP
--
2.38.1
Diese E Mail enthält möglicherweise vertrauliche Inhalte und ist nur für die 
Verwertung durch den vorgesehenen Empfänger bestimmt. Sollten Sie nicht der 
vorgesehene Empfänger sein, setzen Sie den Absender bitte unverzüglich in 
Kenntnis und löschen diese E Mail. Hinweise zum Datenschutz finden Sie 
hier.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH ovn v3 1/4] logical-fields: add rarp fields

2022-11-04 Thread Felix Hüttner via dev
We need to be able to handle rarp fields in order to ensure we can
handle rarp messages we send ourselves.
This will be used by the next patch in the series.

Acked-by: Numan Siddique 
Signed-off-by: Felix Huettner 
---
 lib/logical-fields.c | 8 
 lib/ovn-util.c   | 2 +-
 ovn-sb.xml   | 2 ++
 3 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/lib/logical-fields.c b/lib/logical-fields.c
index ed3ec62e1..fc131791e 100644
--- a/lib/logical-fields.c
+++ b/lib/logical-fields.c
@@ -261,6 +261,14 @@ ovn_init_symtab(struct shash *symtab)
 expr_symtab_add_field(symtab, "arp.tpa", MFF_ARP_TPA, "arp", false);
 expr_symtab_add_field(symtab, "arp.tha", MFF_ARP_THA, "arp", false);

+/* RARPs use the same layout as arp packets -> use the same field_id */
+expr_symtab_add_predicate(symtab, "rarp", "eth.type == 0x8035");
+expr_symtab_add_field(symtab, "rarp.op", MFF_ARP_OP, "rarp", false);
+expr_symtab_add_field(symtab, "rarp.spa", MFF_ARP_SPA, "rarp", false);
+expr_symtab_add_field(symtab, "rarp.sha", MFF_ARP_SHA, "rarp", false);
+expr_symtab_add_field(symtab, "rarp.tpa", MFF_ARP_TPA, "rarp", false);
+expr_symtab_add_field(symtab, "rarp.tha", MFF_ARP_THA, "rarp", false);
+
 expr_symtab_add_predicate(symtab, "nd",
   "icmp6.type == {135, 136} && icmp6.code == 0 && ip.ttl == 255");
 expr_symtab_add_predicate(symtab, "nd_ns",
diff --git a/lib/ovn-util.c b/lib/ovn-util.c
index 5dca72714..597625a29 100644
--- a/lib/ovn-util.c
+++ b/lib/ovn-util.c
@@ -817,7 +817,7 @@ ip_address_and_port_from_lb_key(const char *key, char 
**ip_address,
  *
  * This value is also used to handle some backward compatibility during
  * upgrading. It should never decrease or rewind. */
-#define OVN_INTERNAL_MINOR_VER 4
+#define OVN_INTERNAL_MINOR_VER 5

 /* Returns the OVN version. The caller must free the returned value. */
 char *
diff --git a/ovn-sb.xml b/ovn-sb.xml
index 315d60853..42e6fa3ee 100644
--- a/ovn-sb.xml
+++ b/ovn-sb.xml
@@ -1052,6 +1052,7 @@
 ip4.src ip4.dst
 ip6.src ip6.dst 
ip6.label
 arp.op arp.spa arp.tpa 
arp.sha arp.tha
+rarp.op rarp.spa rarp.tpa 
rarp.sha rarp.tha
 tcp.src tcp.dst 
tcp.flags
 udp.src udp.dst
 sctp.src sctp.dst
@@ -1115,6 +1116,7 @@
 ip.later_frag expands to ip.frag[1]
 ip.first_frag expands to ip.is_frag  
!ip.later_frag
 arp expands to eth.type == 0x806
+rarp expands to eth.type == 0x8035
 nd expands to icmp6.type == {135, 136} 
 icmp6.code == 0  ip.ttl == 255
 nd_ns expands to icmp6.type == 135  
icmp6.code == 0  ip.ttl == 255
 nd_na expands to icmp6.type == 136  
icmp6.code == 0  ip.ttl == 255
--
2.38.1
Diese E Mail enthält möglicherweise vertrauliche Inhalte und ist nur für die 
Verwertung durch den vorgesehenen Empfänger bestimmt. Sollten Sie nicht der 
vorgesehene Empfänger sein, setzen Sie den Absender bitte unverzüglich in 
Kenntnis und löschen diese E Mail. Hinweise zum Datenschutz finden Sie 
hier.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH ovn v3 0/4] Send Rarps for ipv6 router lsp

2022-11-04 Thread Felix Hüttner via dev
Previously, garps/rarps were only sent for "external" lsps if these
had an ipv4 address attached. For lsps on gateway routers that do
not have an ipv4 address assigned (e.g. if they are ipv6 only), no
rarps were sent out.

This causes traffic outages when changing the priority of a gateway
chassis, as the physical switches do not get the information where the
mac address now resides. To fix this we send out rarps with just the mac
address of the interface and no ip address.

This change has been tested in an environment with 600 logical routers
on a single ipv6 external network.

Additionally we fix the issue that self-created rarp's are flooded
to logical routers even if this is unnecessary (causing ovs to potentially
drop the packet because of too many resubmits).

This change is also available as a PR at https://github.com/ovn-org/ovn/pull/157

Changes since v2:
- simplified the support of ipv6 in ovn_attach
Changes since v1:
- fix documentation
- remove unnecessary ddlog change

Felix Huettner (4):
  logical-fields: add rarp fields
  northd: handle own rarps like garps
  ovn-macros: support ipv6 in ovn_attach
  pinctrl: Send RARPs for external ipv6 interfaces

 controller/pinctrl.c| 23 +++
 lib/logical-fields.c|  8 
 lib/ovn-util.c  |  2 +-
 northd/northd.c | 11 +++---
 northd/ovn-northd.8.xml |  4 +-
 ovn-sb.xml  |  2 +
 tests/ovn-macros.at | 11 --
 tests/ovn-northd.at | 18 -
 tests/ovn.at| 88 ++---
 9 files changed, 142 insertions(+), 25 deletions(-)

--
2.38.1
Diese E Mail enthält möglicherweise vertrauliche Inhalte und ist nur für die 
Verwertung durch den vorgesehenen Empfänger bestimmt. Sollten Sie nicht der 
vorgesehene Empfänger sein, setzen Sie den Absender bitte unverzüglich in 
Kenntnis und löschen diese E Mail. Hinweise zum Datenschutz finden Sie 
hier.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH ovn branch-22.03 8/8] northd: Properly check the wakeup time in MAC banding aging

2022-11-04 Thread Ales Musil
We shouldn't run the MAC binding aging if the waker has not
expired. This doesn't cause any issue if the bulk removal limit
is not configured. When the limit is configured and hit, there
will be some expired MAC bindings that are just waiting for the
bulk removal delay. If something wakes up the aging run earlier,
it could remove those rows without respecting the delay.
Add a check that we are past the next wake time to prevent that.

Signed-off-by: Ales Musil 
Signed-off-by: Han Zhou 
---
 northd/mac-binding-aging.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/northd/mac-binding-aging.c b/northd/mac-binding-aging.c
index 0196a116b..f65353a69 100644
--- a/northd/mac-binding-aging.c
+++ b/northd/mac-binding-aging.c
@@ -91,9 +91,12 @@ en_mac_binding_aging_run(struct engine_node *node, void 
*data OVS_UNUSED)
 {
 const struct engine_context *eng_ctx = engine_get_context();
 struct northd_data *northd_data = engine_get_input_data("northd", node);
+struct mac_binding_waker *waker =
+engine_get_input_data("mac_binding_aging_waker", node);
 
 if (!eng_ctx->ovnsb_idl_txn ||
-!northd_data->features.mac_binding_timestamp) {
+!northd_data->features.mac_binding_timestamp ||
+time_msec() < waker->next_wake_msec) {
 return;
 }
 
@@ -101,8 +104,6 @@ en_mac_binding_aging_run(struct engine_node *node, void 
*data OVS_UNUSED)
 int64_t now = time_wall_msec();
 uint32_t removal_limit = get_removal_limit(node);
 uint32_t removed_n = 0;
-struct mac_binding_waker *waker =
-engine_get_input_data("mac_binding_aging_waker", node);
 struct ovsdb_idl_index *sbrec_mac_binding_by_datapath =
 engine_ovsdb_node_get_index(engine_get_input("SB_mac_binding", node),
 "sbrec_mac_binding_by_datapath");
-- 
2.37.3

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH ovn branch-22.03 7/8] northd: Increase the MAC binding removal delay

2022-11-04 Thread Ales Musil
Reported-at: https://bugzilla.redhat.com/2084668
Signed-off-by: Ales Musil 
Acked-by: Dumitru Ceara 
Signed-off-by: Mark Michelson 
---
 northd/mac-binding-aging.c | 2 +-
 ovn-nb.xml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/northd/mac-binding-aging.c b/northd/mac-binding-aging.c
index 4a2dfbbf8..0196a116b 100644
--- a/northd/mac-binding-aging.c
+++ b/northd/mac-binding-aging.c
@@ -28,7 +28,7 @@
 
 VLOG_DEFINE_THIS_MODULE(mac_binding_aging);
 
-#define MAC_BINDING_BULK_REMOVAL_DELAY_MSEC 10
+#define MAC_BINDING_BULK_REMOVAL_DELAY_MSEC 5000
 
 struct mac_binding_waker {
 bool should_schedule;
diff --git a/ovn-nb.xml b/ovn-nb.xml
index 29789c4b6..cc6f24887 100644
--- a/ovn-nb.xml
+++ b/ovn-nb.xml
@@ -167,7 +167,7 @@
 MAC binding aging bulk removal limit. This limits how many rows
 can expire in a single transaction. Default value is 0 which
 is unlimited. When we hit the limit next batch removal is delayed by
-10 ms.
+5 s.
   
 
   
-- 
2.37.3

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH ovn branch-22.03 6/8] northd, controller: Add MAC binding timestamp feature indication

2022-11-04 Thread Ales Musil
It can happen that northd and SB DB are updated before ovn-controller;
in that case new MAC bindings would be added with timestamp=0.
In combination with enabled MAC binding aging, the affected rows
would be deleted over and over until the controller is upgraded.

To prevent the aforementioned issue, add an indication of whether
the controller supports MAC binding timestamps.

Signed-off-by: Ales Musil 
Acked-by: Dumitru Ceara 
---
 controller/chassis.c   |  7 +++
 include/ovn/features.h |  1 +
 northd/mac-binding-aging.c |  5 +++--
 northd/northd.c| 24 ++--
 northd/northd.h|  1 +
 5 files changed, 30 insertions(+), 8 deletions(-)

diff --git a/controller/chassis.c b/controller/chassis.c
index 92850fcc1..c6a1c50ae 100644
--- a/controller/chassis.c
+++ b/controller/chassis.c
@@ -351,6 +351,7 @@ chassis_build_other_config(const struct ovs_chassis_cfg 
*ovs_cfg,
  ovs_cfg->is_interconn ? "true" : "false");
 smap_replace(config, OVN_FEATURE_PORT_UP_NOTIF, "true");
 smap_replace(config, OVN_FEATURE_CT_NO_MASKED_LABEL, "true");
+smap_replace(config, OVN_FEATURE_MAC_BINDING_TIMESTAMP, "true");
 }
 
 /*
@@ -462,6 +463,12 @@ chassis_other_config_changed(const struct ovs_chassis_cfg 
*ovs_cfg,
 return true;
 }
 
+if (!smap_get_bool(_rec->other_config,
+   OVN_FEATURE_MAC_BINDING_TIMESTAMP,
+   false)) {
+return true;
+}
+
 return false;
 }
 
diff --git a/include/ovn/features.h b/include/ovn/features.h
index 8fbdbf19a..679f67457 100644
--- a/include/ovn/features.h
+++ b/include/ovn/features.h
@@ -23,6 +23,7 @@
 /* ovn-controller supported feature names. */
 #define OVN_FEATURE_PORT_UP_NOTIF  "port-up-notif"
 #define OVN_FEATURE_CT_NO_MASKED_LABEL "ct-no-masked-label"
+#define OVN_FEATURE_MAC_BINDING_TIMESTAMP "mac-binding-timestamp"
 
 /* OVS datapath supported features.  Based on availability OVN might generate
  * different types of openflows.
diff --git a/northd/mac-binding-aging.c b/northd/mac-binding-aging.c
index 36d0a6fd7..4a2dfbbf8 100644
--- a/northd/mac-binding-aging.c
+++ b/northd/mac-binding-aging.c
@@ -90,8 +90,10 @@ void
 en_mac_binding_aging_run(struct engine_node *node, void *data OVS_UNUSED)
 {
 const struct engine_context *eng_ctx = engine_get_context();
+struct northd_data *northd_data = engine_get_input_data("northd", node);
 
-if (!eng_ctx->ovnsb_idl_txn) {
+if (!eng_ctx->ovnsb_idl_txn ||
+!northd_data->features.mac_binding_timestamp) {
 return;
 }
 
@@ -99,7 +101,6 @@ en_mac_binding_aging_run(struct engine_node *node, void 
*data OVS_UNUSED)
 int64_t now = time_wall_msec();
 uint32_t removal_limit = get_removal_limit(node);
 uint32_t removed_n = 0;
-struct northd_data *northd_data = engine_get_input_data("northd", node);
 struct mac_binding_waker *waker =
 engine_get_input_data("mac_binding_aging_waker", node);
 struct ovsdb_idl_index *sbrec_mac_binding_by_datapath =
diff --git a/northd/northd.c b/northd/northd.c
index dd42498e2..44f607d3e 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -396,14 +396,23 @@ build_chassis_features(const struct northd_input 
*input_data,
 const struct sbrec_chassis *chassis;
 
 SBREC_CHASSIS_TABLE_FOR_EACH (chassis, input_data->sbrec_chassis) {
-if (!smap_get_bool(>other_config,
-   OVN_FEATURE_CT_NO_MASKED_LABEL,
-   false)) {
+bool ct_no_masked_label =
+smap_get_bool(>other_config,
+  OVN_FEATURE_CT_NO_MASKED_LABEL,
+  false);
+if (!ct_no_masked_label && chassis_features->ct_no_masked_label) {
 chassis_features->ct_no_masked_label = false;
-return;
+}
+
+bool mac_binding_timestamp =
+smap_get_bool(>other_config,
+  OVN_FEATURE_MAC_BINDING_TIMESTAMP,
+  false);
+if (!mac_binding_timestamp &&
+chassis_features->mac_binding_timestamp) {
+chassis_features->mac_binding_timestamp = false;
 }
 }
-chassis_features->ct_no_masked_label = true;
 }
 
 struct ovn_chassis_qdisc_queues {
@@ -15116,7 +15125,10 @@ northd_init(struct northd_data *data)
 hmap_init(>lbs);
 hmap_init(>bfd_connections);
 ovs_list_init(>lr_list);
-memset(>features, 0, sizeof data->features);
+data->features = (struct chassis_features) {
+.ct_no_masked_label = true,
+.mac_binding_timestamp = true,
+};
 data->ovn_internal_version_changed = false;
 }
 
diff --git a/northd/northd.h b/northd/northd.h
index c66023b4b..8dc11940e 100644
--- a/northd/northd.h
+++ b/northd/northd.h
@@ -59,6 +59,7 @@ struct northd_input {
 
 struct chassis_features {
 bool ct_no_masked_label;
+bool mac_binding_timestamp;
 };
 
 struct northd_data {
-- 
2.37.3


[ovs-dev] [PATCH ovn branch-22.03 3/8] northd: Move struct ovn_datapath and related structs to northd.h

2022-11-04 Thread Ales Musil
The struct ovn_datapath could not be used outside of northd.c.
Move it to northd.h so that it can be used by other .c files later on.

Reported-at: https://bugzilla.redhat.com/2084668
Acked-by: Dumitru Ceara 
Acked-by: Mark Michelson 
Acked-by: Han Zhou 
Signed-off-by: Ales Musil 
---
 northd/northd.c | 153 --
 northd/northd.h | 157 
 2 files changed, 157 insertions(+), 153 deletions(-)

diff --git a/northd/northd.c b/northd/northd.c
index 0e8df6ae9..dd42498e2 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -503,74 +503,6 @@ port_has_qos_params(const struct smap *opts)
 }
 
 
-/*
- * Multicast snooping and querier per datapath configuration.
- */
-struct mcast_switch_info {
-
-bool enabled;   /* True if snooping enabled. */
-bool querier;   /* True if querier enabled. */
-bool flood_unregistered;/* True if unregistered multicast should be
- * flooded.
- */
-bool flood_relay;   /* True if the switch is connected to a
- * multicast router and unregistered multicast
- * should be flooded to the mrouter. Only
- * applicable if flood_unregistered == false.
- */
-bool flood_reports; /* True if the switch has at least one port
- * configured to flood reports.
- */
-bool flood_static;  /* True if the switch has at least one port
- * configured to flood traffic.
- */
-int64_t table_size; /* Max number of IP multicast groups. */
-int64_t idle_timeout;   /* Timeout after which an idle group is
- * flushed.
- */
-int64_t query_interval; /* Interval between multicast queries. */
-char *eth_src;  /* ETH src address of the queries. */
-char *ipv4_src; /* IPv4 src address of the queries. */
-char *ipv6_src; /* IPv6 src address of the queries. */
-
-int64_t query_max_response; /* Expected time after which reports should
- * be received for queries that were sent out.
- */
-
-atomic_uint64_t active_v4_flows;   /* Current number of active IPv4 
multicast
- * flows.
- */
-atomic_uint64_t active_v6_flows;   /* Current number of active IPv6 
multicast
- * flows.
- */
-};
-
-struct mcast_router_info {
-bool relay;/* True if the router should relay IP multicast. */
-bool flood_static; /* True if the router has at least one port configured
-* to flood traffic.
-*/
-};
-
-struct mcast_info {
-
-struct hmap group_tnlids;  /* Group tunnel IDs in use on this DP. */
-uint32_t group_tnlid_hint; /* Hint for allocating next group tunnel ID. */
-struct ovs_list groups;/* List of groups learnt on this DP. */
-
-union {
-struct mcast_switch_info sw;  /* Switch specific multicast info. */
-struct mcast_router_info rtr; /* Router specific multicast info. */
-};
-};
-
-struct mcast_port_info {
-bool flood; /* True if the port should flood IP multicast traffic
- * regardless if it's registered or not. */
-bool flood_reports; /* True if the port should flood IP multicast reports
- * (e.g., IGMP join/leave). */
-};
-
 static void
 init_mcast_port_info(struct mcast_port_info *mcast_info,
  const struct nbrec_logical_switch_port *nbsp,
@@ -600,91 +532,6 @@ ovn_mcast_group_allocate_key(struct mcast_info *mcast_info)
   _info->group_tnlid_hint);
 }
 
-/* The 'key' comes from nbs->header_.uuid or nbr->header_.uuid or
- * sb->external_ids:logical-switch. */
-struct ovn_datapath {
-struct hmap_node key_node;  /* Index on 'key'. */
-struct uuid key;/* (nbs/nbr)->header_.uuid. */
-
-const struct nbrec_logical_switch *nbs;  /* May be NULL. */
-const struct nbrec_logical_router *nbr;  /* May be NULL. */
-const struct sbrec_datapath_binding *sb; /* May be NULL. */
-
-struct ovs_list list;   /* In list of similar records. */
-
-uint32_t tunnel_key;
-
-/* Logical switch data. */
-struct ovn_port **router_ports;
-size_t n_router_ports;
-
-struct hmap port_tnlids;
-uint32_t port_key_hint;
-
-bool has_stateful_acl;
-bool has_lb_vip;
-bool has_unknown;
-bool has_acls;
-
-/* IPAM data. */
-struct ipam_info ipam_info;
-
-/* Multicast data. */
- 

[ovs-dev] [PATCH ovn branch-22.03 4/8] northd: Add MAC binding aging mechanism

2022-11-04 Thread Ales Musil
Add a MAC binding aging mechanism that utilizes
the timestamp column of the MAC_Binding table.
When a MAC binding exceeds the threshold it is
removed from the SB DB. The removal is postponed only if
we receive an ARP update that changes the MAC address.

The threshold is configurable via option
"mac_binding_age_threshold" that can be specified
for each logical router. The option is defaulting to
0 which means that by default the aging is disabled
and the MAC binding rows will be persisted the same
way as before.

Reported-at: https://bugzilla.redhat.com/2084668
Acked-by: Dumitru Ceara 
Signed-off-by: Ales Musil 
---
 NEWS   |   3 +
 northd/automake.mk |   2 +
 northd/inc-proc-northd.c   |  15 
 northd/mac-binding-aging.c | 161 +
 northd/mac-binding-aging.h |  33 
 ovn-nb.xml |   7 ++
 tests/ovn.at   | 113 ++
 7 files changed, 334 insertions(+)
 create mode 100644 northd/mac-binding-aging.c
 create mode 100644 northd/mac-binding-aging.h

diff --git a/NEWS b/NEWS
index c1616124b..355074a30 100644
--- a/NEWS
+++ b/NEWS
@@ -5,6 +5,9 @@ OVN v22.03.2 - xx xxx 
   - Bump python version required for building OVN to 3.6.
   - Added support for setting the Next server IP in the DHCP header
 using the private DHCP option - 253 in native OVN DHCPv4 responder.
+  - Added MAC binding aging mechanism, that is disabled by default.
+It can be enabled per logical router with option
+"mac_binding_age_threshold".
 
 OVN v22.03.1 - 03 Jun 2022
 --
diff --git a/northd/automake.mk b/northd/automake.mk
index 4862ec7b7..81582867d 100644
--- a/northd/automake.mk
+++ b/northd/automake.mk
@@ -1,6 +1,8 @@
 # ovn-northd
 bin_PROGRAMS += northd/ovn-northd
 northd_ovn_northd_SOURCES = \
+   northd/mac-binding-aging.c \
+   northd/mac-binding-aging.h \
northd/northd.c \
northd/northd.h \
northd/ovn-northd.c \
diff --git a/northd/inc-proc-northd.c b/northd/inc-proc-northd.c
index af55221e3..e30b281b8 100644
--- a/northd/inc-proc-northd.c
+++ b/northd/inc-proc-northd.c
@@ -21,9 +21,11 @@
 #include "chassis-index.h"
 #include "ip-mcast-index.h"
 #include "lib/inc-proc-eng.h"
+#include "lib/mac-binding-index.h"
 #include "lib/ovn-nb-idl.h"
 #include "lib/ovn-sb-idl.h"
 #include "mcast-group-index.h"
+#include "northd/mac-binding-aging.h"
 #include "openvswitch/poll-loop.h"
 #include "openvswitch/vlog.h"
 #include "inc-proc-northd.h"
@@ -146,6 +148,8 @@ enum sb_engine_node {
  * avoid sparse errors. */
 static ENGINE_NODE(northd, "northd");
 static ENGINE_NODE(lflow, "lflow");
+static ENGINE_NODE(mac_binding_aging, "mac_binding_aging");
+static ENGINE_NODE(mac_binding_aging_waker, "mac_binding_aging_waker");
 
 void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
   struct ovsdb_idl_loop *sb)
@@ -206,12 +210,18 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
 engine_add_input(_northd, _sb_service_monitor, NULL);
 engine_add_input(_northd, _sb_load_balancer, NULL);
 engine_add_input(_northd, _sb_fdb, NULL);
+engine_add_input(_mac_binding_aging, _sb_mac_binding, NULL);
+engine_add_input(_mac_binding_aging, _northd, NULL);
+engine_add_input(_mac_binding_aging, _mac_binding_aging_waker, NULL);
 engine_add_input(_lflow, _nb_bfd, NULL);
 engine_add_input(_lflow, _sb_bfd, NULL);
 engine_add_input(_lflow, _sb_logical_flow, NULL);
 engine_add_input(_lflow, _sb_multicast_group, NULL);
 engine_add_input(_lflow, _sb_igmp_group, NULL);
 engine_add_input(_lflow, _northd, NULL);
+/* XXX: The "en_mac_binding_aging" should be separate "root" node
+ * once I-P engine allows multiple root nodes. */
+engine_add_input(_lflow, _mac_binding_aging, NULL);
 
 struct engine_arg engine_arg = {
 .nb_idl = nb->idl,
@@ -228,6 +238,8 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
  ip_mcast_index_create(sb->idl);
 struct ovsdb_idl_index *sbrec_chassis_by_hostname =
 chassis_hostname_index_create(sb->idl);
+struct ovsdb_idl_index *sbrec_mac_binding_by_datapath
+= mac_binding_by_datapath_index_create(sb->idl);
 
 engine_init(_lflow, _arg);
 
@@ -246,6 +258,9 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
 engine_ovsdb_node_add_index(_sb_ip_multicast,
 "sbrec_ip_mcast_by_dp",
 sbrec_ip_mcast_by_dp);
+engine_ovsdb_node_add_index(_sb_mac_binding,
+"sbrec_mac_binding_by_datapath",
+sbrec_mac_binding_by_datapath);
 }
 
 void inc_proc_northd_run(struct ovsdb_idl_txn *ovnnb_txn,
diff --git a/northd/mac-binding-aging.c b/northd/mac-binding-aging.c
new file mode 100644
index 0..3859c050b
--- /dev/null
+++ b/northd/mac-binding-aging.c
@@ -0,0 +1,161 @@
+/* Copyright (c) 2022, Red Hat, Inc.

[ovs-dev] [PATCH ovn branch-22.03 5/8] northd: Add config to limit bulk removal of MAC binding

2022-11-04 Thread Ales Musil
Add a configuration option to the NB global table
called "mac_binding_removal_limit", defaulting to 0.
This option limits the number of MAC bindings
that can be removed by the aging mechanism in a single
transaction. A value of 0 means that the limit is disabled.
If the limit is reached, the next removal is delayed by
10 ms. Setting this option has the downside that,
in theory, the removal could never finish; in practice
this is unlikely, considering that not all routers
will have aging enabled and those that do will use a
reasonable threshold.

Reported-at: https://bugzilla.redhat.com/2084668
Acked-by: Mark Michelson 
Acked-by: Dumitru Ceara 
Signed-off-by: Ales Musil 
---
 northd/inc-proc-northd.c   |  1 +
 northd/mac-binding-aging.c | 33 +++--
 ovn-nb.xml |  8 
 3 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/northd/inc-proc-northd.c b/northd/inc-proc-northd.c
index e30b281b8..582cb5af7 100644
--- a/northd/inc-proc-northd.c
+++ b/northd/inc-proc-northd.c
@@ -210,6 +210,7 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
 engine_add_input(_northd, _sb_service_monitor, NULL);
 engine_add_input(_northd, _sb_load_balancer, NULL);
 engine_add_input(_northd, _sb_fdb, NULL);
+engine_add_input(_mac_binding_aging, _nb_nb_global, NULL);
 engine_add_input(_mac_binding_aging, _sb_mac_binding, NULL);
 engine_add_input(_mac_binding_aging, _northd, NULL);
 engine_add_input(_mac_binding_aging, _mac_binding_aging_waker, NULL);
diff --git a/northd/mac-binding-aging.c b/northd/mac-binding-aging.c
index 3859c050b..36d0a6fd7 100644
--- a/northd/mac-binding-aging.c
+++ b/northd/mac-binding-aging.c
@@ -28,6 +28,8 @@
 
 VLOG_DEFINE_THIS_MODULE(mac_binding_aging);
 
+#define MAC_BINDING_BULK_REMOVAL_DELAY_MSEC 10
+
 struct mac_binding_waker {
 bool should_schedule;
 long long next_wake_msec;
@@ -37,7 +39,8 @@ static void
 mac_binding_aging_run_for_datapath(const struct sbrec_datapath_binding *dp,
const struct nbrec_logical_router *nbr,
struct ovsdb_idl_index *mb_by_datapath,
-   int64_t now, int64_t *wake_delay)
+   int64_t now, int64_t *wake_delay,
+   uint32_t removal_limit, uint32_t *removed_n)
 {
 uint64_t threshold = smap_get_uint(>options,
"mac_binding_age_threshold",
@@ -58,6 +61,10 @@ mac_binding_aging_run_for_datapath(const struct 
sbrec_datapath_binding *dp,
 continue;
 } else if (elapsed >= threshold) {
 sbrec_mac_binding_delete(mb);
+(*removed_n)++;
+if (removal_limit && *removed_n == removal_limit) {
+break;
+}
 } else {
 *wake_delay = MIN(*wake_delay, threshold - elapsed);
 }
@@ -65,6 +72,20 @@ mac_binding_aging_run_for_datapath(const struct 
sbrec_datapath_binding *dp,
 sbrec_mac_binding_index_destroy_row(mb_index_row);
 }
 
+static uint32_t
+get_removal_limit(struct engine_node *node)
+{
+const struct nbrec_nb_global_table *nb_global_table =
+EN_OVSDB_GET(engine_get_input("NB_nb_global", node));
+const struct nbrec_nb_global *nb =
+nbrec_nb_global_table_first(nb_global_table);
+if (!nb) {
+   return 0;
+}
+
+return smap_get_uint(>options, "mac_binding_removal_limit", 0);
+}
+
 void
 en_mac_binding_aging_run(struct engine_node *node, void *data OVS_UNUSED)
 {
@@ -76,6 +97,8 @@ en_mac_binding_aging_run(struct engine_node *node, void *data 
OVS_UNUSED)
 
 int64_t next_expire_msec = INT64_MAX;
 int64_t now = time_wall_msec();
+uint32_t removal_limit = get_removal_limit(node);
+uint32_t removed_n = 0;
 struct northd_data *northd_data = engine_get_input_data("northd", node);
 struct mac_binding_waker *waker =
 engine_get_input_data("mac_binding_aging_waker", node);
@@ -88,7 +111,13 @@ en_mac_binding_aging_run(struct engine_node *node, void 
*data OVS_UNUSED)
 if (od->sb && od->nbr) {
 mac_binding_aging_run_for_datapath(od->sb, od->nbr,
sbrec_mac_binding_by_datapath,
-   now, _expire_msec);
+   now, _expire_msec,
+   removal_limit, _n);
+if (removal_limit && removed_n == removal_limit) {
+/* Schedule the next run after specified delay. */
+next_expire_msec = MAC_BINDING_BULK_REMOVAL_DELAY_MSEC;
+break;
+}
 }
 }
 
diff --git a/ovn-nb.xml b/ovn-nb.xml
index 4cc97c368..29789c4b6 100644
--- a/ovn-nb.xml
+++ b/ovn-nb.xml
@@ -162,6 +162,14 @@
 dynamically assigned, e.g. 00:11:22
   
 
+  
+MAC binding aging bulk 

[ovs-dev] [PATCH ovn branch-22.03 2/8] controller: Add mac-binding-index.c/.h files

2022-11-04 Thread Ales Musil
Add helper source file for creating index
over MAC binding table.

Reported-at: https://bugzilla.redhat.com/2084668
Acked-by: Dumitru Ceara 
Acked-by: Mark Michelson 
Acked-by: Han Zhou 
Signed-off-by: Ales Musil 
---
 controller/ovn-controller.c |  8 +++-
 lib/automake.mk |  2 ++
 lib/mac-binding-index.c | 33 +
 lib/mac-binding-index.h | 26 ++
 4 files changed, 64 insertions(+), 5 deletions(-)
 create mode 100644 lib/mac-binding-index.c
 create mode 100644 lib/mac-binding-index.h

diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index 246ca8435..cd7c60b16 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -53,6 +53,7 @@
 #include "lib/chassis-index.h"
 #include "lib/extend-table.h"
 #include "lib/ip-mcast-index.h"
+#include "lib/mac-binding-index.h"
 #include "lib/mcast-group-index.h"
 #include "lib/ovn-sb-idl.h"
 #include "lib/ovn-util.h"
@@ -3363,9 +3364,7 @@ main(int argc, char *argv[])
 = ovsdb_idl_index_create1(ovnsb_idl_loop.idl,
   _datapath_binding_col_tunnel_key);
 struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip
-= ovsdb_idl_index_create2(ovnsb_idl_loop.idl,
-  _mac_binding_col_logical_port,
-  _mac_binding_col_ip);
+= mac_binding_by_lport_ip_index_create(ovnsb_idl_loop.idl);
 struct ovsdb_idl_index *sbrec_ip_multicast
 = ip_mcast_index_create(ovnsb_idl_loop.idl);
 struct ovsdb_idl_index *sbrec_igmp_group
@@ -3378,8 +3377,7 @@ main(int argc, char *argv[])
   _fdb_col_mac,
   _fdb_col_dp_key);
 struct ovsdb_idl_index *sbrec_mac_binding_by_datapath
-= ovsdb_idl_index_create1(ovnsb_idl_loop.idl,
-  _mac_binding_col_datapath);
+= mac_binding_by_datapath_index_create(ovnsb_idl_loop.idl);
 
 ovsdb_idl_track_add_all(ovnsb_idl_loop.idl);
 ovsdb_idl_omit_alert(ovnsb_idl_loop.idl,
diff --git a/lib/automake.mk b/lib/automake.mk
index 829aedfc5..387d392a2 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -26,6 +26,8 @@ lib_libovn_la_SOURCES = \
lib/ovn-parallel-hmap.c \
lib/ip-mcast-index.c \
lib/ip-mcast-index.h \
+   lib/mac-binding-index.c \
+   lib/mac-binding-index.h \
lib/mcast-group-index.c \
lib/mcast-group-index.h \
lib/lex.c \
diff --git a/lib/mac-binding-index.c b/lib/mac-binding-index.c
new file mode 100644
index 0..d774f12d0
--- /dev/null
+++ b/lib/mac-binding-index.c
@@ -0,0 +1,33 @@
+/* Copyright (c) 2022, Red Hat, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include 
+
+#include "lib/mac-binding-index.h"
+#include "lib/ovn-sb-idl.h"
+
+struct ovsdb_idl_index *
+mac_binding_by_datapath_index_create(struct ovsdb_idl *idl)
+{
+return ovsdb_idl_index_create1(idl, _mac_binding_col_datapath);
+}
+
+struct ovsdb_idl_index *
+mac_binding_by_lport_ip_index_create(struct ovsdb_idl *idl)
+{
+return ovsdb_idl_index_create2(idl,
+   _mac_binding_col_logical_port,
+   _mac_binding_col_ip);
+}
diff --git a/lib/mac-binding-index.h b/lib/mac-binding-index.h
new file mode 100644
index 0..8e977ecea
--- /dev/null
+++ b/lib/mac-binding-index.h
@@ -0,0 +1,26 @@
+/* Copyright (c) 2022, Red Hat, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef OVN_MAC_BINDING_INDEX_H
+#define OVN_MAC_BINDING_INDEX_H 1
+
+#include "lib/ovn-sb-idl.h"
+
+struct ovsdb_idl_index *mac_binding_by_datapath_index_create(
+struct ovsdb_idl *idl);
+struct ovsdb_idl_index *mac_binding_by_lport_ip_index_create(
+struct ovsdb_idl *idl);
+
+#endif /* lib/mac-binding-index.h */
-- 
2.37.3

___
dev 

[ovs-dev] [PATCH ovn branch-22.03 1/8] northd, controller: Add timestamp column to MAC_Binding table

2022-11-04 Thread Ales Musil
The new timestamp column in MAC_Binding is
populated with the current time whenever the row is
created or the MAC address is updated.
This can be utilized by a MAC binding aging mechanism
to check whether enough time has passed since the
creation/update.

Reported-at: https://bugzilla.redhat.com/2084668
Acked-By: Ihar Hrachyshka 
Acked-by: Dumitru Ceara 
Acked-by: Mark Michelson 
Acked-by: Han Zhou 
Signed-off-by: Ales Musil 
---
 controller/pinctrl.c | 2 ++
 northd/ovn-northd.c  | 2 +-
 ovn-sb.ovsschema | 5 +++--
 ovn-sb.xml   | 6 ++
 4 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/controller/pinctrl.c b/controller/pinctrl.c
index 2f930c33c..40ed7747b 100644
--- a/controller/pinctrl.c
+++ b/controller/pinctrl.c
@@ -4190,8 +4190,10 @@ mac_binding_add_to_sb(struct ovsdb_idl_txn 
*ovnsb_idl_txn,
 sbrec_mac_binding_set_ip(b, ip);
 sbrec_mac_binding_set_mac(b, mac_string);
 sbrec_mac_binding_set_datapath(b, dp);
+sbrec_mac_binding_set_timestamp(b, time_wall_msec());
 } else if (strcmp(b->mac, mac_string)) {
 sbrec_mac_binding_set_mac(b, mac_string);
+sbrec_mac_binding_set_timestamp(b, time_wall_msec());
 }
 }
 
diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 4d2baefe4..6256bc1bb 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -105,7 +105,7 @@ static const char *rbac_port_binding_update[] =
 static const char *rbac_mac_binding_auth[] =
 {""};
 static const char *rbac_mac_binding_update[] =
-{"logical_port", "ip", "mac", "datapath"};
+{"logical_port", "ip", "mac", "datapath", "timestamp"};
 
 static const char *rbac_svc_monitor_auth[] =
 {""};
diff --git a/ovn-sb.ovsschema b/ovn-sb.ovsschema
index 122614dd5..c2b86e4f2 100644
--- a/ovn-sb.ovsschema
+++ b/ovn-sb.ovsschema
@@ -1,7 +1,7 @@
 {
 "name": "OVN_Southbound",
-"version": "20.21.0",
-"cksum": "2362446865 26963",
+"version": "20.22.0",
+"cksum": "2915287929 27022",
 "tables": {
 "SB_Global": {
 "columns": {
@@ -244,6 +244,7 @@
 "logical_port": {"type": "string"},
 "ip": {"type": "string"},
 "mac": {"type": "string"},
+"timestamp": {"type": {"key": "integer"}},
 "datapath": {"type": {"key": {"type": "uuid",
   "refTable": 
"Datapath_Binding",
 "indexes": [["logical_port", "ip"]],
diff --git a/ovn-sb.xml b/ovn-sb.xml
index d36f55236..8c30eb8db 100644
--- a/ovn-sb.xml
+++ b/ovn-sb.xml
@@ -3494,6 +3494,12 @@ tcp.flags = RST;
 
   The Ethernet address to which the IP is bound.
 
+
+
+  The timestamp in msec when the MAC binding was added or updated.
+  Records that existed before this column will have 0.
+
+
 
   The logical datapath to which the logical port belongs.
 
-- 
2.37.3

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn v2 2/2] northd: Allow related traffic through LB

2022-11-04 Thread 0-day Robot
Bleep bloop.  Greetings Ales Musil, I am a robot and I have tried out your 
patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


checkpatch:
WARNING: Line is 89 characters long (recommended limit is 79)
#108 FILE: northd/ovn-northd.8.xml:3211:
  ct.new  !ct.rel  ip  reg0 == 
VIP

WARNING: Line is 90 characters long (recommended limit is 79)
#117 FILE: northd/ovn-northd.8.xml:3243:
  ct.est  !ct.rel  ip4  reg0 == 
VIP

Lines checked: 818, Warnings: 2, Errors: 0


Please check this out.  If you feel there has been an error, please email 
acon...@redhat.com

Thanks,
0-day Robot
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn branch-22.06 v2 8/8] northd: Properly check the wakeup time in MAC binding aging

2022-11-04 Thread 0-day Robot
Bleep bloop.  Greetings Ales Musil, I am a robot and I have tried out your 
patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


checkpatch:
WARNING: Unexpected sign-offs from developers who are not authors or co-authors 
or committers: Han Zhou 
Lines checked: 50, Warnings: 1, Errors: 0


Please check this out.  If you feel there has been an error, please email 
acon...@redhat.com

Thanks,
0-day Robot
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn branch-22.06 v2 7/8] northd: Increase the MAC binding removal delay

2022-11-04 Thread 0-day Robot
Bleep bloop.  Greetings Ales Musil, I am a robot and I have tried out your 
patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


checkpatch:
WARNING: Unexpected sign-offs from developers who are not authors or co-authors 
or committers: Mark Michelson 
Lines checked: 44, Warnings: 1, Errors: 0


Please check this out.  If you feel there has been an error, please email 
acon...@redhat.com

Thanks,
0-day Robot
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn branch-22.06 v2 5/8] northd: Add config to limit bulk removal of MAC binding

2022-11-04 Thread 0-day Robot
Bleep bloop.  Greetings Ales Musil, I am a robot and I have tried out your 
patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


checkpatch:
WARNING: Line is 84 characters long (recommended limit is 79)
#128 FILE: ovn-nb.xml:166:
  type='{"type": "integer", "minInteger": 0, "maxInteger": 
4294967295}'>

Lines checked: 141, Warnings: 1, Errors: 0


Please check this out.  If you feel there has been an error, please email 
acon...@redhat.com

Thanks,
0-day Robot
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn branch-22.06 v2 4/8] northd: Add MAC binding aging mechanism

2022-11-04 Thread 0-day Robot
Bleep bloop.  Greetings Ales Musil, I am a robot and I have tried out your 
patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


checkpatch:
WARNING: Comment with 'xxx' marker
#99 FILE: northd/inc-proc-northd.c:227:
/* XXX: The "en_mac_binding_aging" should be separate "root" node

WARNING: Line is 84 characters long (recommended limit is 79)
#340 FILE: ovn-nb.xml:2397:
  type='{"type": "integer", "minInteger": 0, "maxInteger": 
4294967295}'>

Lines checked: 472, Warnings: 2, Errors: 0


Please check this out.  If you feel there has been an error, please email 
acon...@redhat.com

Thanks,
0-day Robot
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn v2 1/8] northd, controller: Add timestamp column to MAC_Binding table

2022-11-04 Thread 0-day Robot
Bleep bloop.  Greetings Ales Musil, I am a robot and I have tried out your 
patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


git-am:
error: Failed to merge in the changes.
hint: Use 'git am --show-current-patch=diff' to see the failed patch
Patch failed at 0001 northd, controller: Add timestamp column to MAC_Binding 
table
When you have resolved this problem, run "git am --continue".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".


Please check this out.  If you feel there has been an error, please email 
acon...@redhat.com

Thanks,
0-day Robot
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH ovn v2 2/2] northd: Allow related traffic through LB

2022-11-04 Thread Ales Musil
In order to allow related traffic, use the
new action ct_commit_nat, which ensures that
the traffic is committed and NATted. In combination
with match on ct.rel it allows the related traffic
to go through with correct NAT being applied.

Reported-at: https://bugzilla.redhat.com/2126083
Signed-off-by: Ales Musil 
---
v2: Add e2e test case.
---
 northd/northd.c |  29 --
 northd/ovn-northd.8.xml |  19 +++-
 tests/ovn-northd.at | 208 +---
 tests/ovn.at|  10 +-
 tests/system-ovn.at | 128 +
 5 files changed, 279 insertions(+), 115 deletions(-)

diff --git a/northd/northd.c b/northd/northd.c
index b7388afc5..e188e4f6a 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -6686,7 +6686,8 @@ build_acls(struct ovn_datapath *od, const struct 
chassis_features *features,
 /* Ingress and Egress ACL Table (Priority 65535).
  *
  * Allow traffic that is related to an existing conntrack entry that
- * has not been marked for deletion (ct_mark.blocked).
+ * has not been marked for deletion (ct_mark.blocked). At the same
+ * time apply NAT on this traffic.
  *
  * This is enforced at a higher priority than ACLs can be defined.
  *
@@ -6699,9 +6700,9 @@ build_acls(struct ovn_datapath *od, const struct 
chassis_features *features,
   use_ct_inv_match ? " && !ct.inv" : "",
   ct_blocked_match);
 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX - 3,
-  ds_cstr(), "next;");
+  ds_cstr(), "ct_commit_nat;");
 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX - 3,
-  ds_cstr(), "next;");
+  ds_cstr(), "ct_commit_nat;");
 
 /* Ingress and Egress ACL Table (Priority 65532).
  *
@@ -10006,16 +10007,16 @@ build_lrouter_nat_flows_for_lb(struct ovn_lb_vip 
*lb_vip,
 int prio = 110;
 if (lb_vip->vip_port) {
 prio = 120;
-new_match = xasprintf("ct.new && %s && %s && "
+new_match = xasprintf("ct.new && !ct.rel && %s && %s && "
   REG_ORIG_TP_DPORT_ROUTER" == %d",
   ds_cstr(match), lb->proto, lb_vip->vip_port);
-est_match = xasprintf("ct.est && %s && %s && "
+est_match = xasprintf("ct.est && !ct.rel && %s && %s && "
   REG_ORIG_TP_DPORT_ROUTER" == %d && %s == 1",
   ds_cstr(match), lb->proto, lb_vip->vip_port,
   ct_natted);
 } else {
-new_match = xasprintf("ct.new && %s", ds_cstr(match));
-est_match = xasprintf("ct.est && %s && %s == 1",
+new_match = xasprintf("ct.new && !ct.rel && %s", ds_cstr(match));
+est_match = xasprintf("ct.est && !ct.rel && %s && %s == 1",
   ds_cstr(match), ct_natted);
 }
 
@@ -13664,6 +13665,20 @@ build_lrouter_nat_defrag_and_lb(struct ovn_datapath 
*od, struct hmap *lflows,
 ovn_lflow_add(lflows, od, S_ROUTER_OUT_EGR_LOOP, 0, "1", "next;");
 ovn_lflow_add(lflows, od, S_ROUTER_IN_ECMP_STATEFUL, 0, "1", "next;");
 
+/* Ingress DNAT Table (Priority 50).
+ *
+ * Allow traffic that is related to an existing conntrack entry.
+ * At the same time apply NAT for this traffic.
+ *
+ * NOTE: This does not support related data sessions (eg,
+ * a dynamically negotiated FTP data channel), but will allow
+ * related traffic such as an ICMP Port Unreachable through
+ * that's generated from a non-listening UDP port.  */
+if (od->has_lb_vip) {
+ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 50,
+  "ct.rel && !ct.est && !ct.new", "ct_commit_nat;");
+}
+
 /* If the router has load balancer or DNAT rules, re-circulate every packet
  * through the DNAT zone so that packets that need to be unDNATed in the
  * reverse direction get unDNATed.
diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
index a70f2e678..175db6130 100644
--- a/northd/ovn-northd.8.xml
+++ b/northd/ovn-northd.8.xml
@@ -766,6 +766,8 @@
 related to a committed flow in the connection tracker (e.g., an
 ICMP Port Unreachable from a non-listening UDP port), as long
 as the committed flow does not have ct_mark.blocked set.
+This flow also applies NAT to the related traffic so that ICMP headers
+and the inner packet have correct addresses.
 If ACL logging and logging of related packets is enabled, then a
 companion priority-65533 flow will be installed that accomplishes the
 same thing but also logs the traffic.
@@ -3206,7 +3208,7 @@ icmp6 {
   Router with gateway port in OVN_Northbound database that
   includes a L4 port PORT of protocol P and IPv4
   or IPv6 address VIP, a priority-120 flow that matches on
-  

[ovs-dev] [PATCH ovn v2 1/2] actions: Add new action called ct_commit_nat

2022-11-04 Thread Ales Musil
Add action called ct_commit_nat, that performs
NAT while committing the connection. This is
useful for related traffic on which we need
to perform NAT, mainly ICMP. We need to
commit due to design decision of OvS[0]:

"Connections identified as rel are separate from
the originating connection and must be committed separately."

[0] http://www.openvswitch.org/support/dist-docs/ovs-fields.7.txt

Reported-at: https://bugzilla.redhat.com/2126083
Signed-off-by: Ales Musil 
---
 include/ovn/actions.h |  3 +++
 lib/actions.c | 42 +-
 ovn-sb.xml| 12 
 tests/ovn.at  |  5 +
 utilities/ovn-trace.c | 34 ++
 5 files changed, 95 insertions(+), 1 deletion(-)

diff --git a/include/ovn/actions.h b/include/ovn/actions.h
index d7ee84dac..5814a34aa 100644
--- a/include/ovn/actions.h
+++ b/include/ovn/actions.h
@@ -74,6 +74,7 @@ struct ovn_extend_table;
 OVNACT(CT_LB_MARK,ovnact_ct_lb)   \
 OVNACT(SELECT,ovnact_select)  \
 OVNACT(CT_CLEAR,  ovnact_null)\
+OVNACT(CT_COMMIT_NAT, ovnact_ct_nat)  \
 OVNACT(CLONE, ovnact_nest)\
 OVNACT(ARP,   ovnact_nest)\
 OVNACT(ICMP4, ovnact_nest)\
@@ -275,6 +276,8 @@ struct ovnact_ct_nat {
uint16_t port_hi;
 } port_range;
 
+bool commit;/* Explicit commit action. */
+
 uint8_t ltable; /* Logical table ID of next table. */
 };
 
diff --git a/lib/actions.c b/lib/actions.c
index adbb42db4..91bbabc0e 100644
--- a/lib/actions.c
+++ b/lib/actions.c
@@ -920,6 +920,7 @@ parse_ct_nat(struct action_context *ctx, const char *name,
 return;
 }
 cn->ltable = ctx->pp->cur_ltable + 1;
+cn->commit = false;
 
 if (lexer_match(ctx->lexer, LEX_T_LPAREN)) {
 if (ctx->lexer->token.type != LEX_T_INTEGER
@@ -929,9 +930,11 @@ parse_ct_nat(struct action_context *ctx, const char *name,
 return;
 }
 if (ctx->lexer->token.format == LEX_F_IPV4) {
+cn->commit = true;
 cn->family = AF_INET;
 cn->ipv4 = ctx->lexer->token.value.ipv4;
 } else if (ctx->lexer->token.format == LEX_F_IPV6) {
+cn->commit = true;
 cn->family = AF_INET6;
 cn->ipv6 = ctx->lexer->token.value.ipv6;
 }
@@ -1004,6 +1007,24 @@ parse_CT_SNAT_IN_CZONE(struct action_context *ctx)
  ovnact_put_CT_SNAT_IN_CZONE(ctx->ovnacts));
 }
 
+static void
+parse_CT_COMMIT_NAT(struct action_context *ctx)
+{
+add_prerequisite(ctx, "ip");
+
+if (ctx->pp->cur_ltable >= ctx->pp->n_tables) {
+lexer_error(ctx->lexer,
+"\"ct_commit_related\" action not allowed in last table.");
+return;
+}
+
+struct ovnact_ct_nat *cn = ovnact_put_CT_COMMIT_NAT(ctx->ovnacts);
+cn->commit = true;
+cn->ltable = ctx->pp->cur_ltable + 1;
+cn->family = AF_UNSPEC;
+cn->port_range.exists = false;
+}
+
 static void
 format_ct_nat(const struct ovnact_ct_nat *cn, const char *name, struct ds *s)
 {
@@ -1053,6 +1074,12 @@ format_CT_SNAT_IN_CZONE(const struct ovnact_ct_nat *cn, 
struct ds *s)
 format_ct_nat(cn, "ct_snat_in_czone", s);
 }
 
+static void
+format_CT_COMMIT_NAT(const struct ovnact_ct_nat *cn OVS_UNUSED, struct ds *s)
+{
+ds_put_cstr(s, "ct_commit_nat;");
+}
+
 static void
 encode_ct_nat(const struct ovnact_ct_nat *cn,
   const struct ovnact_encode_params *ep,
@@ -1104,7 +1131,7 @@ encode_ct_nat(const struct ovnact_ct_nat *cn,
 
 ofpacts->header = ofpbuf_push_uninit(ofpacts, nat_offset);
 ct = ofpacts->header;
-if (cn->family == AF_INET || cn->family == AF_INET6) {
+if (cn->commit) {
 ct->flags |= NX_CT_F_COMMIT;
 }
 ofpact_finish(ofpacts, >ofpact);
@@ -1143,6 +1170,17 @@ encode_CT_SNAT_IN_CZONE(const struct ovnact_ct_nat *cn,
 encode_ct_nat(cn, ep, true, ep->common_nat_ct_zone, ofpacts);
 }
 
+static void
+encode_CT_COMMIT_NAT(const struct ovnact_ct_nat *cn,
+ const struct ovnact_encode_params *ep,
+ struct ofpbuf *ofpacts)
+{
+enum mf_field_id zone = ep->is_switch
+? MFF_LOG_CT_ZONE
+: MFF_LOG_DNAT_ZONE;
+encode_ct_nat(cn, ep, false, zone, ofpacts);
+}
+
 static void
 ovnact_ct_nat_free(struct ovnact_ct_nat *ct_nat OVS_UNUSED)
 {
@@ -4732,6 +4770,8 @@ parse_action(struct action_context *ctx)
 parse_ct_lb_action(ctx, true);
 } else if (lexer_match_id(ctx->lexer, "ct_clear")) {
 ovnact_put_CT_CLEAR(ctx->ovnacts);
+} else if (lexer_match_id(ctx->lexer, "ct_commit_nat")) {
+parse_CT_COMMIT_NAT(ctx);
 } else if (lexer_match_id(ctx->lexer, "clone")) {
 parse_CLONE(ctx);
 } else if (lexer_match_id(ctx->lexer, "arp")) {
diff --git 

[ovs-dev] [PATCH ovn v2 0/2] Allow related traffic for LB

2022-11-04 Thread Ales Musil
The related traffic wasn't correctly forwarded
through the LB, the main issue was that the
traffic was not NATted. This series allows
the NAT to be applied and the traffic should
arrive with correct addresses.
---
v2: Add e2e test case.

Ales Musil (2):
  actions: Add new action called ct_commit_nat
  northd: Allow related traffic through LB

 include/ovn/actions.h   |   3 +
 lib/actions.c   |  42 +++-
 northd/northd.c |  29 --
 northd/ovn-northd.8.xml |  19 +++-
 ovn-sb.xml  |  12 +++
 tests/ovn-northd.at | 208 +---
 tests/ovn.at|  15 ++-
 tests/system-ovn.at | 128 +
 utilities/ovn-trace.c   |  34 +++
 9 files changed, 374 insertions(+), 116 deletions(-)

-- 
2.37.3

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH ovn branch-22.06 v2 8/8] northd: Properly check the wakeup time in MAC binding aging

2022-11-04 Thread Ales Musil
We shouldn't run the MAC binding aging if the waker did not
expire. This wouldn't cause any issue if the bulk removal limit
is not configured. When the limit is configured and hit there
will be some MAC bindings expired, just waiting for the
bulk removal delay. If something wakes up the aging run earlier
it could remove those rows without respecting the delay.
Add check if we are past the next wake to prevent that.

Signed-off-by: Ales Musil 
Signed-off-by: Han Zhou 
---
 northd/mac-binding-aging.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/northd/mac-binding-aging.c b/northd/mac-binding-aging.c
index 0196a116b..f65353a69 100644
--- a/northd/mac-binding-aging.c
+++ b/northd/mac-binding-aging.c
@@ -91,9 +91,12 @@ en_mac_binding_aging_run(struct engine_node *node, void 
*data OVS_UNUSED)
 {
 const struct engine_context *eng_ctx = engine_get_context();
 struct northd_data *northd_data = engine_get_input_data("northd", node);
+struct mac_binding_waker *waker =
+engine_get_input_data("mac_binding_aging_waker", node);
 
 if (!eng_ctx->ovnsb_idl_txn ||
-!northd_data->features.mac_binding_timestamp) {
+!northd_data->features.mac_binding_timestamp ||
+time_msec() < waker->next_wake_msec) {
 return;
 }
 
@@ -101,8 +104,6 @@ en_mac_binding_aging_run(struct engine_node *node, void 
*data OVS_UNUSED)
 int64_t now = time_wall_msec();
 uint32_t removal_limit = get_removal_limit(node);
 uint32_t removed_n = 0;
-struct mac_binding_waker *waker =
-engine_get_input_data("mac_binding_aging_waker", node);
 struct ovsdb_idl_index *sbrec_mac_binding_by_datapath =
 engine_ovsdb_node_get_index(engine_get_input("SB_mac_binding", node),
 "sbrec_mac_binding_by_datapath");
-- 
2.37.3

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH ovn branch-22.06 v2 7/8] northd: Increase the MAC binding removal delay

2022-11-04 Thread Ales Musil
Reported-at: https://bugzilla.redhat.com/2084668
Signed-off-by: Ales Musil 
Acked-by: Dumitru Ceara 
Signed-off-by: Mark Michelson 
---
 northd/mac-binding-aging.c | 2 +-
 ovn-nb.xml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/northd/mac-binding-aging.c b/northd/mac-binding-aging.c
index 4a2dfbbf8..0196a116b 100644
--- a/northd/mac-binding-aging.c
+++ b/northd/mac-binding-aging.c
@@ -28,7 +28,7 @@
 
 VLOG_DEFINE_THIS_MODULE(mac_binding_aging);
 
-#define MAC_BINDING_BULK_REMOVAL_DELAY_MSEC 10
+#define MAC_BINDING_BULK_REMOVAL_DELAY_MSEC 5000
 
 struct mac_binding_waker {
 bool should_schedule;
diff --git a/ovn-nb.xml b/ovn-nb.xml
index 2747b0a7d..8aa125093 100644
--- a/ovn-nb.xml
+++ b/ovn-nb.xml
@@ -167,7 +167,7 @@
 MAC binding aging bulk removal limit. This limits how many rows
 can expire in a single transaction. Default value is 0 which
 is unlimited. When we hit the limit next batch removal is delayed by
-10 ms.
+5 s.
   
 
   
-- 
2.37.3

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH ovn branch-22.06 v2 6/8] northd, controller: Add MAC binding timestamp feature indication

2022-11-04 Thread Ales Musil
It can happen that northd and SB DB are updated before ovn-controller;
in that case the new MAC binding would be added with timestamp=0.
In combination with enabled MAC binding aging, the affected rows
would be deleted over and over until the controller is upgraded.

To prevent the aforementioned issue, add an indication of whether
the controller supports MAC binding timestamps.

Signed-off-by: Ales Musil 
Acked-by: Dumitru Ceara 
---
 controller/chassis.c   |  7 +++
 include/ovn/features.h |  1 +
 northd/mac-binding-aging.c |  5 +++--
 northd/northd.c| 24 ++--
 northd/northd.h|  1 +
 5 files changed, 30 insertions(+), 8 deletions(-)

diff --git a/controller/chassis.c b/controller/chassis.c
index 92850fcc1..c6a1c50ae 100644
--- a/controller/chassis.c
+++ b/controller/chassis.c
@@ -351,6 +351,7 @@ chassis_build_other_config(const struct ovs_chassis_cfg 
*ovs_cfg,
  ovs_cfg->is_interconn ? "true" : "false");
 smap_replace(config, OVN_FEATURE_PORT_UP_NOTIF, "true");
 smap_replace(config, OVN_FEATURE_CT_NO_MASKED_LABEL, "true");
+smap_replace(config, OVN_FEATURE_MAC_BINDING_TIMESTAMP, "true");
 }
 
 /*
@@ -462,6 +463,12 @@ chassis_other_config_changed(const struct ovs_chassis_cfg 
*ovs_cfg,
 return true;
 }
 
+if (!smap_get_bool(_rec->other_config,
+   OVN_FEATURE_MAC_BINDING_TIMESTAMP,
+   false)) {
+return true;
+}
+
 return false;
 }
 
diff --git a/include/ovn/features.h b/include/ovn/features.h
index 8fbdbf19a..679f67457 100644
--- a/include/ovn/features.h
+++ b/include/ovn/features.h
@@ -23,6 +23,7 @@
 /* ovn-controller supported feature names. */
 #define OVN_FEATURE_PORT_UP_NOTIF  "port-up-notif"
 #define OVN_FEATURE_CT_NO_MASKED_LABEL "ct-no-masked-label"
+#define OVN_FEATURE_MAC_BINDING_TIMESTAMP "mac-binding-timestamp"
 
 /* OVS datapath supported features.  Based on availability OVN might generate
  * different types of openflows.
diff --git a/northd/mac-binding-aging.c b/northd/mac-binding-aging.c
index 36d0a6fd7..4a2dfbbf8 100644
--- a/northd/mac-binding-aging.c
+++ b/northd/mac-binding-aging.c
@@ -90,8 +90,10 @@ void
 en_mac_binding_aging_run(struct engine_node *node, void *data OVS_UNUSED)
 {
 const struct engine_context *eng_ctx = engine_get_context();
+struct northd_data *northd_data = engine_get_input_data("northd", node);
 
-if (!eng_ctx->ovnsb_idl_txn) {
+if (!eng_ctx->ovnsb_idl_txn ||
+!northd_data->features.mac_binding_timestamp) {
 return;
 }
 
@@ -99,7 +101,6 @@ en_mac_binding_aging_run(struct engine_node *node, void 
*data OVS_UNUSED)
 int64_t now = time_wall_msec();
 uint32_t removal_limit = get_removal_limit(node);
 uint32_t removed_n = 0;
-struct northd_data *northd_data = engine_get_input_data("northd", node);
 struct mac_binding_waker *waker =
 engine_get_input_data("mac_binding_aging_waker", node);
 struct ovsdb_idl_index *sbrec_mac_binding_by_datapath =
diff --git a/northd/northd.c b/northd/northd.c
index bb92bbc6f..e7b53410b 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -407,14 +407,23 @@ build_chassis_features(const struct northd_input 
*input_data,
 const struct sbrec_chassis *chassis;
 
 SBREC_CHASSIS_TABLE_FOR_EACH (chassis, input_data->sbrec_chassis) {
-if (!smap_get_bool(>other_config,
-   OVN_FEATURE_CT_NO_MASKED_LABEL,
-   false)) {
+bool ct_no_masked_label =
+smap_get_bool(>other_config,
+  OVN_FEATURE_CT_NO_MASKED_LABEL,
+  false);
+if (!ct_no_masked_label && chassis_features->ct_no_masked_label) {
 chassis_features->ct_no_masked_label = false;
-return;
+}
+
+bool mac_binding_timestamp =
+smap_get_bool(>other_config,
+  OVN_FEATURE_MAC_BINDING_TIMESTAMP,
+  false);
+if (!mac_binding_timestamp &&
+chassis_features->mac_binding_timestamp) {
+chassis_features->mac_binding_timestamp = false;
 }
 }
-chassis_features->ct_no_masked_label = true;
 }
 
 struct ovn_chassis_qdisc_queues {
@@ -15324,7 +15333,10 @@ northd_init(struct northd_data *data)
 hmap_init(>lb_groups);
 hmap_init(>bfd_connections);
 ovs_list_init(>lr_list);
-memset(>features, 0, sizeof data->features);
+data->features = (struct chassis_features) {
+.ct_no_masked_label = true,
+.mac_binding_timestamp = true,
+};
 data->ovn_internal_version_changed = false;
 }
 
diff --git a/northd/northd.h b/northd/northd.h
index 68c55bff4..4319c49fa 100644
--- a/northd/northd.h
+++ b/northd/northd.h
@@ -66,6 +66,7 @@ struct northd_input {
 
 struct chassis_features {
 bool ct_no_masked_label;
+bool mac_binding_timestamp;
 };
 
 struct northd_data {
-- 
2.37.3


[ovs-dev] [PATCH ovn branch-22.06 v2 4/8] northd: Add MAC binding aging mechanism

2022-11-04 Thread Ales Musil
Add MAC binding aging mechanism, that utilizes
the timestamp column of MAC_Binding table.
When the MAC binding exceeds the threshold it is
removed from SB DB, this is postponed only in case
we receive update ARP with update to MAC address.

The threshold is configurable via option
"mac_binding_age_threshold" that can be specified
for each logical router. The option is defaulting to
0 which means that by default the aging is disabled
and the MAC binding rows will be persisted the same
way as before.

Reported-at: https://bugzilla.redhat.com/2084668
Acked-by: Dumitru Ceara 
Signed-off-by: Ales Musil 
---
 NEWS   |   3 +
 northd/automake.mk |   2 +
 northd/inc-proc-northd.c   |  15 
 northd/mac-binding-aging.c | 161 +
 northd/mac-binding-aging.h |  33 
 ovn-nb.xml |   7 ++
 tests/ovn.at   | 113 ++
 7 files changed, 334 insertions(+)
 create mode 100644 northd/mac-binding-aging.c
 create mode 100644 northd/mac-binding-aging.h

diff --git a/NEWS b/NEWS
index 1c8a3d165..236d0007f 100644
--- a/NEWS
+++ b/NEWS
@@ -5,6 +5,9 @@ OVN v22.06.1 - xx xxx 
 NAT-T UDP encapsulation. Requires OVS support for IPsec custom tunnel
 options (which is available in OVS 3.0).
   - Bump python version required for building OVN to 3.6.
+  - Added MAC binding aging mechanism, that is disabled by default.
+It can be enabled per logical router with option
+"mac_binding_age_threshold".
 
 OVN v22.06.0 - 03 Jun 2022
 --
diff --git a/northd/automake.mk b/northd/automake.mk
index 4862ec7b7..81582867d 100644
--- a/northd/automake.mk
+++ b/northd/automake.mk
@@ -1,6 +1,8 @@
 # ovn-northd
 bin_PROGRAMS += northd/ovn-northd
 northd_ovn_northd_SOURCES = \
+   northd/mac-binding-aging.c \
+   northd/mac-binding-aging.h \
northd/northd.c \
northd/northd.h \
northd/ovn-northd.c \
diff --git a/northd/inc-proc-northd.c b/northd/inc-proc-northd.c
index 43093cb5a..fc0d9e670 100644
--- a/northd/inc-proc-northd.c
+++ b/northd/inc-proc-northd.c
@@ -22,9 +22,11 @@
 #include "ip-mcast-index.h"
 #include "static-mac-binding-index.h"
 #include "lib/inc-proc-eng.h"
+#include "lib/mac-binding-index.h"
 #include "lib/ovn-nb-idl.h"
 #include "lib/ovn-sb-idl.h"
 #include "mcast-group-index.h"
+#include "northd/mac-binding-aging.h"
 #include "openvswitch/poll-loop.h"
 #include "openvswitch/vlog.h"
 #include "inc-proc-northd.h"
@@ -149,6 +151,8 @@ enum sb_engine_node {
  * avoid sparse errors. */
 static ENGINE_NODE(northd, "northd");
 static ENGINE_NODE(lflow, "lflow");
+static ENGINE_NODE(mac_binding_aging, "mac_binding_aging");
+static ENGINE_NODE(mac_binding_aging_waker, "mac_binding_aging_waker");
 
 void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
   struct ovsdb_idl_loop *sb)
@@ -211,12 +215,18 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
 engine_add_input(_northd, _sb_load_balancer, NULL);
 engine_add_input(_northd, _sb_fdb, NULL);
 engine_add_input(_northd, _sb_static_mac_binding, NULL);
+engine_add_input(_mac_binding_aging, _sb_mac_binding, NULL);
+engine_add_input(_mac_binding_aging, _northd, NULL);
+engine_add_input(_mac_binding_aging, _mac_binding_aging_waker, NULL);
 engine_add_input(_lflow, _nb_bfd, NULL);
 engine_add_input(_lflow, _sb_bfd, NULL);
 engine_add_input(_lflow, _sb_logical_flow, NULL);
 engine_add_input(_lflow, _sb_multicast_group, NULL);
 engine_add_input(_lflow, _sb_igmp_group, NULL);
 engine_add_input(_lflow, _northd, NULL);
+/* XXX: The "en_mac_binding_aging" should be separate "root" node
+ * once I-P engine allows multiple root nodes. */
+engine_add_input(_lflow, _mac_binding_aging, NULL);
 
 struct engine_arg engine_arg = {
 .nb_idl = nb->idl,
@@ -235,6 +245,8 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
 chassis_hostname_index_create(sb->idl);
 struct ovsdb_idl_index *sbrec_static_mac_binding_by_lport_ip
 = static_mac_binding_index_create(sb->idl);
+struct ovsdb_idl_index *sbrec_mac_binding_by_datapath
+= mac_binding_by_datapath_index_create(sb->idl);
 
 engine_init(_lflow, _arg);
 
@@ -256,6 +268,9 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
 engine_ovsdb_node_add_index(_sb_static_mac_binding,
 "sbrec_static_mac_binding_by_lport_ip",
 sbrec_static_mac_binding_by_lport_ip);
+engine_ovsdb_node_add_index(_sb_mac_binding,
+"sbrec_mac_binding_by_datapath",
+sbrec_mac_binding_by_datapath);
 }
 
 void inc_proc_northd_run(struct ovsdb_idl_txn *ovnnb_txn,
diff --git a/northd/mac-binding-aging.c b/northd/mac-binding-aging.c
new file mode 100644
index 0..3859c050b
--- /dev/null
+++ b/northd/mac-binding-aging.c
@@ -0,0 +1,161 @@
+/* 

[ovs-dev] [PATCH ovn branch-22.06 v2 5/8] northd: Add config to limit bulk removal of MAC binding

2022-11-04 Thread Ales Musil
Add configuration option into NB global table
called "mac_binding_removal_limit" defaulting to 0.
This option limits the number of MAC bindings
that can be removed by the aging mechanism in a single
transaction. A value of 0 means that the limit is disabled.
If the limit is reached next removal will be delayed by
10 ms. This option when being set has a downside that
in theory we could never finish the removal, however in
practice it is unlikely considering that not all routers
will have aging enabled and the enabled will be with
reasonable threshold.

Reported-at: https://bugzilla.redhat.com/2084668
Acked-by: Mark Michelson 
Acked-by: Dumitru Ceara 
Signed-off-by: Ales Musil 
---
 northd/inc-proc-northd.c   |  1 +
 northd/mac-binding-aging.c | 33 +++--
 ovn-nb.xml |  8 
 3 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/northd/inc-proc-northd.c b/northd/inc-proc-northd.c
index fc0d9e670..54e0ad3b0 100644
--- a/northd/inc-proc-northd.c
+++ b/northd/inc-proc-northd.c
@@ -215,6 +215,7 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
 engine_add_input(_northd, _sb_load_balancer, NULL);
 engine_add_input(_northd, _sb_fdb, NULL);
 engine_add_input(_northd, _sb_static_mac_binding, NULL);
+engine_add_input(_mac_binding_aging, _nb_nb_global, NULL);
 engine_add_input(_mac_binding_aging, _sb_mac_binding, NULL);
 engine_add_input(_mac_binding_aging, _northd, NULL);
 engine_add_input(_mac_binding_aging, _mac_binding_aging_waker, NULL);
diff --git a/northd/mac-binding-aging.c b/northd/mac-binding-aging.c
index 3859c050b..36d0a6fd7 100644
--- a/northd/mac-binding-aging.c
+++ b/northd/mac-binding-aging.c
@@ -28,6 +28,8 @@
 
 VLOG_DEFINE_THIS_MODULE(mac_binding_aging);
 
+#define MAC_BINDING_BULK_REMOVAL_DELAY_MSEC 10
+
 struct mac_binding_waker {
 bool should_schedule;
 long long next_wake_msec;
@@ -37,7 +39,8 @@ static void
 mac_binding_aging_run_for_datapath(const struct sbrec_datapath_binding *dp,
const struct nbrec_logical_router *nbr,
struct ovsdb_idl_index *mb_by_datapath,
-   int64_t now, int64_t *wake_delay)
+   int64_t now, int64_t *wake_delay,
+   uint32_t removal_limit, uint32_t *removed_n)
 {
 uint64_t threshold = smap_get_uint(>options,
"mac_binding_age_threshold",
@@ -58,6 +61,10 @@ mac_binding_aging_run_for_datapath(const struct 
sbrec_datapath_binding *dp,
 continue;
 } else if (elapsed >= threshold) {
 sbrec_mac_binding_delete(mb);
+(*removed_n)++;
+if (removal_limit && *removed_n == removal_limit) {
+break;
+}
 } else {
 *wake_delay = MIN(*wake_delay, threshold - elapsed);
 }
@@ -65,6 +72,20 @@ mac_binding_aging_run_for_datapath(const struct 
sbrec_datapath_binding *dp,
 sbrec_mac_binding_index_destroy_row(mb_index_row);
 }
 
+static uint32_t
+get_removal_limit(struct engine_node *node)
+{
+const struct nbrec_nb_global_table *nb_global_table =
+EN_OVSDB_GET(engine_get_input("NB_nb_global", node));
+const struct nbrec_nb_global *nb =
+nbrec_nb_global_table_first(nb_global_table);
+if (!nb) {
+   return 0;
+}
+
+return smap_get_uint(>options, "mac_binding_removal_limit", 0);
+}
+
 void
 en_mac_binding_aging_run(struct engine_node *node, void *data OVS_UNUSED)
 {
@@ -76,6 +97,8 @@ en_mac_binding_aging_run(struct engine_node *node, void *data 
OVS_UNUSED)
 
 int64_t next_expire_msec = INT64_MAX;
 int64_t now = time_wall_msec();
+uint32_t removal_limit = get_removal_limit(node);
+uint32_t removed_n = 0;
 struct northd_data *northd_data = engine_get_input_data("northd", node);
 struct mac_binding_waker *waker =
 engine_get_input_data("mac_binding_aging_waker", node);
@@ -88,7 +111,13 @@ en_mac_binding_aging_run(struct engine_node *node, void 
*data OVS_UNUSED)
 if (od->sb && od->nbr) {
 mac_binding_aging_run_for_datapath(od->sb, od->nbr,
sbrec_mac_binding_by_datapath,
-   now, _expire_msec);
+   now, _expire_msec,
+   removal_limit, _n);
+if (removal_limit && removed_n == removal_limit) {
+/* Schedule the next run after specified delay. */
+next_expire_msec = MAC_BINDING_BULK_REMOVAL_DELAY_MSEC;
+break;
+}
 }
 }
 
diff --git a/ovn-nb.xml b/ovn-nb.xml
index d137d68cf..2747b0a7d 100644
--- a/ovn-nb.xml
+++ b/ovn-nb.xml
@@ -162,6 +162,14 @@
 dynamically assigned, e.g. 00:11:22
   
 
+  
+MAC binding aging 

[ovs-dev] [PATCH ovn branch-22.06 v2 1/8] northd, controller: Add timestamp column to MAC_Binding table

2022-11-04 Thread Ales Musil
The new timestamp column in MAC_Binding is
populated with current time whenever the row is
created or the MAC address is updated.
This can be utilized by MAC binding aging mechanism,
when we can check if enough time has passed since the
creation/update.

Reported-at: https://bugzilla.redhat.com/2084668
Acked-By: Ihar Hrachyshka 
Acked-by: Dumitru Ceara 
Acked-by: Mark Michelson 
Acked-by: Han Zhou 
Signed-off-by: Ales Musil 
---
v2: Fix the SBDB schema version
---
 controller/pinctrl.c | 2 ++
 northd/ovn-northd.c  | 2 +-
 ovn-sb.ovsschema | 5 +++--
 ovn-sb.xml   | 6 ++
 4 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/controller/pinctrl.c b/controller/pinctrl.c
index 9fe7859d5..ad79cda6e 100644
--- a/controller/pinctrl.c
+++ b/controller/pinctrl.c
@@ -4212,8 +4212,10 @@ mac_binding_add_to_sb(struct ovsdb_idl_txn 
*ovnsb_idl_txn,
 sbrec_mac_binding_set_ip(b, ip);
 sbrec_mac_binding_set_mac(b, mac_string);
 sbrec_mac_binding_set_datapath(b, dp);
+sbrec_mac_binding_set_timestamp(b, time_wall_msec());
 } else if (strcmp(b->mac, mac_string)) {
 sbrec_mac_binding_set_mac(b, mac_string);
+sbrec_mac_binding_set_timestamp(b, time_wall_msec());
 }
 }
 
diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index ab28756af..bd35802ed 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -115,7 +115,7 @@ static const char *rbac_port_binding_update[] =
 static const char *rbac_mac_binding_auth[] =
 {""};
 static const char *rbac_mac_binding_update[] =
-{"logical_port", "ip", "mac", "datapath"};
+{"logical_port", "ip", "mac", "datapath", "timestamp"};
 
 static const char *rbac_svc_monitor_auth[] =
 {""};
diff --git a/ovn-sb.ovsschema b/ovn-sb.ovsschema
index 3b78ea6f6..de1a3c113 100644
--- a/ovn-sb.ovsschema
+++ b/ovn-sb.ovsschema
@@ -1,7 +1,7 @@
 {
 "name": "OVN_Southbound",
-"version": "20.23.0",
-"cksum": "4045988377 28575",
+"version": "20.24.0",
+"cksum": "1811467130 28634",
 "tables": {
 "SB_Global": {
 "columns": {
@@ -260,6 +260,7 @@
 "logical_port": {"type": "string"},
 "ip": {"type": "string"},
 "mac": {"type": "string"},
+"timestamp": {"type": {"key": "integer"}},
 "datapath": {"type": {"key": {"type": "uuid",
   "refTable": 
"Datapath_Binding",
 "indexes": [["logical_port", "ip"]],
diff --git a/ovn-sb.xml b/ovn-sb.xml
index 42b3d4d68..1a555175c 100644
--- a/ovn-sb.xml
+++ b/ovn-sb.xml
@@ -3672,6 +3672,12 @@ tcp.flags = RST;
 
   The Ethernet address to which the IP is bound.
 
+
+
+  The timestamp in msec when the MAC binding was added or updated.
+  Records that existed before this column will have 0.
+
+
 
   The logical datapath to which the logical port belongs.
 
-- 
2.37.3

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH ovn branch-22.06 v2 3/8] northd: Move struct ovn_datapath and related structs to northd.h

2022-11-04 Thread Ales Musil
The struct ovn_datapath could not be used outside of northd.c.
Move it to northd.h so that it can be used by other .c files later on.

Reported-at: https://bugzilla.redhat.com/2084668
Acked-by: Dumitru Ceara 
Acked-by: Mark Michelson 
Acked-by: Han Zhou 
Signed-off-by: Ales Musil 
---
 northd/northd.c | 147 --
 northd/northd.h | 151 
 2 files changed, 151 insertions(+), 147 deletions(-)

diff --git a/northd/northd.c b/northd/northd.c
index 62f511613..bb92bbc6f 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -514,74 +514,6 @@ port_has_qos_params(const struct smap *opts)
 }
 
 
-/*
- * Multicast snooping and querier per datapath configuration.
- */
-struct mcast_switch_info {
-
-bool enabled;   /* True if snooping enabled. */
-bool querier;   /* True if querier enabled. */
-bool flood_unregistered;/* True if unregistered multicast should be
- * flooded.
- */
-bool flood_relay;   /* True if the switch is connected to a
- * multicast router and unregistered multicast
- * should be flooded to the mrouter. Only
- * applicable if flood_unregistered == false.
- */
-bool flood_reports; /* True if the switch has at least one port
- * configured to flood reports.
- */
-bool flood_static;  /* True if the switch has at least one port
- * configured to flood traffic.
- */
-int64_t table_size; /* Max number of IP multicast groups. */
-int64_t idle_timeout;   /* Timeout after which an idle group is
- * flushed.
- */
-int64_t query_interval; /* Interval between multicast queries. */
-char *eth_src;  /* ETH src address of the queries. */
-char *ipv4_src; /* IPv4 src address of the queries. */
-char *ipv6_src; /* IPv6 src address of the queries. */
-
-int64_t query_max_response; /* Expected time after which reports should
- * be received for queries that were sent out.
- */
-
-atomic_uint64_t active_v4_flows;   /* Current number of active IPv4 
multicast
- * flows.
- */
-atomic_uint64_t active_v6_flows;   /* Current number of active IPv6 
multicast
- * flows.
- */
-};
-
-struct mcast_router_info {
-bool relay;/* True if the router should relay IP multicast. */
-bool flood_static; /* True if the router has at least one port configured
-* to flood traffic.
-*/
-};
-
-struct mcast_info {
-
-struct hmap group_tnlids;  /* Group tunnel IDs in use on this DP. */
-uint32_t group_tnlid_hint; /* Hint for allocating next group tunnel ID. */
-struct ovs_list groups;/* List of groups learnt on this DP. */
-
-union {
-struct mcast_switch_info sw;  /* Switch specific multicast info. */
-struct mcast_router_info rtr; /* Router specific multicast info. */
-};
-};
-
-struct mcast_port_info {
-bool flood; /* True if the port should flood IP multicast traffic
- * regardless if it's registered or not. */
-bool flood_reports; /* True if the port should flood IP multicast reports
- * (e.g., IGMP join/leave). */
-};
-
 static void
 init_mcast_port_info(struct mcast_port_info *mcast_info,
  const struct nbrec_logical_switch_port *nbsp,
@@ -611,85 +543,6 @@ ovn_mcast_group_allocate_key(struct mcast_info *mcast_info)
   _info->group_tnlid_hint);
 }
 
-/* The 'key' comes from nbs->header_.uuid or nbr->header_.uuid or
- * sb->external_ids:logical-switch. */
-struct ovn_datapath {
-struct hmap_node key_node;  /* Index on 'key'. */
-struct uuid key;/* (nbs/nbr)->header_.uuid. */
-
-const struct nbrec_logical_switch *nbs;  /* May be NULL. */
-const struct nbrec_logical_router *nbr;  /* May be NULL. */
-const struct sbrec_datapath_binding *sb; /* May be NULL. */
-
-struct ovs_list list;   /* In list of similar records. */
-
-uint32_t tunnel_key;
-
-/* Logical switch data. */
-struct ovn_port **router_ports;
-size_t n_router_ports;
-size_t n_allocated_router_ports;
-
-struct hmap port_tnlids;
-uint32_t port_key_hint;
-
-bool has_stateful_acl;
-bool has_lb_vip;
-bool has_unknown;
-bool has_acls;
-
-/* IPAM data. */
-struct ipam_info 

[ovs-dev] [PATCH ovn branch-22.06 v2 2/8] controller: Add mac-binding-index.c/.h files

2022-11-04 Thread Ales Musil
Add helper source file for creating index
over MAC binding table.

Reported-at: https://bugzilla.redhat.com/2084668
Acked-by: Dumitru Ceara 
Acked-by: Mark Michelson 
Acked-by: Han Zhou 
Signed-off-by: Ales Musil 
---
 controller/ovn-controller.c |  8 +++-
 lib/automake.mk |  2 ++
 lib/mac-binding-index.c | 33 +
 lib/mac-binding-index.h | 26 ++
 4 files changed, 64 insertions(+), 5 deletions(-)
 create mode 100644 lib/mac-binding-index.c
 create mode 100644 lib/mac-binding-index.h

diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index 000d57c41..57061b830 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -53,6 +53,7 @@
 #include "lib/chassis-index.h"
 #include "lib/extend-table.h"
 #include "lib/ip-mcast-index.h"
+#include "lib/mac-binding-index.h"
 #include "lib/mcast-group-index.h"
 #include "lib/ovn-sb-idl.h"
 #include "lib/ovn-util.h"
@@ -3496,9 +3497,7 @@ main(int argc, char *argv[])
 = ovsdb_idl_index_create1(ovnsb_idl_loop.idl,
   _datapath_binding_col_tunnel_key);
 struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip
-= ovsdb_idl_index_create2(ovnsb_idl_loop.idl,
-  _mac_binding_col_logical_port,
-  _mac_binding_col_ip);
+= mac_binding_by_lport_ip_index_create(ovnsb_idl_loop.idl);
 struct ovsdb_idl_index *sbrec_ip_multicast
 = ip_mcast_index_create(ovnsb_idl_loop.idl);
 struct ovsdb_idl_index *sbrec_igmp_group
@@ -3511,8 +3510,7 @@ main(int argc, char *argv[])
   _fdb_col_mac,
   _fdb_col_dp_key);
 struct ovsdb_idl_index *sbrec_mac_binding_by_datapath
-= ovsdb_idl_index_create1(ovnsb_idl_loop.idl,
-  _mac_binding_col_datapath);
+= mac_binding_by_datapath_index_create(ovnsb_idl_loop.idl);
 struct ovsdb_idl_index *sbrec_static_mac_binding_by_datapath
 = ovsdb_idl_index_create1(ovnsb_idl_loop.idl,
   _static_mac_binding_col_datapath);
diff --git a/lib/automake.mk b/lib/automake.mk
index 3a2da1fe4..60bead6a6 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -26,6 +26,8 @@ lib_libovn_la_SOURCES = \
lib/ovn-parallel-hmap.c \
lib/ip-mcast-index.c \
lib/ip-mcast-index.h \
+   lib/mac-binding-index.c \
+   lib/mac-binding-index.h \
lib/mcast-group-index.c \
lib/mcast-group-index.h \
lib/lex.c \
diff --git a/lib/mac-binding-index.c b/lib/mac-binding-index.c
new file mode 100644
index 0..d774f12d0
--- /dev/null
+++ b/lib/mac-binding-index.c
@@ -0,0 +1,33 @@
+/* Copyright (c) 2022, Red Hat, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include 
+
+#include "lib/mac-binding-index.h"
+#include "lib/ovn-sb-idl.h"
+
+struct ovsdb_idl_index *
+mac_binding_by_datapath_index_create(struct ovsdb_idl *idl)
+{
+return ovsdb_idl_index_create1(idl, &sbrec_mac_binding_col_datapath);
+}
+
+struct ovsdb_idl_index *
+mac_binding_by_lport_ip_index_create(struct ovsdb_idl *idl)
+{
+return ovsdb_idl_index_create2(idl,
+   &sbrec_mac_binding_col_logical_port,
+   &sbrec_mac_binding_col_ip);
+}
diff --git a/lib/mac-binding-index.h b/lib/mac-binding-index.h
new file mode 100644
index 0..8e977ecea
--- /dev/null
+++ b/lib/mac-binding-index.h
@@ -0,0 +1,26 @@
+/* Copyright (c) 2022, Red Hat, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef OVN_MAC_BINDING_INDEX_H
+#define OVN_MAC_BINDING_INDEX_H 1
+
+#include "lib/ovn-sb-idl.h"
+
+struct ovsdb_idl_index *mac_binding_by_datapath_index_create(
+struct ovsdb_idl *idl);
+struct ovsdb_idl_index *mac_binding_by_lport_ip_index_create(
+struct ovsdb_idl *idl);
+
+#endif 

Re: [ovs-dev] [PATCH ovn v2 1/8] northd, controller: Add timestamp column to MAC_Binding table

2022-11-04 Thread Ales Musil
Oh no it removed the branch-22.06. Sorry for the noise.

On Fri, Nov 4, 2022 at 8:57 AM Ales Musil  wrote:

> The new timestamp column in MAC_Binding is
> populated with current time whenever the row is
> created or the MAC address is updated.
> This can be utilized by MAC binding aging mechanism,
> when we can check if enough time has passed since the
> creation/update.
>
> Reported-at: https://bugzilla.redhat.com/2084668
> Acked-By: Ihar Hrachyshka 
> Acked-by: Dumitru Ceara 
> Acked-by: Mark Michelson 
> Acked-by: Han Zhou 
> Signed-off-by: Ales Musil 
> ---
> v2: Fix the SBDB schema version
> ---
>  controller/pinctrl.c | 2 ++
>  northd/ovn-northd.c  | 2 +-
>  ovn-sb.ovsschema | 5 +++--
>  ovn-sb.xml   | 6 ++
>  4 files changed, 12 insertions(+), 3 deletions(-)
>
> diff --git a/controller/pinctrl.c b/controller/pinctrl.c
> index 9fe7859d5..ad79cda6e 100644
> --- a/controller/pinctrl.c
> +++ b/controller/pinctrl.c
> @@ -4212,8 +4212,10 @@ mac_binding_add_to_sb(struct ovsdb_idl_txn
> *ovnsb_idl_txn,
>  sbrec_mac_binding_set_ip(b, ip);
>  sbrec_mac_binding_set_mac(b, mac_string);
>  sbrec_mac_binding_set_datapath(b, dp);
> +sbrec_mac_binding_set_timestamp(b, time_wall_msec());
>  } else if (strcmp(b->mac, mac_string)) {
>  sbrec_mac_binding_set_mac(b, mac_string);
> +sbrec_mac_binding_set_timestamp(b, time_wall_msec());
>  }
>  }
>
> diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
> index ab28756af..bd35802ed 100644
> --- a/northd/ovn-northd.c
> +++ b/northd/ovn-northd.c
> @@ -115,7 +115,7 @@ static const char *rbac_port_binding_update[] =
>  static const char *rbac_mac_binding_auth[] =
>  {""};
>  static const char *rbac_mac_binding_update[] =
> -{"logical_port", "ip", "mac", "datapath"};
> +{"logical_port", "ip", "mac", "datapath", "timestamp"};
>
>  static const char *rbac_svc_monitor_auth[] =
>  {""};
> diff --git a/ovn-sb.ovsschema b/ovn-sb.ovsschema
> index 3b78ea6f6..de1a3c113 100644
> --- a/ovn-sb.ovsschema
> +++ b/ovn-sb.ovsschema
> @@ -1,7 +1,7 @@
>  {
>  "name": "OVN_Southbound",
> -"version": "20.23.0",
> -"cksum": "4045988377 28575",
> +"version": "20.24.0",
> +"cksum": "1811467130 28634",
>  "tables": {
>  "SB_Global": {
>  "columns": {
> @@ -260,6 +260,7 @@
>  "logical_port": {"type": "string"},
>  "ip": {"type": "string"},
>  "mac": {"type": "string"},
> +"timestamp": {"type": {"key": "integer"}},
>  "datapath": {"type": {"key": {"type": "uuid",
>"refTable":
> "Datapath_Binding",
>  "indexes": [["logical_port", "ip"]],
> diff --git a/ovn-sb.xml b/ovn-sb.xml
> index 42b3d4d68..1a555175c 100644
> --- a/ovn-sb.xml
> +++ b/ovn-sb.xml
> @@ -3672,6 +3672,12 @@ tcp.flags = RST;
>  
>The Ethernet address to which the IP is bound.
>  
> +
> +
> +  The timestamp in msec when the MAC binding was added or updated.
> +  Records that existed before this column will have 0.
> +
> +
>  
>The logical datapath to which the logical port belongs.
>  
> --
> 2.37.3
>
>

-- 

Ales Musil

Senior Software Engineer - OVN Core

Red Hat EMEA 

amu...@redhat.comIM: amusil

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH ovn v2 7/8] northd: Increase the MAC binding removal delay

2022-11-04 Thread Ales Musil
Reported-at: https://bugzilla.redhat.com/2084668
Signed-off-by: Ales Musil 
Acked-by: Dumitru Ceara 
Signed-off-by: Mark Michelson 
---
 northd/mac-binding-aging.c | 2 +-
 ovn-nb.xml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/northd/mac-binding-aging.c b/northd/mac-binding-aging.c
index 4a2dfbbf8..0196a116b 100644
--- a/northd/mac-binding-aging.c
+++ b/northd/mac-binding-aging.c
@@ -28,7 +28,7 @@
 
 VLOG_DEFINE_THIS_MODULE(mac_binding_aging);
 
-#define MAC_BINDING_BULK_REMOVAL_DELAY_MSEC 10
+#define MAC_BINDING_BULK_REMOVAL_DELAY_MSEC 5000
 
 struct mac_binding_waker {
 bool should_schedule;
diff --git a/ovn-nb.xml b/ovn-nb.xml
index 2747b0a7d..8aa125093 100644
--- a/ovn-nb.xml
+++ b/ovn-nb.xml
@@ -167,7 +167,7 @@
 MAC binding aging bulk removal limit. This limits how many rows
 can expire in a single transaction. Default value is 0 which
 is unlimited. When we hit the limit next batch removal is delayed by
-10 ms.
+5 s.
   
 
   
-- 
2.37.3

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH ovn v2 8/8] northd: Properly check the wakeup time in MAC binding aging

2022-11-04 Thread Ales Musil
We shouldn't run the MAC binding aging if the waker has not
expired. This doesn't cause any issue if the bulk removal limit
is not configured. When the limit is configured and hit, there
will be some expired MAC bindings that are just waiting for the
bulk removal delay. If something wakes up the aging run earlier,
it could remove those rows without respecting the delay.
Add a check that we are past the next wake time to prevent that.

Signed-off-by: Ales Musil 
Signed-off-by: Han Zhou 
---
 northd/mac-binding-aging.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/northd/mac-binding-aging.c b/northd/mac-binding-aging.c
index 0196a116b..f65353a69 100644
--- a/northd/mac-binding-aging.c
+++ b/northd/mac-binding-aging.c
@@ -91,9 +91,12 @@ en_mac_binding_aging_run(struct engine_node *node, void 
*data OVS_UNUSED)
 {
 const struct engine_context *eng_ctx = engine_get_context();
 struct northd_data *northd_data = engine_get_input_data("northd", node);
+struct mac_binding_waker *waker =
+engine_get_input_data("mac_binding_aging_waker", node);
 
 if (!eng_ctx->ovnsb_idl_txn ||
-!northd_data->features.mac_binding_timestamp) {
+!northd_data->features.mac_binding_timestamp ||
+time_msec() < waker->next_wake_msec) {
 return;
 }
 
@@ -101,8 +104,6 @@ en_mac_binding_aging_run(struct engine_node *node, void 
*data OVS_UNUSED)
 int64_t now = time_wall_msec();
 uint32_t removal_limit = get_removal_limit(node);
 uint32_t removed_n = 0;
-struct mac_binding_waker *waker =
-engine_get_input_data("mac_binding_aging_waker", node);
 struct ovsdb_idl_index *sbrec_mac_binding_by_datapath =
 engine_ovsdb_node_get_index(engine_get_input("SB_mac_binding", node),
 "sbrec_mac_binding_by_datapath");
-- 
2.37.3

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH ovn v2 6/8] northd, controller: Add MAC binding timestamp feature indication

2022-11-04 Thread Ales Musil
It can happen that northd and SB DB are updated before ovn-controller.
In that case, new MAC bindings would be added with timestamp=0.
In combination with enabled MAC binding aging, the affected rows
would be deleted over and over until the controller is upgraded.

To prevent the aforementioned issue, add an indication of whether
the controller supports MAC binding timestamps.

Signed-off-by: Ales Musil 
Acked-by: Dumitru Ceara 
---
 controller/chassis.c   |  7 +++
 include/ovn/features.h |  1 +
 northd/mac-binding-aging.c |  5 +++--
 northd/northd.c| 24 ++--
 northd/northd.h|  1 +
 5 files changed, 30 insertions(+), 8 deletions(-)

diff --git a/controller/chassis.c b/controller/chassis.c
index 92850fcc1..c6a1c50ae 100644
--- a/controller/chassis.c
+++ b/controller/chassis.c
@@ -351,6 +351,7 @@ chassis_build_other_config(const struct ovs_chassis_cfg 
*ovs_cfg,
  ovs_cfg->is_interconn ? "true" : "false");
 smap_replace(config, OVN_FEATURE_PORT_UP_NOTIF, "true");
 smap_replace(config, OVN_FEATURE_CT_NO_MASKED_LABEL, "true");
+smap_replace(config, OVN_FEATURE_MAC_BINDING_TIMESTAMP, "true");
 }
 
 /*
@@ -462,6 +463,12 @@ chassis_other_config_changed(const struct ovs_chassis_cfg 
*ovs_cfg,
 return true;
 }
 
+if (!smap_get_bool(_rec->other_config,
+   OVN_FEATURE_MAC_BINDING_TIMESTAMP,
+   false)) {
+return true;
+}
+
 return false;
 }
 
diff --git a/include/ovn/features.h b/include/ovn/features.h
index 8fbdbf19a..679f67457 100644
--- a/include/ovn/features.h
+++ b/include/ovn/features.h
@@ -23,6 +23,7 @@
 /* ovn-controller supported feature names. */
 #define OVN_FEATURE_PORT_UP_NOTIF  "port-up-notif"
 #define OVN_FEATURE_CT_NO_MASKED_LABEL "ct-no-masked-label"
+#define OVN_FEATURE_MAC_BINDING_TIMESTAMP "mac-binding-timestamp"
 
 /* OVS datapath supported features.  Based on availability OVN might generate
  * different types of openflows.
diff --git a/northd/mac-binding-aging.c b/northd/mac-binding-aging.c
index 36d0a6fd7..4a2dfbbf8 100644
--- a/northd/mac-binding-aging.c
+++ b/northd/mac-binding-aging.c
@@ -90,8 +90,10 @@ void
 en_mac_binding_aging_run(struct engine_node *node, void *data OVS_UNUSED)
 {
 const struct engine_context *eng_ctx = engine_get_context();
+struct northd_data *northd_data = engine_get_input_data("northd", node);
 
-if (!eng_ctx->ovnsb_idl_txn) {
+if (!eng_ctx->ovnsb_idl_txn ||
+!northd_data->features.mac_binding_timestamp) {
 return;
 }
 
@@ -99,7 +101,6 @@ en_mac_binding_aging_run(struct engine_node *node, void 
*data OVS_UNUSED)
 int64_t now = time_wall_msec();
 uint32_t removal_limit = get_removal_limit(node);
 uint32_t removed_n = 0;
-struct northd_data *northd_data = engine_get_input_data("northd", node);
 struct mac_binding_waker *waker =
 engine_get_input_data("mac_binding_aging_waker", node);
 struct ovsdb_idl_index *sbrec_mac_binding_by_datapath =
diff --git a/northd/northd.c b/northd/northd.c
index bb92bbc6f..e7b53410b 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -407,14 +407,23 @@ build_chassis_features(const struct northd_input 
*input_data,
 const struct sbrec_chassis *chassis;
 
 SBREC_CHASSIS_TABLE_FOR_EACH (chassis, input_data->sbrec_chassis) {
-if (!smap_get_bool(>other_config,
-   OVN_FEATURE_CT_NO_MASKED_LABEL,
-   false)) {
+bool ct_no_masked_label =
+smap_get_bool(>other_config,
+  OVN_FEATURE_CT_NO_MASKED_LABEL,
+  false);
+if (!ct_no_masked_label && chassis_features->ct_no_masked_label) {
 chassis_features->ct_no_masked_label = false;
-return;
+}
+
+bool mac_binding_timestamp =
+smap_get_bool(>other_config,
+  OVN_FEATURE_MAC_BINDING_TIMESTAMP,
+  false);
+if (!mac_binding_timestamp &&
+chassis_features->mac_binding_timestamp) {
+chassis_features->mac_binding_timestamp = false;
 }
 }
-chassis_features->ct_no_masked_label = true;
 }
 
 struct ovn_chassis_qdisc_queues {
@@ -15324,7 +15333,10 @@ northd_init(struct northd_data *data)
 hmap_init(>lb_groups);
 hmap_init(>bfd_connections);
 ovs_list_init(>lr_list);
-memset(&data->features, 0, sizeof data->features);
+data->features = (struct chassis_features) {
+.ct_no_masked_label = true,
+.mac_binding_timestamp = true,
+};
 data->ovn_internal_version_changed = false;
 }
 
diff --git a/northd/northd.h b/northd/northd.h
index 68c55bff4..4319c49fa 100644
--- a/northd/northd.h
+++ b/northd/northd.h
@@ -66,6 +66,7 @@ struct northd_input {
 
 struct chassis_features {
 bool ct_no_masked_label;
+bool mac_binding_timestamp;
 };
 
 struct northd_data {
-- 
2.37.3


[ovs-dev] [PATCH ovn v2 3/8] northd: Move struct ovn_datapath and related structs to northd.h

2022-11-04 Thread Ales Musil
The struct ovn_datapath could not be used outside of northd.c.
Move it to northd.h so that it can be used by other .c files later on.

Reported-at: https://bugzilla.redhat.com/2084668
Acked-by: Dumitru Ceara 
Acked-by: Mark Michelson 
Acked-by: Han Zhou 
Signed-off-by: Ales Musil 
---
 northd/northd.c | 147 --
 northd/northd.h | 151 
 2 files changed, 151 insertions(+), 147 deletions(-)

diff --git a/northd/northd.c b/northd/northd.c
index 62f511613..bb92bbc6f 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -514,74 +514,6 @@ port_has_qos_params(const struct smap *opts)
 }
 
 
-/*
- * Multicast snooping and querier per datapath configuration.
- */
-struct mcast_switch_info {
-
-bool enabled;   /* True if snooping enabled. */
-bool querier;   /* True if querier enabled. */
-bool flood_unregistered;/* True if unregistered multicast should be
- * flooded.
- */
-bool flood_relay;   /* True if the switch is connected to a
- * multicast router and unregistered multicast
- * should be flooded to the mrouter. Only
- * applicable if flood_unregistered == false.
- */
-bool flood_reports; /* True if the switch has at least one port
- * configured to flood reports.
- */
-bool flood_static;  /* True if the switch has at least one port
- * configured to flood traffic.
- */
-int64_t table_size; /* Max number of IP multicast groups. */
-int64_t idle_timeout;   /* Timeout after which an idle group is
- * flushed.
- */
-int64_t query_interval; /* Interval between multicast queries. */
-char *eth_src;  /* ETH src address of the queries. */
-char *ipv4_src; /* IPv4 src address of the queries. */
-char *ipv6_src; /* IPv6 src address of the queries. */
-
-int64_t query_max_response; /* Expected time after which reports should
- * be received for queries that were sent out.
- */
-
-atomic_uint64_t active_v4_flows;   /* Current number of active IPv4 
multicast
- * flows.
- */
-atomic_uint64_t active_v6_flows;   /* Current number of active IPv6 
multicast
- * flows.
- */
-};
-
-struct mcast_router_info {
-bool relay;/* True if the router should relay IP multicast. */
-bool flood_static; /* True if the router has at least one port configured
-* to flood traffic.
-*/
-};
-
-struct mcast_info {
-
-struct hmap group_tnlids;  /* Group tunnel IDs in use on this DP. */
-uint32_t group_tnlid_hint; /* Hint for allocating next group tunnel ID. */
-struct ovs_list groups;/* List of groups learnt on this DP. */
-
-union {
-struct mcast_switch_info sw;  /* Switch specific multicast info. */
-struct mcast_router_info rtr; /* Router specific multicast info. */
-};
-};
-
-struct mcast_port_info {
-bool flood; /* True if the port should flood IP multicast traffic
- * regardless if it's registered or not. */
-bool flood_reports; /* True if the port should flood IP multicast reports
- * (e.g., IGMP join/leave). */
-};
-
 static void
 init_mcast_port_info(struct mcast_port_info *mcast_info,
  const struct nbrec_logical_switch_port *nbsp,
@@ -611,85 +543,6 @@ ovn_mcast_group_allocate_key(struct mcast_info *mcast_info)
   _info->group_tnlid_hint);
 }
 
-/* The 'key' comes from nbs->header_.uuid or nbr->header_.uuid or
- * sb->external_ids:logical-switch. */
-struct ovn_datapath {
-struct hmap_node key_node;  /* Index on 'key'. */
-struct uuid key;/* (nbs/nbr)->header_.uuid. */
-
-const struct nbrec_logical_switch *nbs;  /* May be NULL. */
-const struct nbrec_logical_router *nbr;  /* May be NULL. */
-const struct sbrec_datapath_binding *sb; /* May be NULL. */
-
-struct ovs_list list;   /* In list of similar records. */
-
-uint32_t tunnel_key;
-
-/* Logical switch data. */
-struct ovn_port **router_ports;
-size_t n_router_ports;
-size_t n_allocated_router_ports;
-
-struct hmap port_tnlids;
-uint32_t port_key_hint;
-
-bool has_stateful_acl;
-bool has_lb_vip;
-bool has_unknown;
-bool has_acls;
-
-/* IPAM data. */
-struct ipam_info 

[ovs-dev] [PATCH ovn v2 4/8] northd: Add MAC binding aging mechanism

2022-11-04 Thread Ales Musil
Add a MAC binding aging mechanism that utilizes
the timestamp column of the MAC_Binding table.
When the age of a MAC binding exceeds the threshold, it is
removed from the SB DB. Removal is postponed only when
we receive an ARP update that changes the MAC address.

The threshold is configurable via option
"mac_binding_age_threshold" that can be specified
for each logical router. The option is defaulting to
0 which means that by default the aging is disabled
and the MAC binding rows will be persisted the same
way as before.

Reported-at: https://bugzilla.redhat.com/2084668
Acked-by: Dumitru Ceara 
Signed-off-by: Ales Musil 
---
 NEWS   |   3 +
 northd/automake.mk |   2 +
 northd/inc-proc-northd.c   |  15 
 northd/mac-binding-aging.c | 161 +
 northd/mac-binding-aging.h |  33 
 ovn-nb.xml |   7 ++
 tests/ovn.at   | 113 ++
 7 files changed, 334 insertions(+)
 create mode 100644 northd/mac-binding-aging.c
 create mode 100644 northd/mac-binding-aging.h

diff --git a/NEWS b/NEWS
index 1c8a3d165..236d0007f 100644
--- a/NEWS
+++ b/NEWS
@@ -5,6 +5,9 @@ OVN v22.06.1 - xx xxx 
 NAT-T UDP encapsulation. Requires OVS support for IPsec custom tunnel
 options (which is available in OVS 3.0).
   - Bump python version required for building OVN to 3.6.
+  - Added MAC binding aging mechanism, that is disabled by default.
+It can be enabled per logical router with option
+"mac_binding_age_threshold".
 
 OVN v22.06.0 - 03 Jun 2022
 --
diff --git a/northd/automake.mk b/northd/automake.mk
index 4862ec7b7..81582867d 100644
--- a/northd/automake.mk
+++ b/northd/automake.mk
@@ -1,6 +1,8 @@
 # ovn-northd
 bin_PROGRAMS += northd/ovn-northd
 northd_ovn_northd_SOURCES = \
+   northd/mac-binding-aging.c \
+   northd/mac-binding-aging.h \
northd/northd.c \
northd/northd.h \
northd/ovn-northd.c \
diff --git a/northd/inc-proc-northd.c b/northd/inc-proc-northd.c
index 43093cb5a..fc0d9e670 100644
--- a/northd/inc-proc-northd.c
+++ b/northd/inc-proc-northd.c
@@ -22,9 +22,11 @@
 #include "ip-mcast-index.h"
 #include "static-mac-binding-index.h"
 #include "lib/inc-proc-eng.h"
+#include "lib/mac-binding-index.h"
 #include "lib/ovn-nb-idl.h"
 #include "lib/ovn-sb-idl.h"
 #include "mcast-group-index.h"
+#include "northd/mac-binding-aging.h"
 #include "openvswitch/poll-loop.h"
 #include "openvswitch/vlog.h"
 #include "inc-proc-northd.h"
@@ -149,6 +151,8 @@ enum sb_engine_node {
  * avoid sparse errors. */
 static ENGINE_NODE(northd, "northd");
 static ENGINE_NODE(lflow, "lflow");
+static ENGINE_NODE(mac_binding_aging, "mac_binding_aging");
+static ENGINE_NODE(mac_binding_aging_waker, "mac_binding_aging_waker");
 
 void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
   struct ovsdb_idl_loop *sb)
@@ -211,12 +215,18 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
 engine_add_input(_northd, _sb_load_balancer, NULL);
 engine_add_input(_northd, _sb_fdb, NULL);
 engine_add_input(_northd, _sb_static_mac_binding, NULL);
+engine_add_input(_mac_binding_aging, _sb_mac_binding, NULL);
+engine_add_input(_mac_binding_aging, _northd, NULL);
+engine_add_input(_mac_binding_aging, _mac_binding_aging_waker, NULL);
 engine_add_input(_lflow, _nb_bfd, NULL);
 engine_add_input(_lflow, _sb_bfd, NULL);
 engine_add_input(_lflow, _sb_logical_flow, NULL);
 engine_add_input(_lflow, _sb_multicast_group, NULL);
 engine_add_input(_lflow, _sb_igmp_group, NULL);
 engine_add_input(_lflow, _northd, NULL);
+/* XXX: The "en_mac_binding_aging" should be separate "root" node
+ * once I-P engine allows multiple root nodes. */
+engine_add_input(_lflow, _mac_binding_aging, NULL);
 
 struct engine_arg engine_arg = {
 .nb_idl = nb->idl,
@@ -235,6 +245,8 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
 chassis_hostname_index_create(sb->idl);
 struct ovsdb_idl_index *sbrec_static_mac_binding_by_lport_ip
 = static_mac_binding_index_create(sb->idl);
+struct ovsdb_idl_index *sbrec_mac_binding_by_datapath
+= mac_binding_by_datapath_index_create(sb->idl);
 
 engine_init(_lflow, _arg);
 
@@ -256,6 +268,9 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
 engine_ovsdb_node_add_index(_sb_static_mac_binding,
 "sbrec_static_mac_binding_by_lport_ip",
 sbrec_static_mac_binding_by_lport_ip);
+engine_ovsdb_node_add_index(_sb_mac_binding,
+"sbrec_mac_binding_by_datapath",
+sbrec_mac_binding_by_datapath);
 }
 
 void inc_proc_northd_run(struct ovsdb_idl_txn *ovnnb_txn,
diff --git a/northd/mac-binding-aging.c b/northd/mac-binding-aging.c
new file mode 100644
index 0..3859c050b
--- /dev/null
+++ b/northd/mac-binding-aging.c
@@ -0,0 +1,161 @@
+/* 

[ovs-dev] [PATCH ovn v2 5/8] northd: Add config to limit bulk removal of MAC binding

2022-11-04 Thread Ales Musil
Add a configuration option to the NB global table
called "mac_binding_removal_limit", defaulting to 0.
This option limits the number of MAC bindings
that can be removed by the aging mechanism in a single
transaction. A value of 0 means that the limit is disabled
(unlimited). If the limit is reached, the next removal is delayed by
10 ms. Setting this option has the downside that,
in theory, the removal might never finish; in
practice this is unlikely, considering that not all routers
will have aging enabled and those that do will use a
reasonable threshold.

Reported-at: https://bugzilla.redhat.com/2084668
Acked-by: Mark Michelson 
Acked-by: Dumitru Ceara 
Signed-off-by: Ales Musil 
---
 northd/inc-proc-northd.c   |  1 +
 northd/mac-binding-aging.c | 33 +++--
 ovn-nb.xml |  8 
 3 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/northd/inc-proc-northd.c b/northd/inc-proc-northd.c
index fc0d9e670..54e0ad3b0 100644
--- a/northd/inc-proc-northd.c
+++ b/northd/inc-proc-northd.c
@@ -215,6 +215,7 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
 engine_add_input(_northd, _sb_load_balancer, NULL);
 engine_add_input(_northd, _sb_fdb, NULL);
 engine_add_input(_northd, _sb_static_mac_binding, NULL);
+engine_add_input(_mac_binding_aging, _nb_nb_global, NULL);
 engine_add_input(_mac_binding_aging, _sb_mac_binding, NULL);
 engine_add_input(_mac_binding_aging, _northd, NULL);
 engine_add_input(_mac_binding_aging, _mac_binding_aging_waker, NULL);
diff --git a/northd/mac-binding-aging.c b/northd/mac-binding-aging.c
index 3859c050b..36d0a6fd7 100644
--- a/northd/mac-binding-aging.c
+++ b/northd/mac-binding-aging.c
@@ -28,6 +28,8 @@
 
 VLOG_DEFINE_THIS_MODULE(mac_binding_aging);
 
+#define MAC_BINDING_BULK_REMOVAL_DELAY_MSEC 10
+
 struct mac_binding_waker {
 bool should_schedule;
 long long next_wake_msec;
@@ -37,7 +39,8 @@ static void
 mac_binding_aging_run_for_datapath(const struct sbrec_datapath_binding *dp,
const struct nbrec_logical_router *nbr,
struct ovsdb_idl_index *mb_by_datapath,
-   int64_t now, int64_t *wake_delay)
+   int64_t now, int64_t *wake_delay,
+   uint32_t removal_limit, uint32_t *removed_n)
 {
 uint64_t threshold = smap_get_uint(>options,
"mac_binding_age_threshold",
@@ -58,6 +61,10 @@ mac_binding_aging_run_for_datapath(const struct 
sbrec_datapath_binding *dp,
 continue;
 } else if (elapsed >= threshold) {
 sbrec_mac_binding_delete(mb);
+(*removed_n)++;
+if (removal_limit && *removed_n == removal_limit) {
+break;
+}
 } else {
 *wake_delay = MIN(*wake_delay, threshold - elapsed);
 }
@@ -65,6 +72,20 @@ mac_binding_aging_run_for_datapath(const struct 
sbrec_datapath_binding *dp,
 sbrec_mac_binding_index_destroy_row(mb_index_row);
 }
 
+static uint32_t
+get_removal_limit(struct engine_node *node)
+{
+const struct nbrec_nb_global_table *nb_global_table =
+EN_OVSDB_GET(engine_get_input("NB_nb_global", node));
+const struct nbrec_nb_global *nb =
+nbrec_nb_global_table_first(nb_global_table);
+if (!nb) {
+   return 0;
+}
+
+return smap_get_uint(>options, "mac_binding_removal_limit", 0);
+}
+
 void
 en_mac_binding_aging_run(struct engine_node *node, void *data OVS_UNUSED)
 {
@@ -76,6 +97,8 @@ en_mac_binding_aging_run(struct engine_node *node, void *data 
OVS_UNUSED)
 
 int64_t next_expire_msec = INT64_MAX;
 int64_t now = time_wall_msec();
+uint32_t removal_limit = get_removal_limit(node);
+uint32_t removed_n = 0;
 struct northd_data *northd_data = engine_get_input_data("northd", node);
 struct mac_binding_waker *waker =
 engine_get_input_data("mac_binding_aging_waker", node);
@@ -88,7 +111,13 @@ en_mac_binding_aging_run(struct engine_node *node, void 
*data OVS_UNUSED)
 if (od->sb && od->nbr) {
 mac_binding_aging_run_for_datapath(od->sb, od->nbr,
sbrec_mac_binding_by_datapath,
-   now, _expire_msec);
+   now, _expire_msec,
+   removal_limit, _n);
+if (removal_limit && removed_n == removal_limit) {
+/* Schedule the next run after specified delay. */
+next_expire_msec = MAC_BINDING_BULK_REMOVAL_DELAY_MSEC;
+break;
+}
 }
 }
 
diff --git a/ovn-nb.xml b/ovn-nb.xml
index d137d68cf..2747b0a7d 100644
--- a/ovn-nb.xml
+++ b/ovn-nb.xml
@@ -162,6 +162,14 @@
 dynamically assigned, e.g. 00:11:22
   
 
+  
+MAC binding aging 

[ovs-dev] [PATCH ovn v2 1/8] northd, controller: Add timestamp column to MAC_Binding table

2022-11-04 Thread Ales Musil
The new timestamp column in MAC_Binding is
populated with the current time whenever the row is
created or the MAC address is updated.
This can be utilized by the MAC binding aging mechanism,
which can check whether enough time has passed since the
creation/update.

Reported-at: https://bugzilla.redhat.com/2084668
Acked-By: Ihar Hrachyshka 
Acked-by: Dumitru Ceara 
Acked-by: Mark Michelson 
Acked-by: Han Zhou 
Signed-off-by: Ales Musil 
---
v2: Fix the SBDB schema version
---
 controller/pinctrl.c | 2 ++
 northd/ovn-northd.c  | 2 +-
 ovn-sb.ovsschema | 5 +++--
 ovn-sb.xml   | 6 ++
 4 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/controller/pinctrl.c b/controller/pinctrl.c
index 9fe7859d5..ad79cda6e 100644
--- a/controller/pinctrl.c
+++ b/controller/pinctrl.c
@@ -4212,8 +4212,10 @@ mac_binding_add_to_sb(struct ovsdb_idl_txn 
*ovnsb_idl_txn,
 sbrec_mac_binding_set_ip(b, ip);
 sbrec_mac_binding_set_mac(b, mac_string);
 sbrec_mac_binding_set_datapath(b, dp);
+sbrec_mac_binding_set_timestamp(b, time_wall_msec());
 } else if (strcmp(b->mac, mac_string)) {
 sbrec_mac_binding_set_mac(b, mac_string);
+sbrec_mac_binding_set_timestamp(b, time_wall_msec());
 }
 }
 
diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index ab28756af..bd35802ed 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -115,7 +115,7 @@ static const char *rbac_port_binding_update[] =
 static const char *rbac_mac_binding_auth[] =
 {""};
 static const char *rbac_mac_binding_update[] =
-{"logical_port", "ip", "mac", "datapath"};
+{"logical_port", "ip", "mac", "datapath", "timestamp"};
 
 static const char *rbac_svc_monitor_auth[] =
 {""};
diff --git a/ovn-sb.ovsschema b/ovn-sb.ovsschema
index 3b78ea6f6..de1a3c113 100644
--- a/ovn-sb.ovsschema
+++ b/ovn-sb.ovsschema
@@ -1,7 +1,7 @@
 {
 "name": "OVN_Southbound",
-"version": "20.23.0",
-"cksum": "4045988377 28575",
+"version": "20.24.0",
+"cksum": "1811467130 28634",
 "tables": {
 "SB_Global": {
 "columns": {
@@ -260,6 +260,7 @@
 "logical_port": {"type": "string"},
 "ip": {"type": "string"},
 "mac": {"type": "string"},
+"timestamp": {"type": {"key": "integer"}},
 "datapath": {"type": {"key": {"type": "uuid",
   "refTable": 
"Datapath_Binding",
 "indexes": [["logical_port", "ip"]],
diff --git a/ovn-sb.xml b/ovn-sb.xml
index 42b3d4d68..1a555175c 100644
--- a/ovn-sb.xml
+++ b/ovn-sb.xml
@@ -3672,6 +3672,12 @@ tcp.flags = RST;
 
   The Ethernet address to which the IP is bound.
 
+
+
+  The timestamp in msec when the MAC binding was added or updated.
+  Records that existed before this column will have 0.
+
+
 
   The logical datapath to which the logical port belongs.
 
-- 
2.37.3

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH ovn v2 2/8] controller: Add mac-binding-index.c/.h files

2022-11-04 Thread Ales Musil
Add a helper source file for creating indexes
over the MAC binding table.

Reported-at: https://bugzilla.redhat.com/2084668
Acked-by: Dumitru Ceara 
Acked-by: Mark Michelson 
Acked-by: Han Zhou 
Signed-off-by: Ales Musil 
---
 controller/ovn-controller.c |  8 +++-
 lib/automake.mk |  2 ++
 lib/mac-binding-index.c | 33 +
 lib/mac-binding-index.h | 26 ++
 4 files changed, 64 insertions(+), 5 deletions(-)
 create mode 100644 lib/mac-binding-index.c
 create mode 100644 lib/mac-binding-index.h

diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index 000d57c41..57061b830 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -53,6 +53,7 @@
 #include "lib/chassis-index.h"
 #include "lib/extend-table.h"
 #include "lib/ip-mcast-index.h"
+#include "lib/mac-binding-index.h"
 #include "lib/mcast-group-index.h"
 #include "lib/ovn-sb-idl.h"
 #include "lib/ovn-util.h"
@@ -3496,9 +3497,7 @@ main(int argc, char *argv[])
 = ovsdb_idl_index_create1(ovnsb_idl_loop.idl,
   _datapath_binding_col_tunnel_key);
 struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip
-= ovsdb_idl_index_create2(ovnsb_idl_loop.idl,
-  _mac_binding_col_logical_port,
-  _mac_binding_col_ip);
+= mac_binding_by_lport_ip_index_create(ovnsb_idl_loop.idl);
 struct ovsdb_idl_index *sbrec_ip_multicast
 = ip_mcast_index_create(ovnsb_idl_loop.idl);
 struct ovsdb_idl_index *sbrec_igmp_group
@@ -3511,8 +3510,7 @@ main(int argc, char *argv[])
   _fdb_col_mac,
   _fdb_col_dp_key);
 struct ovsdb_idl_index *sbrec_mac_binding_by_datapath
-= ovsdb_idl_index_create1(ovnsb_idl_loop.idl,
-  _mac_binding_col_datapath);
+= mac_binding_by_datapath_index_create(ovnsb_idl_loop.idl);
 struct ovsdb_idl_index *sbrec_static_mac_binding_by_datapath
 = ovsdb_idl_index_create1(ovnsb_idl_loop.idl,
   _static_mac_binding_col_datapath);
diff --git a/lib/automake.mk b/lib/automake.mk
index 3a2da1fe4..60bead6a6 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -26,6 +26,8 @@ lib_libovn_la_SOURCES = \
lib/ovn-parallel-hmap.c \
lib/ip-mcast-index.c \
lib/ip-mcast-index.h \
+   lib/mac-binding-index.c \
+   lib/mac-binding-index.h \
lib/mcast-group-index.c \
lib/mcast-group-index.h \
lib/lex.c \
diff --git a/lib/mac-binding-index.c b/lib/mac-binding-index.c
new file mode 100644
index 0..d774f12d0
--- /dev/null
+++ b/lib/mac-binding-index.c
@@ -0,0 +1,33 @@
+/* Copyright (c) 2022, Red Hat, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include 
+
+#include "lib/mac-binding-index.h"
+#include "lib/ovn-sb-idl.h"
+
+struct ovsdb_idl_index *
+mac_binding_by_datapath_index_create(struct ovsdb_idl *idl)
+{
+return ovsdb_idl_index_create1(idl, _mac_binding_col_datapath);
+}
+
+struct ovsdb_idl_index *
+mac_binding_by_lport_ip_index_create(struct ovsdb_idl *idl)
+{
+return ovsdb_idl_index_create2(idl,
+   _mac_binding_col_logical_port,
+   _mac_binding_col_ip);
+}
diff --git a/lib/mac-binding-index.h b/lib/mac-binding-index.h
new file mode 100644
index 0..8e977ecea
--- /dev/null
+++ b/lib/mac-binding-index.h
@@ -0,0 +1,26 @@
+/* Copyright (c) 2022, Red Hat, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef OVN_MAC_BINDING_INDEX_H
+#define OVN_MAC_BINDING_INDEX_H 1
+
+#include "lib/ovn-sb-idl.h"
+
+struct ovsdb_idl_index *mac_binding_by_datapath_index_create(
+struct ovsdb_idl *idl);
+struct ovsdb_idl_index *mac_binding_by_lport_ip_index_create(
+struct ovsdb_idl *idl);
+
+#endif 

[ovs-dev] dpif-netdev: fix flow allocation size

2022-11-04 Thread Peng He
The tail of the struct dp_netdev_flow contains a whole netdev_flow_key
struct.

We need to first subtract the size of the whole netdev_flow_key and
then add back the real size of this netdev_flow_key.

Signed-off-by: Peng He 
---
 lib/dpif-netdev.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 2c08a71c8..a8779a979 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -4072,7 +4072,8 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd,
&& !FLOWMAP_HAS_FIELD(, regs));
 
 /* Do not allocate extra space. */
-flow = xmalloc(sizeof *flow - sizeof flow->cr.flow.mf + mask.len);
+flow = xmalloc(sizeof *flow - sizeof(flow->cr.flow) +
+   offsetof(struct netdev_flow_key, mf) + mask.len);
 memset(>stats, 0, sizeof flow->stats);
 atomic_init(>netdev_flow_get_result, 0);
 memset(>last_stats, 0, sizeof flow->last_stats);
@@ -9744,7 +9745,8 @@ dpcls_create_subtable(struct dpcls *cls, const struct 
netdev_flow_key *mask)
 
 /* Need to add one. */
 subtable = xmalloc(sizeof *subtable
-   - sizeof subtable->mask.mf + mask->len);
+   - sizeof subtable->mask
+   + offsetof(struct netdev_flow_key, mf) + mask->len);
 cmap_init(>rules);
 subtable->hit_cnt = 0;
 netdev_flow_key_clone(>mask, mask);
-- 
2.25.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v4] ovs-ofctl:'--bundle' option can be used with OpenFlow 1.3

2022-11-04 Thread yangchang
>From the commit 25070e045e, bundle option can be used with OpenFlow 1.3

Signed-off-by: yangchang 
Acked-by: Mike Pattrick 
---
 utilities/ovs-ofctl.8.in | 10 +-
 utilities/ovs-save   |  6 +++---
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/utilities/ovs-ofctl.8.in b/utilities/ovs-ofctl.8.in
index 10a6a64de..953609bfd 100644
--- a/utilities/ovs-ofctl.8.in
+++ b/utilities/ovs-ofctl.8.in
@@ -1315,7 +1315,7 @@ well as cookie values and table IDs if they are zero.
 Do not execute read/write commands.
 .
 .IP "\fB\-\-bundle\fR"
-Execute flow mods as an OpenFlow 1.4 atomic bundle transaction.
+Execute flow mods as an OpenFlow 1.3 atomic bundle transaction.
 .RS
 .IP \(bu
 Within a bundle, all flow mods are processed in the order they appear
@@ -1327,15 +1327,15 @@ the transaction, or after all the flow mods in the 
bundle have been
 successfully applied.
 .IP \(bu
 The beginning and the end of the flow table modification commands in a
-bundle are delimited with OpenFlow 1.4 bundle control messages, which
+bundle are delimited with OpenFlow 1.3 bundle control messages, which
 makes it possible to stream the included commands without explicit
 OpenFlow barriers, which are otherwise used after each flow table
 modification command.  This may make large modifications execute
 faster as a bundle.
 .IP \(bu
-Bundles require OpenFlow 1.4 or higher.  An explicit \fB-O
-OpenFlow14\fR option is not needed, but you may need to enable
-OpenFlow 1.4 support for OVS by setting the OVSDB \fIprotocols\fR
+Bundles require OpenFlow 1.3 or higher.  An explicit \fB-O
+OpenFlow13\fR option is not needed, but you may need to enable
+OpenFlow 1.3 support for OVS by setting the OVSDB \fIprotocols\fR
 column in the \fIbridge\fR table.
 .RE
 .
diff --git a/utilities/ovs-save b/utilities/ovs-save
index 67092ecf7..2efd82c78 100755
--- a/utilities/ovs-save
+++ b/utilities/ovs-save
@@ -102,7 +102,7 @@ save_interfaces () {
 get_highest_ofp_version() {
 ovs-vsctl get bridge "$1" protocols | \
 sed 's/[][]//g' | sed 's/\ //g' | \
-awk -F ',' '{ print (NF>0)? $(NF) : "OpenFlow14" }'
+awk -F ',' '{ print (NF>0)? $(NF) : "OpenFlow13" }'
 }

 save_flows () {
@@ -133,8 +133,8 @@ save_flows () {
  cnt++;printf "{class="$1",type="$2",len="$3"}->"$4}'
 echo "'"

-# If possible use OpenFlow 1.4 atomic bundle txn for flows and groups
-[ ${ofp_version#OpenFlow} -ge 14 ] && bundle=" --bundle" || bundle=""
+# If possible use OpenFlow 1.3 atomic bundle txn for flows and groups
+[ ${ofp_version#OpenFlow} -ge 13 ] && bundle=" --bundle" || bundle=""

 echo "ovs-ofctl -O $ofp_version add-groups ${bridge} \
   \"$workdir/$bridge.groups.dump\" ${bundle}"
--
2.27.0.windows.1


yangch...@chinatelecom.cn
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


  1   2   >