On Fri, Jul 7, 2023 at 2:02 PM Numan Siddique <num...@ovn.org> wrote: > > On Fri, Jul 7, 2023 at 6:40 AM Han Zhou <hz...@ovn.org> wrote: > > > > On Thu, Jul 6, 2023 at 9:55 PM Numan Siddique <num...@ovn.org> wrote: > > > > > > On Thu, Jul 6, 2023 at 3:32 PM Han Zhou <hz...@ovn.org> wrote: > > > > > > > > Although each individual VIF port related changes are handled > > > > incrementally, it still triggers recompute if there are in-flight > > > > transactions (either to NB or SB) when a change comes, which is very > > > > common in real production environment if change happens frequently. > > > > It is also easy to hit such situation in test cases where nb_cfg > > > > mechanism is heavily used, which makes it difficult to write reliable > > > > and stable tests, such as what the commit 8c30ba1386 was trying to work > > > > around. > > > > > > > > This patch skips the I-P engine execution until the NB & SB transaction > > > > handles are available (no in-flight transactions), and when skippiing > > > > the runs it keeps the tracked changes in IDL across main loop > > > > iterations. This way we avoid recompute without worrying about missing > > > > any changes. > > > > > > > > Signed-off-by: Han Zhou <hz...@ovn.org> > > > > > > Acked-by: Numan Siddique <num...@ovn.org> > > > > > > Numan > > > > Thanks Numan! I applied the series to main. > > Hi Han, > > Looks like the test case - "LSP incremental processing" is a little bit flaky. > > If you see the latest CI run, this test is failing for ARM - > https://github.com/ovn-org/ovn/runs/14845008643 > It also failed for the load balancer I-P patch series > (http://patchwork.ozlabs.org/project/ovn/list/?series=362800) > in my local repo and it passed in a re-run. > > > ------ > as northd ovn-appctl -t ovn-northd inc-engine/clear-stats > ../../../tests/ovn-macros.at:419: "$@" > ovn-nbctl --wait=hv lsp-add ls0 lsp0-0 -- lsp-set-addresses lsp0-0 unknown > ../../../tests/ovn-macros.at:419: "$@" > Waiting until 0 rows in nb Logical_Switch_Port with up!=true type=""... > ovn-macros.at:470: waiting until test $count = $(count_rows $db:$table > $a $b $c $d $e)... > ovn-macros.at:470: wait succeeded immediately > Waiting until 0 rows in nb Logical_Switch_Port with up!=true type=router... > ovn-macros.at:470: waiting until test $count = $(count_rows $db:$table > $a $b $c $d $e)... > ovn-macros.at:470: wait succeeded immediately > ovn-nbctl --wait=hv sync > ../../../tests/ovn-macros.at:419: "$@" > ../../../tests/ovn-northd.at:9515: test x$northd_recomp = x$1 > ../../../tests/ovn-northd.at:9515: exit code was 1, expected 0 > ---- > Looks like its failing here - > https://github.com/ovn-org/ovn/blob/main/tests/ovn-northd.at#L9540 > > Maybe we shouldn't do exact matches on the counters ? > Sorry for this. I think the below patch should fix it: https://patchwork.ozlabs.org/project/ovn/patch/20230707062938.761299-1-hz...@ovn.org/
Thanks, Han > Thanks > Numan > > > > > > Han > > > > > > > --- > > > > northd/inc-proc-northd.c | 6 +-- > > > > northd/ovn-northd.c | 22 +++++---- > > > > tests/ovn-northd.at | 104 +++++++++++++++++++-------------------- > > > > 3 files changed, 66 insertions(+), 66 deletions(-) > > > > > > > > diff --git a/northd/inc-proc-northd.c b/northd/inc-proc-northd.c > > > > index 19fc67795643..d328deb222e6 100644 > > > > --- a/northd/inc-proc-northd.c > > > > +++ b/northd/inc-proc-northd.c > > > > @@ -296,6 +296,7 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb, > > > > bool inc_proc_northd_run(struct ovsdb_idl_txn *ovnnb_txn, > > > > struct ovsdb_idl_txn *ovnsb_txn, > > > > bool recompute) { > > > > + ovs_assert(ovnnb_txn && ovnsb_txn); > > > > engine_init_run(); > > > > > > > > /* Force a full recompute if instructed to, for example, after a > > NB/SB > > > > @@ -312,10 +313,7 @@ bool inc_proc_northd_run(struct ovsdb_idl_txn > > *ovnnb_txn, > > > > }; > > > > > > > > engine_set_context(&eng_ctx); > > > > - > > > > - if (ovnnb_txn && ovnsb_txn) { > > > > - engine_run(true); > > > > - } > > > > + engine_run(true); > > > > > > > > if (!engine_has_run()) { > > > > if (engine_need_run()) { > > > > diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c > > > > index 3e0848e41b8e..4fa1b039ea32 100644 > > > > --- a/northd/ovn-northd.c > > > > +++ b/northd/ovn-northd.c > > > > @@ -881,6 +881,7 @@ main(int argc, char *argv[]) > > > > simap_destroy(&usage); > > > > } > > > > > > > > + bool clear_idl_track = true; > > > > if (!state.paused) { > > > > if (!ovsdb_idl_has_lock(ovnsb_idl_loop.idl) && > > > > !ovsdb_idl_is_lock_contended(ovnsb_idl_loop.idl)) > > > > @@ -930,25 +931,26 @@ main(int argc, char *argv[]) > > > > } > > > > > > > > if (ovsdb_idl_has_lock(ovnsb_idl_loop.idl)) { > > > > - int64_t loop_start_time = time_wall_msec(); > > > > - bool activity = inc_proc_northd_run(ovnnb_txn, > > ovnsb_txn, > > > > - recompute); > > > > - recompute = false; > > > > - if (ovnsb_txn) { > > > > + bool activity = false; > > > > + if (ovnnb_txn && ovnsb_txn) { > > > > + int64_t loop_start_time = time_wall_msec(); > > > > + activity = inc_proc_northd_run(ovnnb_txn, > > ovnsb_txn, > > > > + recompute); > > > > + recompute = false; > > > > check_and_add_supported_dhcp_opts_to_sb_db( > > > > ovnsb_txn, ovnsb_idl_loop.idl); > > > > check_and_add_supported_dhcpv6_opts_to_sb_db( > > > > ovnsb_txn, ovnsb_idl_loop.idl); > > > > check_and_update_rbac( > > > > ovnsb_txn, ovnsb_idl_loop.idl); > > > > - } > > > > > > > > - if (ovnnb_txn && ovnsb_txn) { > > > > update_sequence_numbers(loop_start_time, > > > > ovnnb_idl_loop.idl, > > > > ovnsb_idl_loop.idl, > > > > ovnnb_txn, ovnsb_txn, > > > > &ovnsb_idl_loop); > > > > + } else if (!recompute) { > > > > + clear_idl_track = false; > > > > } > > > > > > > > /* If there are any errors, we force a full recompute > > in order > > > > @@ -998,8 +1000,10 @@ main(int argc, char *argv[]) > > > > recompute = true; > > > > } > > > > > > > > - ovsdb_idl_track_clear(ovnnb_idl_loop.idl); > > > > - ovsdb_idl_track_clear(ovnsb_idl_loop.idl); > > > > + if (clear_idl_track) { > > > > + ovsdb_idl_track_clear(ovnnb_idl_loop.idl); > > > > + ovsdb_idl_track_clear(ovnsb_idl_loop.idl); > > > > + } > > > > > > > > unixctl_server_run(unixctl); > > > > unixctl_server_wait(unixctl); > > > > diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at > > > > index f1bf9092eeb7..e79d33b2aec5 100644 > > > > --- a/tests/ovn-northd.at > > > > +++ b/tests/ovn-northd.at > > > > @@ -9522,67 +9522,65 @@ as hv1 > > > > ovs-vsctl add-br br-phys > > > > ovn_attach n1 br-phys 192.168.0.11 > > > > > > > > -fail_count=0 > > > > check_recompute_counter() { > > > > northd_recomp=$(as northd ovn-appctl -t NORTHD_TYPE > > inc-engine/show-stats northd recompute) > > > > - if test x$northd_recomp != x$1; then > > > > - fail_count=$(($fail_count + 1)) > > > > - echo check northd recompute failed: expected $1, got > > $northd_recomp > > > > - return 1 > > > > - fi > > > > + AT_CHECK([test x$northd_recomp = x$1]) > > > > + > > > > lflow_recomp=$(as northd ovn-appctl -t NORTHD_TYPE > > inc-engine/show-stats lflow recompute) > > > > - if test x$lflow_recomp != x$2; then > > > > - fail_count=$(($fail_count + 1)) > > > > - echo check lflow recompute failed: expected $2, got > > $lflow_recomp > > > > - return 1 > > > > - fi > > > > - return 0 > > > > + AT_CHECK([test x$lflow_recomp = x$2]) > > > > } > > > > > > > > -# Depending on order of responses from NB and SB, the number of > > recompute may > > > > -# be different. This test case only verifies the best case scenario, > > which > > > > -# should have the expected recompute count at least 50% of the time. > > > > +check ovn-nbctl --wait=hv ls-add ls0 > > > > + > > > > +check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats > > > > +check ovn-nbctl --wait=hv lsp-add ls0 lsp0-0 -- lsp-set-addresses > > lsp0-0 "unknown" > > > > +ovs-vsctl add-port br-int lsp0-0 -- set interface lsp0-0 > > external_ids:iface-id=lsp0-0 > > > > +wait_for_ports_up > > > > +check ovn-nbctl --wait=hv sync > > > > +check_recompute_counter 5 5 > > > > + > > > > +check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats > > > > +check ovn-nbctl --wait=hv lsp-add ls0 lsp0-1 -- lsp-set-addresses > > lsp0-1 "aa:aa:aa:00:00:01 192.168.0.11" > > > > +ovs-vsctl add-port br-int lsp0-1 -- set interface lsp0-1 > > external_ids:iface-id=lsp0-1 > > > > +wait_for_ports_up > > > > +check ovn-nbctl --wait=hv sync > > > > +check_recompute_counter 0 0 > > > > + > > > > +check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats > > > > +check ovn-nbctl --wait=hv lsp-add ls0 lsp0-2 -- lsp-set-addresses > > lsp0-2 "aa:aa:aa:00:00:02 192.168.0.12" > > > > +ovs-vsctl add-port br-int lsp0-2 -- set interface lsp0-2 > > external_ids:iface-id=lsp0-2 > > > > +wait_for_ports_up > > > > +check ovn-nbctl --wait=hv sync > > > > +check_recompute_counter 0 0 > > > > + > > > > +check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats > > > > +check ovn-nbctl --wait=hv lsp-del lsp0-1 > > > > +check_recompute_counter 0 0 > > > > + > > > > +check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats > > > > +check ovn-nbctl --wait=hv lsp-set-addresses lsp0-2 "aa:aa:aa:00:00:88 > > 192.168.0.88" > > > > +check_recompute_counter 0 0 > > > > + > > > > +# Delete and re-add a LSP for several times continuously, to ensure > > > > +# frequent operations do not trigger recompute when there are in-flight > > > > +# transcations. > > > > for i in $(seq 10); do > > > > - check ovn-nbctl --wait=hv ls-add ls$i > > > > - > > > > - check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats > > > > - check ovn-nbctl --wait=hv lsp-add ls$i lsp${i}-0 -- > > lsp-set-addresses lsp${i}-0 "unknown" > > > > - ovs-vsctl add-port br-int lsp${i}-0 -- set interface lsp${i}-0 > > external_ids:iface-id=lsp${i}-0 > > > > - wait_for_ports_up > > > > - check ovn-nbctl --wait=hv sync > > > > - check_recompute_counter 5 5 || continue > > > > - > > > > - check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats > > > > - check ovn-nbctl --wait=hv lsp-add ls$i lsp${i}-1 -- > > lsp-set-addresses lsp${i}-1 "aa:aa:aa:00:00:01 192.168.0.11" > > > > - ovs-vsctl add-port br-int lsp${i}-1 -- set interface lsp${i}-1 > > external_ids:iface-id=lsp${i}-1 > > > > - wait_for_ports_up > > > > - check ovn-nbctl --wait=hv sync > > > > - check_recompute_counter 0 0 || continue > > > > - > > > > - check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats > > > > - check ovn-nbctl --wait=hv lsp-add ls$i lsp${i}-2 -- > > lsp-set-addresses lsp${i}-2 "aa:aa:aa:00:00:02 192.168.0.12" > > > > - ovs-vsctl add-port br-int lsp${i}-2 -- set interface lsp${i}-2 > > external_ids:iface-id=lsp${i}-2 > > > > - wait_for_ports_up > > > > - check ovn-nbctl --wait=hv sync > > > > - check_recompute_counter 0 0 || continue > > > > - > > > > - check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats > > > > - check ovn-nbctl --wait=hv lsp-del lsp${i}-1 > > > > - check_recompute_counter 0 0 || continue > > > > - > > > > - check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats > > > > - check ovn-nbctl --wait=hv lsp-set-addresses lsp${i}-2 > > "aa:aa:aa:00:00:88 192.168.0.88" > > > > - check_recompute_counter 0 0 || continue > > > > - > > > > - # No change, no recompute > > > > - check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats > > > > - check ovn-nbctl --wait=sb sync > > > > - check_recompute_counter 0 0 || continue > > > > + # wait for sb but not wait for hv > > > > + check ovn-nbctl --wait=sb lsp-del lsp0-2 > > > > + check ovn-nbctl --wait=sb lsp-add ls0 lsp0-2 -- lsp-set-addresses > > lsp0-2 "aa:aa:aa:00:00:02 192.168.0.12" > > > > > > > > - CHECK_NO_CHANGE_AFTER_RECOMPUTE > > > > + # even without waiting for sb > > > > + check ovn-nbctl lsp-del lsp0-2 > > > > + check ovn-nbctl lsp-add ls0 lsp0-2 -- lsp-set-addresses lsp0-2 > > "aa:aa:aa:00:00:02 192.168.0.12" > > > > done > > > > -echo Test failed $fail_count in 10. > > > > -AT_CHECK([test $fail_count -le 5]) > > > > +check_recompute_counter 0 0 > > > > + > > > > +# No change, no recompute > > > > +check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats > > > > +check ovn-nbctl --wait=sb sync > > > > +check_recompute_counter 0 0 > > > > + > > > > +CHECK_NO_CHANGE_AFTER_RECOMPUTE > > > > > > > > OVN_CLEANUP([hv1]) > > > > AT_CLEANUP > > > > -- > > > > 2.30.2 > > > > > > > > _______________________________________________ > > > > dev mailing list > > > > d...@openvswitch.org > > > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev > > > > > > _______________________________________________ > > dev mailing list > > d...@openvswitch.org > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev