On Mon, Sep 7, 2020 at 10:55 AM Numan Siddique <num...@ovn.org> wrote:
> > > On Wed, Sep 2, 2020 at 4:57 AM Han Zhou <hz...@ovn.org> wrote: > >> This change adds timestamp information for nb_cfg related processing of >> ovn-northd and ovn-controller. It is useful for measuring end to end >> control plane latency more accurately. The timestamps tell exactly when >> ovn-northd starts processing a NB DB change, updates the changes to >> SB DB, and when the slowest chassis finishes processing the changes. >> >> Signed-off-by: Han Zhou <hz...@ovn.org> >> > > Hi Han, > > I have one comment in the documentation. With that comment addressed: > Acked-by: Numan Siddique <num...@ovn.org> > > Please see below > > Thanks > Numan > > >> --- >> controller/ovn-controller.c | 4 ++++ >> northd/ovn-northd.c | 40 ++++++++++++++++++++++++++++++-------- >> ovn-nb.ovsschema | 7 +++++-- >> ovn-nb.xml | 24 +++++++++++++++++++++-- >> ovn-sb.ovsschema | 5 +++-- >> ovn-sb.xml | 5 +++++ >> tests/ovn.at | 47 >> +++++++++++++++++++++++++++++++++++++++++++++ >> 7 files changed, 118 insertions(+), 14 deletions(-) >> >> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c >> index 874087c..89f10a6 100644 >> --- a/controller/ovn-controller.c >> +++ b/controller/ovn-controller.c >> @@ -2161,6 +2161,8 @@ main(int argc, char *argv[]) >> ovsdb_idl_track_add_all(ovnsb_idl_loop.idl); >> ovsdb_idl_omit_alert(ovnsb_idl_loop.idl, >> &sbrec_chassis_private_col_nb_cfg); >> + ovsdb_idl_omit_alert(ovnsb_idl_loop.idl, >> + &sbrec_chassis_private_col_nb_cfg_timestamp); >> >> /* Omit the external_ids column of all the tables except for - >> * - DNS. pinctrl.c uses the external_ids column of DNS, >> @@ -2564,6 +2566,8 @@ main(int argc, char *argv[]) >> int64_t cur_cfg = ofctrl_get_cur_cfg(); >> if (cur_cfg && cur_cfg != chassis_private->nb_cfg) { >> sbrec_chassis_private_set_nb_cfg(chassis_private, >> cur_cfg); >> + sbrec_chassis_private_set_nb_cfg_timestamp( >> + chassis_private, time_wall_msec()); >> } >> } >> >> diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c >> index 7be0e85..fefbf43 100644 >> --- a/northd/ovn-northd.c >> +++ b/northd/ovn-northd.c >> @@ -44,6 +44,7 @@ >> #include "svec.h" >> #include "stream.h" >> #include "stream-ssl.h" >> +#include "timeval.h" >> #include "unixctl.h" >> #include "util.h" >> #include "uuid.h" >> @@ -11639,7 +11640,8 @@ ovnnb_db_run(struct northd_context *ctx, >> struct ovsdb_idl_index *sbrec_chassis_by_name, >> struct ovsdb_idl_loop *sb_loop, >> struct hmap *datapaths, struct hmap *ports, >> - struct ovs_list *lr_list) >> + struct ovs_list *lr_list, >> + int64_t loop_start_time) >> { >> if (!ctx->ovnsb_txn || !ctx->ovnnb_txn) { >> return; >> @@ -11664,7 +11666,10 @@ ovnnb_db_run(struct northd_context *ctx, >> if (nb->ipsec != sb->ipsec) { >> sbrec_sb_global_set_ipsec(sb, nb->ipsec); >> } >> - sbrec_sb_global_set_nb_cfg(sb, nb->nb_cfg); >> + if (nb->nb_cfg != sb->nb_cfg) { >> + sbrec_sb_global_set_nb_cfg(sb, nb->nb_cfg); >> + nbrec_nb_global_set_nb_cfg_timestamp(nb, loop_start_time); >> + } >> sbrec_sb_global_set_options(sb, &nb->options); >> sb_loop->next_cfg = nb->nb_cfg; >> >> @@ -12038,7 +12043,7 @@ static const char *rbac_chassis_update[] = >> static const char *rbac_chassis_private_auth[] = >> {"name"}; >> static const char *rbac_chassis_private_update[] = >> - {"nb_cfg", "chassis"}; >> + {"nb_cfg", "nb_cfg_timestamp", "chassis"}; >> >> static const char *rbac_encap_auth[] = >> {"chassis_name"}; >> @@ -12242,13 +12247,15 @@ check_and_update_rbac(struct northd_context >> *ctx) >> /* Updates the sb_cfg and hv_cfg columns in the northbound NB_Global >> table. */ >> static void >> update_northbound_cfg(struct northd_context *ctx, >> - struct ovsdb_idl_loop *sb_loop) >> + struct ovsdb_idl_loop *sb_loop, >> + int64_t loop_start_time) >> { >> /* Update northbound sb_cfg if appropriate. */ >> const struct nbrec_nb_global *nbg = >> nbrec_nb_global_first(ctx->ovnnb_idl); >> int64_t sb_cfg = sb_loop->cur_cfg; >> if (nbg && sb_cfg && nbg->sb_cfg != sb_cfg) { >> nbrec_nb_global_set_sb_cfg(nbg, sb_cfg); >> + nbrec_nb_global_set_sb_cfg_timestamp(nbg, loop_start_time); >> } >> >> /* Update northbound hv_cfg if appropriate. */ >> @@ -12256,6 +12263,7 @@ update_northbound_cfg(struct northd_context *ctx, >> /* Find minimum nb_cfg among all chassis. */ >> const struct sbrec_chassis_private *chassis_priv; >> int64_t hv_cfg = nbg->nb_cfg; >> + int64_t hv_cfg_ts = 0; >> SBREC_CHASSIS_PRIVATE_FOR_EACH (chassis_priv, ctx->ovnsb_idl) { >> const struct sbrec_chassis *chassis = chassis_priv->chassis; >> if (chassis) { >> @@ -12273,12 +12281,17 @@ update_northbound_cfg(struct northd_context >> *ctx, >> >> if (chassis_priv->nb_cfg < hv_cfg) { >> hv_cfg = chassis_priv->nb_cfg; >> + hv_cfg_ts = chassis_priv->nb_cfg_timestamp; >> + } else if (chassis_priv->nb_cfg == hv_cfg && >> + chassis_priv->nb_cfg_timestamp > hv_cfg_ts) { >> + hv_cfg_ts = chassis_priv->nb_cfg_timestamp; >> } >> } >> >> /* Update hv_cfg. */ >> if (nbg->hv_cfg != hv_cfg) { >> nbrec_nb_global_set_hv_cfg(nbg, hv_cfg); >> + nbrec_nb_global_set_hv_cfg_timestamp(nbg, hv_cfg_ts); >> } >> } >> } >> @@ -12287,7 +12300,8 @@ update_northbound_cfg(struct northd_context *ctx, >> static void >> ovnsb_db_run(struct northd_context *ctx, >> struct ovsdb_idl_loop *sb_loop, >> - struct hmap *ports) >> + struct hmap *ports, >> + int64_t loop_start_time) >> { >> if (!ctx->ovnnb_txn || >> !ovsdb_idl_has_ever_connected(ctx->ovnsb_idl)) { >> return; >> @@ -12295,7 +12309,7 @@ ovnsb_db_run(struct northd_context *ctx, >> >> struct shash ha_ref_chassis_map = >> SHASH_INITIALIZER(&ha_ref_chassis_map); >> handle_port_binding_changes(ctx, ports, &ha_ref_chassis_map); >> - update_northbound_cfg(ctx, sb_loop); >> + update_northbound_cfg(ctx, sb_loop, loop_start_time); >> if (ctx->ovnsb_txn) { >> update_sb_ha_group_ref_chassis(&ha_ref_chassis_map); >> } >> @@ -12312,9 +12326,11 @@ ovn_db_run(struct northd_context *ctx, >> ovs_list_init(&lr_list); >> hmap_init(&datapaths); >> hmap_init(&ports); >> + >> + int64_t start_time = time_wall_msec(); >> ovnnb_db_run(ctx, sbrec_chassis_by_name, ovnsb_idl_loop, >> - &datapaths, &ports, &lr_list); >> - ovnsb_db_run(ctx, ovnsb_idl_loop, &ports); >> + &datapaths, &ports, &lr_list, start_time); >> + ovnsb_db_run(ctx, ovnsb_idl_loop, &ports, start_time); >> destroy_datapaths_and_ports(&datapaths, &ports, &lr_list); >> } >> >> @@ -12447,8 +12463,14 @@ main(int argc, char *argv[]) >> /* We want to detect (almost) all changes to the ovn-nb db. */ >> struct ovsdb_idl_loop ovnnb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER( >> ovsdb_idl_create(ovnnb_db, &nbrec_idl_class, true, true)); >> + ovsdb_idl_omit_alert(ovnnb_idl_loop.idl, >> + &nbrec_nb_global_col_nb_cfg_timestamp); >> ovsdb_idl_omit_alert(ovnnb_idl_loop.idl, >> &nbrec_nb_global_col_sb_cfg); >> + ovsdb_idl_omit_alert(ovnnb_idl_loop.idl, >> + &nbrec_nb_global_col_sb_cfg_timestamp); >> ovsdb_idl_omit_alert(ovnnb_idl_loop.idl, >> &nbrec_nb_global_col_hv_cfg); >> + ovsdb_idl_omit_alert(ovnnb_idl_loop.idl, >> + &nbrec_nb_global_col_hv_cfg_timestamp); >> >> unixctl_command_register("nb-connection-status", "", 0, 0, >> ovn_conn_show, ovnnb_idl_loop.idl); >> @@ -12577,6 +12599,8 @@ main(int argc, char *argv[]) >> &sbrec_chassis_private_col_chassis); >> ovsdb_idl_add_column(ovnsb_idl_loop.idl, >> &sbrec_chassis_private_col_nb_cfg); >> + ovsdb_idl_add_column(ovnsb_idl_loop.idl, >> + &sbrec_chassis_private_col_nb_cfg_timestamp); >> >> ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_ha_chassis); >> add_column_noalert(ovnsb_idl_loop.idl, >> diff --git a/ovn-nb.ovsschema b/ovn-nb.ovsschema >> index 0c939b7..c66c9fa 100644 >> --- a/ovn-nb.ovsschema >> +++ b/ovn-nb.ovsschema >> @@ -1,14 +1,17 @@ >> { >> "name": "OVN_Northbound", >> - "version": "5.25.0", >> - "cksum": "1354137211 26116", >> + "version": "5.26.0", >> + "cksum": "3650391919 26314", >> "tables": { >> "NB_Global": { >> "columns": { >> "name": {"type": "string"}, >> "nb_cfg": {"type": {"key": "integer"}}, >> + "nb_cfg_timestamp": {"type": {"key": "integer"}}, >> "sb_cfg": {"type": {"key": "integer"}}, >> + "sb_cfg_timestamp": {"type": {"key": "integer"}}, >> "hv_cfg": {"type": {"key": "integer"}}, >> + "hv_cfg_timestamp": {"type": {"key": "integer"}}, >> "external_ids": { >> "type": {"key": "string", "value": "string", >> "min": 0, "max": "unlimited"}}, >> diff --git a/ovn-nb.xml b/ovn-nb.xml >> index 1f2dbb9..cda42ff 100644 >> --- a/ovn-nb.xml >> +++ b/ovn-nb.xml >> @@ -54,21 +54,41 @@ >> applying the changes, it may increment this sequence number. >> </column> >> >> + <column name="nb_cfg_timestamp"> >> + The timestamp when <code>ovn-northd</code> sees the latest >> + <code>nb_cfg</code> and starts processing. >> + </column> >> + >> <column name="sb_cfg"> >> Sequence number that <code>ovn-northd</code> sets to the value >> of <ref >> column="nb_cfg"/> after it finishes applying the corresponding >> configuration changes to the <ref db="OVN_Southbound"/> database. >> </column> >> >> + <column name="sb_cfg_timestamp"> >> + The timestamp when <code>ovn-northd</code> finishes applying the >> + corresponding configuration changes to the <ref >> db="OVN_Southbound"/> >> + database successfully. >> > > Can you please be more specific about the timestamp unit ? > > Thanks Numan. I applied this to master, with (ms) added to the document. > > > >> + </column> >> + >> <column name="hv_cfg"> >> Sequence number that <code>ovn-northd</code> sets to the smallest >> sequence number of all the chassis in the system, as reported in >> the >> - <code>Chassis</code> table in the southbound database. Thus, >> <ref >> - column="hv_cfg"/> equals <ref column="nb_cfg"/> if all chassis >> are >> + <code>Chassis_Private</code> table in the southbound database. >> Thus, >> + <ref column="hv_cfg"/> equals <ref column="nb_cfg"/> if all >> chassis are >> caught up with the northbound configuration (which may never >> happen, if >> any chassis is down). This value can regress, if a chassis was >> removed >> from the system and rejoins before catching up. >> </column> >> + >> + <column name="hv_cfg_timestamp"> >> + The largest timestamp of the smallest sequence number of all the >> + chassis in the system, as reported in the >> <code>Chassis_Private</code> >> + table in the southbound database. In other words, this timestamp >> + reflects the time when the slowest chassis catches up with the >> + northbound configuration, which is useful for end-to-end control >> plane >> + latency measurement. >> + </column> >> </group> >> >> <group title="Common Columns"> >> diff --git a/ovn-sb.ovsschema b/ovn-sb.ovsschema >> index 3af7654..d1c506a 100644 >> --- a/ovn-sb.ovsschema >> +++ b/ovn-sb.ovsschema >> @@ -1,7 +1,7 @@ >> { >> "name": "OVN_Southbound", >> - "version": "2.9.0", >> - "cksum": "223619766 22548", >> + "version": "2.10.0", >> + "cksum": "2548342632 22615", >> "tables": { >> "SB_Global": { >> "columns": { >> @@ -54,6 +54,7 @@ >> "refType": "weak"}, >> "min": 0, "max": 1}}, >> "nb_cfg": {"type": {"key": "integer"}}, >> + "nb_cfg_timestamp": {"type": {"key": "integer"}}, >> "external_ids": { >> "type": {"key": "string", "value": "string", >> "min": 0, "max": "unlimited"}}}, >> diff --git a/ovn-sb.xml b/ovn-sb.xml >> index 59b2171..59888a1 100644 >> --- a/ovn-sb.xml >> +++ b/ovn-sb.xml >> @@ -392,6 +392,11 @@ >> from the <ref table="SB_Global"/> table into this column. >> </column> >> >> + <column name="nb_cfg_timestamp"> >> + The timestamp when <code>ovn-controller</code> finishes processing >> the >> + change corresponding to <ref column="nb_cfg"/>. >> + </column> >> + >> <group title="Common Columns"> >> The overall purpose of these columns is described under >> <code>Common >> Columns</code> at the beginning of this document. >> diff --git a/tests/ovn.at b/tests/ovn.at >> index 5ad51c0..55257c3 100644 >> --- a/tests/ovn.at >> +++ b/tests/ovn.at >> @@ -21409,3 +21409,50 @@ OVS_WAIT_UNTIL([test x$(as hv1 ovn-appctl -t >> ovn-controller debug/status) = "xru >> >> OVN_CLEANUP([hv1]) >> AT_CLEANUP >> + >> +AT_SETUP([ovn -- nb_cfg timestamp]) >> +ovn_start >> + >> +ovn-nbctl ls-add s2 >> +ovn-nbctl ls-add join >> + >> +net_add n1 >> + >> +n=10 >> +for i in $(seq 1 $n); do >> + sim_add hv$i >> + as hv$i >> + ovs-vsctl add-br br-phys >> + ovn_attach n1 br-phys 192.168.0.$i >> +done >> + >> +AT_FAIL_IF([test x$(ovn-nbctl get nb_global . nb_cfg_timestamp) != x0]) >> +ovn-nbctl --wait=hv ls-add s1 >> +AT_CHECK([ovn-nbctl get nb_global . nb_cfg], [0], [1 >> +]) >> +AT_CHECK([test x$(ovn-nbctl get nb_global . nb_cfg_timestamp) != x0]) >> +AT_CHECK([test x$(ovn-nbctl get nb_global . sb_cfg_timestamp) != x0]) >> +AT_CHECK([test x$(ovn-nbctl get nb_global . hv_cfg_timestamp) != x0]) >> + >> +# kill ovn-controller on chassis hv3, so that it wont update nb_cfg >> +as hv3 ovn-appctl -t ovn-controller exit --restart >> + >> +ovn-nbctl --timeout=1 --wait=hv sync >> +AT_CHECK([ovn-nbctl get nb_global . nb_cfg], [0], [2 >> +]) >> +AT_CHECK([ovn-sbctl --bare --columns=nb_cfg find chassis_private >> name=hv3], [0], [1 >> +]) >> + >> +# start ovn-controller on hv3 again, so that it will update nb_cfg with >> latest >> +# timestamp (hv3 will be the slowest one). >> +as hv3 start_daemon ovn-controller >> +OVS_WAIT_UNTIL([test x$(ovn-sbctl --bare --columns=nb_cfg find >> chassis_private name=hv3) == x2]) >> +hv3_ts=$(ovn-sbctl --bare --columns=nb_cfg_timestamp find >> chassis_private name=hv3) >> +hv_cfg_ts=$(ovn-nbctl get nb_global . hv_cfg_timestamp) >> +AT_CHECK([test x$hv3_ts == x$hv_cfg_ts]) >> + >> +for i in $(seq 2 $n); do >> + OVN_CLEANUP_SBOX([hv$i]) >> +done >> +OVN_CLEANUP([hv1]) >> +AT_CLEANUP >> -- >> 2.1.0 >> >> _______________________________________________ >> dev mailing list >> d...@openvswitch.org >> https://mail.openvswitch.org/mailman/listinfo/ovs-dev >> >> _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev