If the I-P engine aborts in the middle of a run, the 'flow_output' node change handlers or run callback might not be called.
As a side effect this may cause that Logical_Flow IDL tracked changes are not processed during the iteration. As a consequence, if a Logical_Flow was removed from the Southound DB, then its associated cache entry (if any) will not be flushed. This has two side effects: 1. Stale entries are kept in the cache until a full recompute or cache flush happens. 2. If a new Logical_Flow is added to the Southbound and it happens to have a UUID that matches one of a stale cache entry then ovn-controller will install incorrect openflows. IDL tracked changes are cleared at every iteration of ovn-controller. Skipping the ovsdb_idl_track_clear() call if the I-P engine aborted is not a valid option for now because it might lead to some of the IDL changes to be processed twice. Instead, lflow_handle_cached_flows() is called now at every iteration of ovn-controller making sure deleted flows are removed from the cache. Also, rename the 'flush-lflow-cache' unixctl command to 'lflow-cache/flush' to better match the style of other command names. The old 'flush-lflow-cache' command is kept (as deprecated) to avoid breaking existing CMS scripts. Fixes: 2662498bfd13 ("ovn-controller: Persist the conjunction ids allocated for conjuctive matches.") Acked-by: Mark Michelson <mmich...@redhat.com> Signed-off-by: Dumitru Ceara <dce...@redhat.com> --- v3: - keep 'flush-lflow-cache' to avoid breaking CMS scripts. - add man page entry for 'lflow-cache/flush' --- controller/lflow.c | 30 +++++++++++++++++------------- controller/lflow.h | 4 +++- controller/ovn-controller.8.xml | 5 +++++ controller/ovn-controller.c | 24 ++++++++++++++++++++---- tests/ovn.at | 2 +- 5 files changed, 46 insertions(+), 19 deletions(-) diff --git a/controller/lflow.c b/controller/lflow.c index 2b7d356..02a4480 100644 --- a/controller/lflow.c +++ b/controller/lflow.c @@ -1583,19 +1583,6 @@ lflow_run(struct lflow_ctx_in *l_ctx_in, struct lflow_ctx_out *l_ctx_out) { COVERAGE_INC(lflow_run); - /* when lflow_run is called, it's possible that some of the logical flows - * are deleted. We need to delete the lflow cache for these - * lflows (if present), otherwise, they will not be deleted at all. */ - if (l_ctx_out->lflow_cache_map) { - const struct sbrec_logical_flow *lflow; - SBREC_LOGICAL_FLOW_TABLE_FOR_EACH_TRACKED (lflow, - l_ctx_in->logical_flow_table) { - if (sbrec_logical_flow_is_deleted(lflow)) { - lflow_cache_delete(l_ctx_out->lflow_cache_map, lflow); - } - } - } - add_logical_flows(l_ctx_in, l_ctx_out); add_neighbor_flows(l_ctx_in->sbrec_port_binding_by_name, l_ctx_in->mac_binding_table, l_ctx_in->local_datapaths, @@ -1604,6 +1591,23 @@ lflow_run(struct lflow_ctx_in *l_ctx_in, struct lflow_ctx_out *l_ctx_out) l_ctx_out->flow_table); } +/* Should be called at every ovn-controller iteration before IDL tracked + * changes are cleared to avoid maintaining cache entries for flows that + * don't exist anymore. + */ +void +lflow_handle_cached_flows(struct hmap *lflow_cache_map, + const struct sbrec_logical_flow_table *flow_table) +{ + const struct sbrec_logical_flow *lflow; + + SBREC_LOGICAL_FLOW_TABLE_FOR_EACH_TRACKED (lflow, flow_table) { + if (sbrec_logical_flow_is_deleted(lflow)) { + lflow_cache_delete(lflow_cache_map, lflow); + } + } +} + void lflow_destroy(void) { diff --git a/controller/lflow.h b/controller/lflow.h index ba79cc3..cf4f0e8 100644 --- a/controller/lflow.h +++ b/controller/lflow.h @@ -156,7 +156,9 @@ struct lflow_ctx_out { }; void lflow_init(void); -void lflow_run(struct lflow_ctx_in *, struct lflow_ctx_out *); +void lflow_run(struct lflow_ctx_in *, struct lflow_ctx_out *); +void lflow_handle_cached_flows(struct hmap *lflow_cache, + const struct sbrec_logical_flow_table *); bool lflow_handle_changed_flows(struct lflow_ctx_in *, struct lflow_ctx_out *); bool lflow_handle_changed_ref(enum ref_type, const char *ref_name, struct lflow_ctx_in *, struct lflow_ctx_out *, diff --git a/controller/ovn-controller.8.xml b/controller/ovn-controller.8.xml index 29833c7..23ceb86 100644 --- a/controller/ovn-controller.8.xml +++ b/controller/ovn-controller.8.xml @@ -544,6 +544,11 @@ end-to-end latency in a large scale environment. See <code>ovn-nbctl</code>(8) for more details. </dd> + + <dt><code>lflow-cache/flush</code></dt> + <dd> + Flushes the <code>ovn-controller</code> logical flow cache. + </dd> </dl> </p> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c index ef3e0e9..0f54ccd 100644 --- a/controller/ovn-controller.c +++ b/controller/ovn-controller.c @@ -80,7 +80,7 @@ static unixctl_cb_func cluster_state_reset_cmd; static unixctl_cb_func debug_pause_execution; static unixctl_cb_func debug_resume_execution; static unixctl_cb_func debug_status_execution; -static unixctl_cb_func flush_lflow_cache; +static unixctl_cb_func lflow_cache_flush_cmd; static unixctl_cb_func debug_delay_nb_cfg_report; #define DEFAULT_BRIDGE_NAME "br-int" @@ -2666,7 +2666,12 @@ main(int argc, char *argv[]) unixctl_command_register("recompute", "", 0, 0, engine_recompute_cmd, NULL); - unixctl_command_register("flush-lflow-cache", "", 0, 0, flush_lflow_cache, + unixctl_command_register("lflow-cache/flush", "", 0, 0, + lflow_cache_flush_cmd, + &flow_output_data->pd); + /* Keep deprecated 'flush-lflow-cache' command for now. */ + unixctl_command_register("flush-lflow-cache", "[deprecated]", 0, 0, + lflow_cache_flush_cmd, &flow_output_data->pd); bool reset_ovnsb_idl_min_index = false; @@ -2773,6 +2778,16 @@ main(int argc, char *argv[]) if (ovsdb_idl_has_ever_connected(ovnsb_idl_loop.idl) && northd_version_match) { + + /* Unconditionally remove all deleted lflows from the lflow + * cache. + */ + if (flow_output_data && flow_output_data->pd.lflow_cache_enabled) { + lflow_handle_cached_flows( + &flow_output_data->pd.lflow_cache_map, + sbrec_logical_flow_table_get(ovnsb_idl_loop.idl)); + } + /* Contains the transport zones that this Chassis belongs to */ struct sset transport_zones = SSET_INITIALIZER(&transport_zones); sset_from_delimited_string(&transport_zones, @@ -3253,8 +3268,9 @@ engine_recompute_cmd(struct unixctl_conn *conn OVS_UNUSED, int argc OVS_UNUSED, } static void -flush_lflow_cache(struct unixctl_conn *conn OVS_UNUSED, int argc OVS_UNUSED, - const char *argv[] OVS_UNUSED, void *arg_) +lflow_cache_flush_cmd(struct unixctl_conn *conn OVS_UNUSED, + int argc OVS_UNUSED, const char *argv[] OVS_UNUSED, + void *arg_) { VLOG_INFO("User triggered lflow cache flush."); struct flow_output_persistent_data *fo_pd = arg_; diff --git a/tests/ovn.at b/tests/ovn.at index 377a84e..785514d 100644 --- a/tests/ovn.at +++ b/tests/ovn.at @@ -22322,7 +22322,7 @@ check ovn-nbctl acl-add pg0 to-lport 1002 "outport == @pg0 && ip4 && udp.dst >= wait_conj_id_count 2 "6 1 tcp" "7 1 udp" AS_BOX([Flush lflow cache.]) -as hv1 ovn-appctl -t ovn-controller flush-lflow-cache +as hv1 ovn-appctl -t ovn-controller lflow-cache/flush wait_conj_id_count 2 "2 1" "3 1" AS_BOX([Disable lflow caching.]) _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev