From: Han Zhou <hzh...@ebay.com> In the main loop, if the SB DB is disconnected when there is a pending transaction, there can be busy loop causing 100% CPU of ovn-controller, until SB DB is connected again.
The root cause is that when a transaction is pending, ovsdb_idl_loop_run() will return NULL for ovnsb_idl_txn, and chassis_run() returns NULL when ovnsb_idl_txn is NULL, so the condition if (br_int && chassis) is not satisfied and so ofctrl_run() is not executed in the main loop. If there is any message pending from br-int.mgmt, such as OFPTYPE_BARRIER_REPLY or OFPTYPE_ECHO_REQUEST, the main loop will be woken up again and again because those messages are not processed because ofctrl_run() is not invoked. This patch fixes the problem by moving ofctrl_run() above and run it whenever br_int is not NULL, and not care about chassis because this function doesn't depend on it. It also moves out sbrec_chassis_set_nb_cfg() from the "if (ovs_idl_txn)" just to avoid adding more indentation of the whole block to avoid >79 line length. Note: the changes of this patch is better to be shown with "-w" because most of them are indent changes. Acked-by: Numan Siddique <nusid...@redhat.com> Acked-by: Mark Michelson <mmich...@redhat.com> Signed-off-by: Han Zhou <hzh...@ebay.com> --- v3->v4: rebase on master ovn/controller/ovn-controller.c | 97 ++++++++++++++++++++++------------------- 1 file changed, 51 insertions(+), 46 deletions(-) diff --git a/ovn/controller/ovn-controller.c b/ovn/controller/ovn-controller.c index ad626f6..cf4907d 100644 --- a/ovn/controller/ovn-controller.c +++ b/ovn/controller/ovn-controller.c @@ -722,38 +722,40 @@ main(int argc, char *argv[]) &active_tunnels, &local_datapaths, &local_lports, &local_lport_ids); } - if (br_int && chassis) { - struct shash addr_sets = SHASH_INITIALIZER(&addr_sets); - addr_sets_init(sbrec_address_set_table_get(ovnsb_idl_loop.idl), - &addr_sets); - struct shash port_groups = SHASH_INITIALIZER(&port_groups); - port_groups_init( - sbrec_port_group_table_get(ovnsb_idl_loop.idl), - &port_groups); - - patch_run(ovs_idl_txn, - ovsrec_bridge_table_get(ovs_idl_loop.idl), - ovsrec_open_vswitch_table_get(ovs_idl_loop.idl), - ovsrec_port_table_get(ovs_idl_loop.idl), - sbrec_port_binding_table_get(ovnsb_idl_loop.idl), - br_int, chassis); + if (br_int) { enum mf_field_id mff_ovn_geneve = ofctrl_run( br_int, &pending_ct_zones); - pinctrl_run(ovnsb_idl_txn, - sbrec_datapath_binding_by_key, - sbrec_port_binding_by_datapath, - sbrec_port_binding_by_key, - sbrec_port_binding_by_name, - sbrec_mac_binding_by_lport_ip, - sbrec_dns_table_get(ovnsb_idl_loop.idl), - br_int, chassis, - &local_datapaths, &active_tunnels); - update_ct_zones(&local_lports, &local_datapaths, &ct_zones, - ct_zone_bitmap, &pending_ct_zones); - if (ovs_idl_txn) { - if (ofctrl_can_put()) { + if (chassis) { + struct shash addr_sets = SHASH_INITIALIZER(&addr_sets); + addr_sets_init( + sbrec_address_set_table_get(ovnsb_idl_loop.idl), + &addr_sets); + struct shash port_groups = SHASH_INITIALIZER(&port_groups); + port_groups_init( + sbrec_port_group_table_get(ovnsb_idl_loop.idl), + &port_groups); + + patch_run(ovs_idl_txn, + ovsrec_bridge_table_get(ovs_idl_loop.idl), + ovsrec_open_vswitch_table_get(ovs_idl_loop.idl), + ovsrec_port_table_get(ovs_idl_loop.idl), + sbrec_port_binding_table_get(ovnsb_idl_loop.idl), + br_int, chassis); + + pinctrl_run(ovnsb_idl_txn, + sbrec_datapath_binding_by_key, + sbrec_port_binding_by_datapath, + sbrec_port_binding_by_key, + sbrec_port_binding_by_name, + sbrec_mac_binding_by_lport_ip, + sbrec_dns_table_get(ovnsb_idl_loop.idl), + br_int, chassis, + &local_datapaths, &active_tunnels); + update_ct_zones(&local_lports, &local_datapaths, &ct_zones, + ct_zone_bitmap, &pending_ct_zones); + if (ovs_idl_txn && ofctrl_can_put()) { stopwatch_start(CONTROLLER_LOOP_STOPWATCH_NAME, time_msec()); @@ -807,28 +809,31 @@ main(int argc, char *argv[]) sbrec_chassis_set_nb_cfg(chassis, cur_cfg); } } - } - if (pending_pkt.conn) { - char *error = ofctrl_inject_pkt(br_int, pending_pkt.flow_s, - &addr_sets, &port_groups); - if (error) { - unixctl_command_reply_error(pending_pkt.conn, error); - free(error); - } else { - unixctl_command_reply(pending_pkt.conn, NULL); + if (pending_pkt.conn) { + char *error = ofctrl_inject_pkt(br_int, + pending_pkt.flow_s, + &addr_sets, + &port_groups); + if (error) { + unixctl_command_reply_error(pending_pkt.conn, + error); + free(error); + } else { + unixctl_command_reply(pending_pkt.conn, NULL); + } + pending_pkt.conn = NULL; + free(pending_pkt.flow_s); } - pending_pkt.conn = NULL; - free(pending_pkt.flow_s); - } - update_sb_monitors(ovnsb_idl_loop.idl, chassis, - &local_lports, &local_datapaths); + update_sb_monitors(ovnsb_idl_loop.idl, chassis, + &local_lports, &local_datapaths); - expr_const_sets_destroy(&addr_sets); - shash_destroy(&addr_sets); - expr_const_sets_destroy(&port_groups); - shash_destroy(&port_groups); + expr_const_sets_destroy(&addr_sets); + shash_destroy(&addr_sets); + expr_const_sets_destroy(&port_groups); + shash_destroy(&port_groups); + } } /* If we haven't handled the pending packet insertion -- 2.1.0 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev