On 7/17/23 11:10, Ales Musil wrote:
> On Thu, Jul 13, 2023 at 9:42 PM Dumitru Ceara <dce...@redhat.com> wrote:
>
>> On 7/7/23 10:59, Ales Musil wrote:
>>> On Tue, Jun 20, 2023 at 4:26 PM Xavier Simonart <xsimo...@redhat.com> wrote:
>>>
>>>> If an interface with a qos option is deleted at the same
>>>> time as an ofport notification from ovs (causing runtime_data recompute)
>>>> is received, the binding module was trying to delete the same qos
>>>> queue twice, causing ovs to raise an exception.
>>>>
>>>> Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2213219
>>>> Fixes: 7d1d111ff213 ("controller: configure qos through ovs qos table and
>>>> do not run tc directly")
>>>> Signed-off-by: Xavier Simonart <xsimo...@redhat.com>
>>>>
>>>
>>> Hi,
>>
>> Hi Xavier, Ales,
>>
>
> Hi Dumitru,
>
>
>>
>>> I have one question below.
>>>
>>>
>>>> ---
>>>> v2: rebased on origin/main
>>>> ---
>>>>  controller/binding.c        | 22 ++++++++++++
>>>>  controller/binding.h        |  1 +
>>>>  controller/ovn-controller.c | 12 +++++++
>>>>  tests/ovn-macros.at         | 34 ++++++++++++++++++
>>>>  tests/ovn.at                | 70 +++++++++++++++++++++++++++++++++++++
>>>>  tests/system-ovn.at         | 18 ----------
>>>>  6 files changed, 139 insertions(+), 18 deletions(-)
>>>>
>>>> diff --git a/controller/binding.c b/controller/binding.c
>>>> index 486095ca7..8069a2e0d 100644
>>>> --- a/controller/binding.c
>>>> +++ b/controller/binding.c
>>>> @@ -396,9 +396,23 @@ configure_qos(const struct sbrec_port_binding *pb,
>>>>      q->burst = burst;
>>>>  }
>>>>
>>>> +static const struct ovsrec_queue *
>>>> +find_qos_queue_by_external_ids(const struct smap *external_ids,
>>>> +    struct ovsdb_idl_index *ovsrec_queue_by_external_ids)
>>>> +{
>>>> +    const struct ovsrec_queue *queue =
>>>> +        ovsrec_queue_index_init_row(ovsrec_queue_by_external_ids);
>>>> +    ovsrec_queue_index_set_external_ids(queue, external_ids);
>>>> +    const struct ovsrec_queue *retval =
>>>> +        ovsrec_queue_index_find(ovsrec_queue_by_external_ids, queue);
>>>> +    ovsrec_queue_index_destroy_row(queue);
>>>> +    return retval;
>>>> +}
>>>> +
>>>>  static void
>>>>  ovs_qos_entries_gc(struct ovsdb_idl_txn *ovs_idl_txn,
>>>>                     struct ovsdb_idl_index *ovsrec_port_by_qos,
>>>> +                   struct ovsdb_idl_index *ovsrec_queue_by_external_ids,
>>>>                     const struct ovsrec_qos_table *qos_table,
>>>>                     struct hmap *queue_map)
>>>>  {
>>>> @@ -414,6 +428,13 @@ ovs_qos_entries_gc(struct ovsdb_idl_txn *ovs_idl_txn,
>>>>              if (!queue) {
>>>>                  continue;
>>>>              }
>>>> +            const struct ovsrec_queue *ovsrec_queue =
>>>> +                find_qos_queue_by_external_ids(&queue->external_ids,
>>>> +                                               ovsrec_queue_by_external_ids);
>>>
>>> Since we do not completely control the external ids, isn't there a chance
>>> that we will have
>>> outdated external ids which will result in leaked qos records in the end?
>>> Maybe my understanding of the index search over smap is wrong.
>>
>> I think this is fine.  The search is in local data (in the IDL).  If I'm
>> reading this correctly it's just taking advantage that the first time an
>> IDL record is deleted with <table-name>_delete(record) it will also be
>> removed from the index.
>>
>
> Ok, that makes sense.
>
>
>>
>>>
>>>> +            if (!ovsrec_queue) {
>>>> +                VLOG_DBG("queue already deleted !");
>>>> +                continue;
>>>> +            }
>>>>
>>>>              const char *port = smap_get(&queue->external_ids, "ovn_port");
>>>>              if (!port) {
>>>> @@ -2165,6 +2186,7 @@ binding_run(struct binding_ctx_in *b_ctx_in, struct binding_ctx_out *b_ctx_out)
>>>>      shash_destroy(&bridge_mappings);
>>>>      /* Remove stale QoS entries. */
>>>>      ovs_qos_entries_gc(b_ctx_in->ovs_idl_txn, b_ctx_in->ovsrec_port_by_qos,
>>>> +                       b_ctx_in->ovsrec_queue_by_external_ids,
>>>>                         b_ctx_in->qos_table, b_ctx_out->qos_map);
>>>>
>>>>      cleanup_claimed_port_timestamps();
>>>> diff --git a/controller/binding.h b/controller/binding.h
>>>> index 0e57f02ee..e3ab1d7ca 100644
>>>> --- a/controller/binding.h
>>>> +++ b/controller/binding.h
>>>> @@ -47,6 +47,7 @@ struct binding_ctx_in {
>>>>      struct ovsdb_idl_index *sbrec_port_binding_by_datapath;
>>>>      struct ovsdb_idl_index *sbrec_port_binding_by_name;
>>>>      struct ovsdb_idl_index *ovsrec_port_by_qos;
>>>> +    struct ovsdb_idl_index *ovsrec_queue_by_external_ids;
>>>>      const struct ovsrec_qos_table *qos_table;
>>>>      const struct sbrec_port_binding_table *port_binding_table;
>>>>      const struct ovsrec_bridge *br_int;
>>>> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
>>>> index a47406979..bb84554fc 100644
>>>> --- a/controller/ovn-controller.c
>>>> +++ b/controller/ovn-controller.c
>>>> @@ -1116,6 +1116,7 @@ enum sb_engine_node {
>>>>      OVS_NODE(port, "port") \
>>>>      OVS_NODE(interface, "interface") \
>>>>      OVS_NODE(qos, "qos") \
>>>> +    OVS_NODE(queue, "queue") \
>>>>      OVS_NODE(flow_sample_collector_set, "flow_sample_collector_set")
>>>>
>>>>  enum ovs_engine_node {
>>>> @@ -1576,6 +1577,10 @@ init_binding_ctx(struct engine_node *node,
>>>>          engine_ovsdb_node_get_index(
>>>>                  engine_get_input("OVS_port", node), "qos");
>>>>
>>>> +    struct ovsdb_idl_index *ovsrec_queue_by_external_ids =
>>>> +        engine_ovsdb_node_get_index(
>>>> +                engine_get_input("OVS_queue", node), "external_ids");
>>>> +
>>>>      struct controller_engine_ctx *ctrl_ctx =
>>>>          engine_get_context()->client_ctx;
>>>>
>>>>      b_ctx_in->ovnsb_idl_txn = engine_get_context()->ovnsb_idl_txn;
>>>> @@ -1584,6 +1589,7 @@ init_binding_ctx(struct engine_node *node,
>>>>      b_ctx_in->sbrec_port_binding_by_datapath = sbrec_port_binding_by_datapath;
>>>>      b_ctx_in->sbrec_port_binding_by_name = sbrec_port_binding_by_name;
>>>>      b_ctx_in->ovsrec_port_by_qos = ovsrec_port_by_qos;
>>>> +    b_ctx_in->ovsrec_queue_by_external_ids = ovsrec_queue_by_external_ids;
>>>>      b_ctx_in->iface_table = iface_shadow->iface_table;
>>>>      b_ctx_in->iface_table_external_ids_old =
>>>>          &iface_shadow->iface_table_external_ids_old;
>>>> @@ -4599,6 +4605,9 @@ main(int argc, char *argv[])
>>>>      struct ovsdb_idl_index *ovsrec_port_by_qos
>>>>          = ovsdb_idl_index_create1(ovs_idl_loop.idl,
>>>>                                    &ovsrec_port_col_qos);
>>>> +    struct ovsdb_idl_index *ovsrec_queue_by_external_ids
>>>> +        = ovsdb_idl_index_create1(ovs_idl_loop.idl,
>>>> +                                  &ovsrec_queue_col_external_ids);
>>>>      struct ovsdb_idl_index *ovsrec_flow_sample_collector_set_by_id
>>>>          = ovsdb_idl_index_create2(ovs_idl_loop.idl,
>>>>                                    &ovsrec_flow_sample_collector_set_col_bridge,
>>>> @@ -4899,6 +4908,7 @@ main(int argc, char *argv[])
>>>>      engine_add_input(&en_runtime_data, &en_ovs_open_vswitch, NULL);
>>>>      engine_add_input(&en_runtime_data, &en_ovs_bridge, NULL);
>>>>      engine_add_input(&en_runtime_data, &en_ovs_qos, NULL);
>>>> +    engine_add_input(&en_runtime_data, &en_ovs_queue, NULL);
>>>>
>>>>      engine_add_input(&en_runtime_data, &en_sb_chassis, NULL);
>>>>      engine_add_input(&en_runtime_data, &en_sb_datapath_binding,
>>>> @@ -4960,6 +4970,8 @@ main(int argc, char *argv[])
>>>>      engine_ovsdb_node_add_index(&en_ovs_flow_sample_collector_set, "id",
>>>>                                  ovsrec_flow_sample_collector_set_by_id);
>>>>      engine_ovsdb_node_add_index(&en_ovs_port, "qos", ovsrec_port_by_qos);
>>>> +    engine_ovsdb_node_add_index(&en_ovs_queue, "external_ids",
>>>> +                                ovsrec_queue_by_external_ids);
>>>>
>>>>      struct ed_type_lflow_output *lflow_output_data =
>>>>          engine_get_internal_data(&en_lflow_output);
>>>> diff --git a/tests/ovn-macros.at b/tests/ovn-macros.at
>>>> index 6f2d085ae..7223846ef 100644
>>>> --- a/tests/ovn-macros.at
>>>> +++ b/tests/ovn-macros.at
>>>> @@ -840,6 +840,40 @@ fmt_pkt() {
>>>>      print(out.decode())" | $PYTHON3
>>>>  }
>>>>
>>>> +sleep_sb() {
>>>> +    echo SB going to sleep
>>>> +    AT_CHECK([kill -STOP $(cat ovn-sb/ovsdb-server.pid)])
>>>> +}
>>>> +wake_up_sb() {
>>>> +    echo SB waking up
>>>> +    AT_CHECK([kill -CONT $(cat ovn-sb/ovsdb-server.pid)])
>>>> +}
>>>> +sleep_controller() {
>>>> +    hv=$1
>>>> +    echo Controller $hv going to sleep
>>>> +    as $hv
>>>> +    check ovn-appctl debug/pause
>>>> +    OVS_WAIT_UNTIL([test x$(ovn-appctl -t ovn-controller debug/status) = "xpaused"])
>>>> +}
>>>> +wake_up_controller() {
>>>> +    hv=$1
>>>> +    as $hv
>>>> +    echo Controller $hv waking up
>>>> +    ovn-appctl debug/resume
>>>> +    OVS_WAIT_UNTIL([test x$(ovn-appctl -t ovn-controller debug/status) = "xrunning"])
>>>> +}
>>>> +sleep_ovs() {
>>>> +    hv=$1
>>>> +    echo ovs $hv going to sleep
>>>> +    AT_CHECK([kill -STOP $(cat $hv/ovs-vswitchd.pid)])
>>>> +}
>>>> +
>>>> +wake_up_ovs() {
>>>> +    hv=$1
>>>> +    echo ovs $hv waking up
>>>> +    AT_CHECK([kill -CONT $(cat $hv/ovs-vswitchd.pid)])
>>>> +}
>>>> +
>>>>  OVS_END_SHELL_HELPERS
>>>>
>>>>  m4_define([OVN_POPULATE_ARP], [AT_CHECK(ovn_populate_arp__, [0], [ignore])])
>>>> diff --git a/tests/ovn.at b/tests/ovn.at
>>>> index 544fba187..2c221a05c 100644
>>>> --- a/tests/ovn.at
>>>> +++ b/tests/ovn.at
>>>> @@ -36458,3 +36458,73 @@ OVN_CHECK_PACKETS_REMOVE_BROADCAST([hv2/vif1-tx.pcap], [expected])
>>>>
>>>>  AT_CLEANUP
>>>>  ])
>>>> +
>>>> +OVN_FOR_EACH_NORTHD([
>>>> +AT_SETUP([OVN QoS port deletion])
>>>> +ovn_start
>>>> +
>>>> +check ovn-nbctl ls-add ls1
>>>> +check ovn-nbctl lsp-add ls1 public1
>>>> +check ovn-nbctl lsp-set-addresses public1 unknown
>>>> +check ovn-nbctl lsp-set-type public1 localnet
>>>> +check ovn-nbctl lsp-set-options public1 network_name=phys
>>>> +net_add n
>>>> +
>>>> +# two hypervisors, each connected to the same network
>>>> +for i in 1 2; do
>>>> +    sim_add hv-$i
>>>> +    as hv-$i
>>>> +    ovs-vsctl add-br br-phys
>>>> +    ovs-vsctl set open . external-ids:ovn-bridge-mappings=phys:br-phys
>>>> +    ovn_attach n br-phys 192.168.0.$i
>>>> +done
>>>> +
>>>> +check ovn-nbctl lsp-add ls1 lsp1
>>>> +check ovn-nbctl lsp-set-addresses lsp1 f0:00:00:00:00:03
>>>> +as hv-1
>>>> +ovs-vsctl add-port br-int vif1 -- \
>>>> +    set Interface vif1 external-ids:iface-id=lsp1 \
>>>> +    ofport-request=3
>>>> +
>>>> +OVS_WAIT_UNTIL([test x`ovn-nbctl lsp-get-up lsp1` = xup])
>>>> +
>>>> +check ovn-nbctl set Logical_Switch_Port lsp1 options:qos_max_rate=800000
>>>> +check ovn-nbctl --wait=hv set Logical_Switch_Port lsp1 options:qos_burst=9000000
>>>> +
>>>> +AS_BOX([$(date +%H:%M:%S.%03N) checking deletion of port with qos options])
>>>> +check ovn-nbctl ls-add ls2
>>>> +check ovn-nbctl lsp-add ls2 lsp2
>>>> +check ovn-nbctl lsp-set-addresses lsp2 f0:00:00:00:00:05
>>>> +as hv-1
>>>> +ovs-vsctl add-port br-int vif2 -- \
>>>> +    set Interface vif2 external-ids:iface-id=lsp2 \
>>>> +    ofport-request=5
>>>> +OVS_WAIT_UNTIL([test x`ovn-nbctl lsp-get-up lsp2` = xup])
>>>> +
>>>> +# Sleep ovs to postpone ofport notification to ovn
>>>> +sleep_ovs hv-1
>>>> +
>>>> +# Create localnet; this will cause patch-port creation
>>>> +check ovn-nbctl lsp-add ls2 public2
>>>> +check ovn-nbctl lsp-set-addresses public2 unknown
>>>> +check ovn-nbctl lsp-set-type public2 localnet
>>>> +check ovn-nbctl --wait=sb set Logical_Switch_Port public2 options:qos_min_rate=6000000000 options:qos_max_rate=7000000000 options:qos_burst=8000000000 options:network_name=phys
>>>> +
>>>> +# Let's now send ovn controller to sleep, so it will receive both ofport notification and ls deletion simultaneously
>>>> +sleep_controller hv-1
>>>> +
>>>> +# Time to wake up ovs
>>>> +wake_up_ovs hv-1
>>>> +
>>>> +# Delete lsp1
>>>> +check ovn-nbctl --wait=sb lsp-del lsp1
>>>> +
>>>> +# And finally wake up controller
>>>> +wake_up_controller hv-1
>>>> +
>>>> +# Make sure ovn-controller is still OK
>>>> +ovn-nbctl --wait=hv sync
>>
>> Nit: check ovn-nbctl ...
>>
>> But I can take care of this when pushing the patch.  Xavier, was my
>> index related reasoning above correct?  Ales, do you agree?  If so and
>> if you ack the patch I can fix this minor issue at apply time.
>>
>> Thanks,
>> Dumitru
>>
>>
> In that case:
>
> Acked-by: Ales Musil <amu...@redhat.com>
>
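[Editor's note] For readers following the index discussion above, a minimal sketch of the lookup-before-delete pattern the fix relies on. The wrapper name delete_queue_once() is hypothetical and not part of the patch; find_qos_queue_by_external_ids() is the helper the patch adds, and ovsrec_queue_index_find() / ovsrec_queue_delete() are IDL-generated accessors binding.c already uses. Transaction setup and error handling are omitted.

    /* Sketch only: shows why checking the local IDL "external_ids" index
     * before deleting avoids sending the same delete twice.  The first
     * ovsrec_queue_delete() removes the row from the local IDL data,
     * including the index, so a second call for the same queue finds
     * nothing and returns without touching ovsdb-server. */
    static void
    delete_queue_once(struct ovsdb_idl_index *ovsrec_queue_by_external_ids,
                      const struct smap *external_ids)
    {
        const struct ovsrec_queue *queue =
            find_qos_queue_by_external_ids(external_ids,
                                           ovsrec_queue_by_external_ids);
        if (!queue) {
            /* Already deleted earlier in this run. */
            return;
        }
        ovsrec_queue_delete(queue);
    }

This mirrors the guard the patch adds to ovs_qos_entries_gc() above.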
Thanks, Xavier and Ales!  I applied this to main and backported it to 23.06.

Regards,
Dumitru