On Tue, Jun 20, 2023 at 4:26 PM Xavier Simonart <xsimo...@redhat.com> wrote:

> If an interface with a qos option is deleted at the same
> time as an ofport notification from ovs (causing runtime_data recompute)
> is received, the binding module tried to delete the same qos queue
> twice, causing ovs to raise an exception.
>
> Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2213219
> Fixes: 7d1d111ff213 ("controller: configure qos through ovs qos table and
> do not run tc directly")
> Signed-off-by: Xavier Simonart <xsimo...@redhat.com>
>

Hi,
I have one question below.


> ---
> v2: rebased on origin/main
> ---
>  controller/binding.c        | 22 ++++++++++++
>  controller/binding.h        |  1 +
>  controller/ovn-controller.c | 12 +++++++
>  tests/ovn-macros.at         | 34 ++++++++++++++++++
>  tests/ovn.at                | 70 +++++++++++++++++++++++++++++++++++++
>  tests/system-ovn.at         | 18 ----------
>  6 files changed, 139 insertions(+), 18 deletions(-)
>
> diff --git a/controller/binding.c b/controller/binding.c
> index 486095ca7..8069a2e0d 100644
> --- a/controller/binding.c
> +++ b/controller/binding.c
> @@ -396,9 +396,23 @@ configure_qos(const struct sbrec_port_binding *pb,
>      q->burst = burst;
>  }
>
> +static const struct ovsrec_queue *
> +find_qos_queue_by_external_ids(const struct smap *external_ids,
> +    struct ovsdb_idl_index *ovsrec_queue_by_external_ids)
> +{
> +    const struct ovsrec_queue *queue =
> +        ovsrec_queue_index_init_row(ovsrec_queue_by_external_ids);
> +    ovsrec_queue_index_set_external_ids(queue, external_ids);
> +    const struct ovsrec_queue *retval =
> +        ovsrec_queue_index_find(ovsrec_queue_by_external_ids, queue);
> +    ovsrec_queue_index_destroy_row(queue);
> +    return retval;
> +}
> +
>  static void
>  ovs_qos_entries_gc(struct ovsdb_idl_txn *ovs_idl_txn,
>                     struct ovsdb_idl_index *ovsrec_port_by_qos,
> +                   struct ovsdb_idl_index *ovsrec_queue_by_external_ids,
>                     const struct ovsrec_qos_table *qos_table,
>                     struct hmap *queue_map)
>  {
> @@ -414,6 +428,13 @@ ovs_qos_entries_gc(struct ovsdb_idl_txn *ovs_idl_txn,
>              if (!queue) {
>                  continue;
>              }
> +            const struct ovsrec_queue *ovsrec_queue =
> +                find_qos_queue_by_external_ids(&queue->external_ids,
> +
>  ovsrec_queue_by_external_ids);
>

Since we do not completely control the external_ids, isn't there a chance
that we end up with outdated external_ids, which would eventually result
in leaked qos records? Maybe my understanding of how the index search
over an smap works is wrong.


> +            if (!ovsrec_queue) {
> +                VLOG_DBG("queue already deleted !");
> +                continue;
> +            }
>
>              const char *port = smap_get(&queue->external_ids, "ovn_port");
>              if (!port) {
> @@ -2165,6 +2186,7 @@ binding_run(struct binding_ctx_in *b_ctx_in, struct
> binding_ctx_out *b_ctx_out)
>      shash_destroy(&bridge_mappings);
>      /* Remove stale QoS entries. */
>      ovs_qos_entries_gc(b_ctx_in->ovs_idl_txn,
> b_ctx_in->ovsrec_port_by_qos,
> +                       b_ctx_in->ovsrec_queue_by_external_ids,
>                         b_ctx_in->qos_table, b_ctx_out->qos_map);
>
>      cleanup_claimed_port_timestamps();
> diff --git a/controller/binding.h b/controller/binding.h
> index 0e57f02ee..e3ab1d7ca 100644
> --- a/controller/binding.h
> +++ b/controller/binding.h
> @@ -47,6 +47,7 @@ struct binding_ctx_in {
>      struct ovsdb_idl_index *sbrec_port_binding_by_datapath;
>      struct ovsdb_idl_index *sbrec_port_binding_by_name;
>      struct ovsdb_idl_index *ovsrec_port_by_qos;
> +    struct ovsdb_idl_index *ovsrec_queue_by_external_ids;
>      const struct ovsrec_qos_table *qos_table;
>      const struct sbrec_port_binding_table *port_binding_table;
>      const struct ovsrec_bridge *br_int;
> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> index a47406979..bb84554fc 100644
> --- a/controller/ovn-controller.c
> +++ b/controller/ovn-controller.c
> @@ -1116,6 +1116,7 @@ enum sb_engine_node {
>      OVS_NODE(port, "port") \
>      OVS_NODE(interface, "interface") \
>      OVS_NODE(qos, "qos") \
> +    OVS_NODE(queue, "queue") \
>      OVS_NODE(flow_sample_collector_set, "flow_sample_collector_set")
>
>  enum ovs_engine_node {
> @@ -1576,6 +1577,10 @@ init_binding_ctx(struct engine_node *node,
>          engine_ovsdb_node_get_index(
>                  engine_get_input("OVS_port", node), "qos");
>
> +    struct ovsdb_idl_index *ovsrec_queue_by_external_ids =
> +        engine_ovsdb_node_get_index(
> +                engine_get_input("OVS_queue", node), "external_ids");
> +
>      struct controller_engine_ctx *ctrl_ctx =
> engine_get_context()->client_ctx;
>
>      b_ctx_in->ovnsb_idl_txn = engine_get_context()->ovnsb_idl_txn;
> @@ -1584,6 +1589,7 @@ init_binding_ctx(struct engine_node *node,
>      b_ctx_in->sbrec_port_binding_by_datapath =
> sbrec_port_binding_by_datapath;
>      b_ctx_in->sbrec_port_binding_by_name = sbrec_port_binding_by_name;
>      b_ctx_in->ovsrec_port_by_qos = ovsrec_port_by_qos;
> +    b_ctx_in->ovsrec_queue_by_external_ids = ovsrec_queue_by_external_ids;
>      b_ctx_in->iface_table = iface_shadow->iface_table;
>      b_ctx_in->iface_table_external_ids_old =
>          &iface_shadow->iface_table_external_ids_old;
> @@ -4599,6 +4605,9 @@ main(int argc, char *argv[])
>      struct ovsdb_idl_index *ovsrec_port_by_qos
>          = ovsdb_idl_index_create1(ovs_idl_loop.idl,
>                                    &ovsrec_port_col_qos);
> +    struct ovsdb_idl_index *ovsrec_queue_by_external_ids
> +        = ovsdb_idl_index_create1(ovs_idl_loop.idl,
> +                                  &ovsrec_queue_col_external_ids);
>      struct ovsdb_idl_index *ovsrec_flow_sample_collector_set_by_id
>          = ovsdb_idl_index_create2(ovs_idl_loop.idl,
>
>  &ovsrec_flow_sample_collector_set_col_bridge,
> @@ -4899,6 +4908,7 @@ main(int argc, char *argv[])
>      engine_add_input(&en_runtime_data, &en_ovs_open_vswitch, NULL);
>      engine_add_input(&en_runtime_data, &en_ovs_bridge, NULL);
>      engine_add_input(&en_runtime_data, &en_ovs_qos, NULL);
> +    engine_add_input(&en_runtime_data, &en_ovs_queue, NULL);
>
>      engine_add_input(&en_runtime_data, &en_sb_chassis, NULL);
>      engine_add_input(&en_runtime_data, &en_sb_datapath_binding,
> @@ -4960,6 +4970,8 @@ main(int argc, char *argv[])
>      engine_ovsdb_node_add_index(&en_ovs_flow_sample_collector_set, "id",
>                                  ovsrec_flow_sample_collector_set_by_id);
>      engine_ovsdb_node_add_index(&en_ovs_port, "qos", ovsrec_port_by_qos);
> +    engine_ovsdb_node_add_index(&en_ovs_queue, "external_ids",
> +                                ovsrec_queue_by_external_ids);
>
>      struct ed_type_lflow_output *lflow_output_data =
>          engine_get_internal_data(&en_lflow_output);
> diff --git a/tests/ovn-macros.at b/tests/ovn-macros.at
> index 6f2d085ae..7223846ef 100644
> --- a/tests/ovn-macros.at
> +++ b/tests/ovn-macros.at
> @@ -840,6 +840,40 @@ fmt_pkt() {
>            print(out.decode())" | $PYTHON3
>  }
>
> +sleep_sb() {
> +  echo SB going to sleep
> +  AT_CHECK([kill -STOP $(cat ovn-sb/ovsdb-server.pid)])
> +}
> +wake_up_sb() {
> +  echo SB waking up
> +  AT_CHECK([kill -CONT $(cat ovn-sb/ovsdb-server.pid)])
> +}
> +sleep_controller() {
> +  echo Controller $hv going to sleep
> +  hv=$1
> +  as $hv
> +  check ovn-appctl debug/pause
> +  OVS_WAIT_UNTIL([test x$(ovn-appctl -t ovn-controller debug/status) =
> "xpaused"])
> +}
> +wake_up_controller() {
> +  hv=$1
> +  as $hv
> +  echo Controller $hv waking up
> +  ovn-appctl debug/resume
> +  OVS_WAIT_UNTIL([test x$(ovn-appctl -t ovn-controller debug/status) =
> "xrunning"])
> +}
> +sleep_ovs() {
> +  hv=$1
> +  echo ovs $hv going to sleep
> +  AT_CHECK([kill -STOP $(cat $hv/ovs-vswitchd.pid)])
> +}
> +
> +wake_up_ovs() {
> +  hv=$1
> +  echo ovs $hv going to sleep
> +  AT_CHECK([kill -CONT $(cat $hv/ovs-vswitchd.pid)])
> +}
> +
>  OVS_END_SHELL_HELPERS
>
>  m4_define([OVN_POPULATE_ARP], [AT_CHECK(ovn_populate_arp__, [0],
> [ignore])])
> diff --git a/tests/ovn.at b/tests/ovn.at
> index 544fba187..2c221a05c 100644
> --- a/tests/ovn.at
> +++ b/tests/ovn.at
> @@ -36458,3 +36458,73 @@
> OVN_CHECK_PACKETS_REMOVE_BROADCAST([hv2/vif1-tx.pcap], [expected])
>
>  AT_CLEANUP
>  ])
> +
> +OVN_FOR_EACH_NORTHD([
> +AT_SETUP([OVN QoS port deletion])
> +ovn_start
> +
> +check ovn-nbctl ls-add ls1
> +check ovn-nbctl lsp-add ls1 public1
> +check ovn-nbctl lsp-set-addresses public1 unknown
> +check ovn-nbctl lsp-set-type public1 localnet
> +check ovn-nbctl lsp-set-options public1 network_name=phys
> +net_add n
> +
> +# two hypervisors, each connected to the same network
> +for i in 1 2; do
> +    sim_add hv-$i
> +    as hv-$i
> +    ovs-vsctl add-br br-phys
> +    ovs-vsctl set open . external-ids:ovn-bridge-mappings=phys:br-phys
> +    ovn_attach n br-phys 192.168.0.$i
> +done
> +
> +check ovn-nbctl lsp-add ls1 lsp1
> +check ovn-nbctl lsp-set-addresses lsp1 f0:00:00:00:00:03
> +as hv-1
> +ovs-vsctl add-port br-int vif1 -- \
> +    set Interface vif1 external-ids:iface-id=lsp1 \
> +    ofport-request=3
> +
> +OVS_WAIT_UNTIL([test x`ovn-nbctl lsp-get-up lsp1` = xup])
> +
> +check ovn-nbctl set Logical_Switch_Port lsp1 options:qos_max_rate=800000
> +check ovn-nbctl --wait=hv set Logical_Switch_Port lsp1
> options:qos_burst=9000000
> +
> +AS_BOX([$(date +%H:%M:%S.%03N) checking deletion of port with qos
> options])
> +check ovn-nbctl ls-add ls2
> +check ovn-nbctl lsp-add ls2 lsp2
> +check ovn-nbctl lsp-set-addresses lsp2 f0:00:00:00:00:05
> +as hv-1
> +ovs-vsctl add-port br-int vif2 -- \
> +    set Interface vif2 external-ids:iface-id=lsp2 \
> +    ofport-request=5
> +OVS_WAIT_UNTIL([test x`ovn-nbctl lsp-get-up lsp2` = xup])
> +
> +# Sleep ovs to postpone ofport notification to ovn
> +sleep_ovs hv-1
> +
> +# Create localnet; this will cause patch-port creation
> +check ovn-nbctl lsp-add ls2 public2
> +check ovn-nbctl lsp-set-addresses public2 unknown
> +check ovn-nbctl lsp-set-type public2 localnet
> +check ovn-nbctl --wait=sb set Logical_Switch_Port public2
> options:qos_min_rate=6000000000 options:qos_max_rate=7000000000
> options:qos_burst=8000000000 options:network_name=phys
> +
> +# Let's now send ovn controller to sleep, so it will receive both ofport
> notification and ls deletion simultaneously
> +sleep_controller hv-1
> +
> +# Time to wake up ovs
> +wake_up_ovs hv-1
> +
> +# Delete lsp1
> +check ovn-nbctl --wait=sb lsp-del lsp1
> +
> +# And finally wake up controller
> +wake_up_controller hv-1
> +
> +# Make sure ovn-controller is still OK
> +ovn-nbctl --wait=hv sync
> +OVS_WAIT_UNTIL([test $(as hv-1 ovs-vsctl list qos | grep -c linux-htb)
> -eq 1])
> +
> +AT_CLEANUP
> +])
> diff --git a/tests/system-ovn.at b/tests/system-ovn.at
> index 44a8072d6..18a79410e 100644
> --- a/tests/system-ovn.at
> +++ b/tests/system-ovn.at
> @@ -10897,20 +10897,6 @@ wait_for_local_bindings() {
>        [kill -CONT $(cat ovn-sb/ovsdb-server.pid)]
>    )
>  }
> -sleep_sb() {
> -  echo SB going to sleep
> -  AT_CHECK([kill -STOP $(cat ovn-sb/ovsdb-server.pid)])
> -}
> -wake_up_sb() {
> -  echo SB waking up
> -  AT_CHECK([kill -CONT $(cat ovn-sb/ovsdb-server.pid)])
> -}
> -sleep_controller() {
> -  echo Controller going to sleep
> -  ovn-appctl debug/pause
> -  OVS_WAIT_UNTIL([test x$(ovn-appctl -t ovn-controller debug/status) =
> "xpaused"])
> -}
> -
>  stop_ovsdb_controller_updates() {
>    TCP_PORT=$1
>    echo Stopping updates from ovn-controller to ovsdb using port $TCP_PORT
> @@ -10922,10 +10908,6 @@ restart_ovsdb_controller_updates() {
>    echo Restarting updates from ovn-controller to ovsdb
>    iptables -D INPUT -p tcp --destination-port $TCP_PORT  -j DROP
>  }
> -wake_up_controller() {
> -  echo Controller waking up
> -  ovn-appctl debug/resume
> -}
>  ensure_controller_run() {
>  # We want to make sure controller could run at least one full loop.
>  # We can't use wait=hv as sb might be sleeping.
> --
> 2.31.1
>
> _______________________________________________
> dev mailing list
> d...@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
>
Thanks,
Ales

-- 

Ales Musil

Senior Software Engineer - OVN Core

Red Hat EMEA <https://www.redhat.com>

amu...@redhat.com    IM: amusil
<https://red.ht/sig>
_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to