Hi Mark,
thank you for the review. Everything should be addressed in v2, except the
stats there reply in-line below.

On Wed, Jun 15, 2022 at 8:13 PM Mark Michelson <mmich...@redhat.com> wrote:

> Hi Ales,
>
> I have a few comments in-line below.
>
> On 6/14/22 09:49, Ales Musil wrote:
> > Add MAC binding aging mechanism that utilizes
> > the ownership of MAC binding row. The controller
> > that "owns" the MAC binding will track idle_age
> > statistics for related OpenFlows (table 66 and 67).
> >
> > If the idle_age exceeds the threshold value for aging
> > the MAC binding row is removed. The threshold is set to
> > 60 secs. The removal won't happen exactly at the 60
> > second mark, but can happen any time later on, depending
> > on timing of the ovn-controller loop.
> >
> > Reported-at: https://bugzilla.redhat.com/2084668
> > Signed-off-by: Ales Musil <amu...@redhat.com>
> > ---
> >   controller/automake.mk         |   4 +-
> >   controller/mac-binding-aging.c | 230 +++++++++++++++++++++++++++++++++
> >   controller/mac-binding-aging.h |  32 +++++
> >   controller/ovn-controller.c    |  12 ++
> >   4 files changed, 277 insertions(+), 1 deletion(-)
> >   create mode 100644 controller/mac-binding-aging.c
> >   create mode 100644 controller/mac-binding-aging.h
> >
> > diff --git a/controller/automake.mk b/controller/automake.mk
> > index c2ab1bbe6..01546e335 100644
> > --- a/controller/automake.mk
> > +++ b/controller/automake.mk
> > @@ -41,7 +41,9 @@ controller_ovn_controller_SOURCES = \
> >       controller/ovsport.h \
> >       controller/ovsport.c \
> >       controller/vif-plug.h \
> > -     controller/vif-plug.c
> > +     controller/vif-plug.c \
> > +     controller/mac-binding-aging.h \
> > +     controller/mac-binding-aging.c
> >
> >   controller_ovn_controller_LDADD = lib/libovn.la $(OVS_LIBDIR)/
> libopenvswitch.la
> >   man_MANS += controller/ovn-controller.8
> > diff --git a/controller/mac-binding-aging.c
> b/controller/mac-binding-aging.c
> > new file mode 100644
> > index 000000000..7ab01c4ab
> > --- /dev/null
> > +++ b/controller/mac-binding-aging.c
> > @@ -0,0 +1,230 @@
> > +
> > +/* Copyright (c) 2022, Red Hat, Inc.
> > + *
> > + * Licensed under the Apache License, Version 2.0 (the "License");
> > + * you may not use this file except in compliance with the License.
> > + * You may obtain a copy of the License at:
> > + *
> > + *     http://www.apache.org/licenses/LICENSE-2.0
> > + *
> > + * Unless required by applicable law or agreed to in writing, software
> > + * distributed under the License is distributed on an "AS IS" BASIS,
> > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
> implied.
> > + * See the License for the specific language governing permissions and
> > + * limitations under the License.
> > + */
> > +
> > +#include <config.h>
> > +
> > +#include "byte-order.h"
> > +#include "dirs.h"
> > +#include "mac-binding-aging.h"
> > +#include "openvswitch/hmap.h"
> > +#include "openvswitch/ofp-flow.h"
> > +#include "openvswitch/vconn.h"
> > +#include "openvswitch/vlog.h"
> > +#include "ovn-sb-idl.h"
> > +#include "timeval.h"
> > +#include "util.h"
> > +#include "uuid.h"
> > +
> > +VLOG_DEFINE_THIS_MODULE(mac_binding_aging);
> > +
> > +/* Contains "struct mac_binding_aging"s. */
> > +static struct hmap mb_aging_hmap;
> > +
> > +struct mac_binding_aging {
> > +    struct hmap_node hmap_node;
> > +
> > +    /* Idle time from last statistic check in ms. */
> > +    long long idle_age;
> > +    /* Time when the statistics were updated in ms. */
> > +    long long last_check;
> > +
> > +    const struct sbrec_mac_binding *mb;
>
> Storing an IDL row like this in a long-term data structure is not a good
> idea. You are not in control of the lifetime of this object, and it is
> possible for it to be freed by the IDL before you are done with it.
> Since the only thing you need is the UUID of the MAC_Binding, the best
> thing to do would be to store a copy of the UUID instead.
>
> > +};
> > +
> > +void
> > +mac_binding_aging_init(void)
> > +{
> > +    hmap_init(&mb_aging_hmap);
> > +}
> > +
> > +void
> > +mac_binding_aging_destroy(void)
> > +{
> > +    struct mac_binding_aging *mb_aging;
> > +
> > +    HMAP_FOR_EACH_POP(mb_aging, hmap_node, &mb_aging_hmap) {
> > +        free(mb_aging);
> > +    }
> > +    hmap_destroy(&mb_aging_hmap);
> > +}
> > +
> > +static struct mac_binding_aging *
> > +mac_binding_aging_find(const struct uuid *uuid)
> > +{
> > +    struct mac_binding_aging *mb_aging;
> > +
> > +    size_t hash = uuid_hash(uuid);
> > +
> > +    HMAP_FOR_EACH_WITH_HASH(mb_aging, hmap_node, hash, &mb_aging_hmap) {
> > +        if (uuid_equals(&mb_aging->mb->header_.uuid, uuid)) {
> > +            return mb_aging;
> > +        }
> > +    }
> > +
> > +    return NULL;
> > +}
> > +
> > +static void
> > +mac_binding_aging_add(const struct sbrec_mac_binding *mb)
> > +{
> > +    struct mac_binding_aging *mb_aging;
> > +    size_t hash = uuid_hash(&mb->header_.uuid);
> > +
> > +    mb_aging = xmalloc(sizeof *mb_aging);
> > +    mb_aging->mb = mb;
> > +    mb_aging->last_check = time_msec();
> > +    mb_aging->idle_age = 0;
> > +    hmap_insert(&mb_aging_hmap, &mb_aging->hmap_node, hash);
> > +}
> > +
> > +static bool
> > +mac_binding_aging_needs_update(struct mac_binding_aging *mb_aging,
> > +                               long long now, unsigned long long
> threshold)
> > +{
> > +    return (now - mb_aging->last_check + mb_aging->idle_age) >=
> threshold;
> > +}
> > +
> > +static void
> > +mac_binding_aging_update_statistics(struct vconn *vconn,
> > +                                    struct mac_binding_aging *mb_aging,
> > +                                    long long now)
> > +{
> > +    uint32_t cookie = mb_aging->mb->header_.uuid.parts[0];
> > +
> > +    struct ofputil_flow_stats_request fsr = {
> > +        .cookie = htonll(cookie),
> > +        .cookie_mask = OVS_BE64_MAX,
> > +        .out_port = OFPP_ANY,
> > +        .out_group = OFPG_ANY,
> > +        .table_id = OFPTT_ALL,
> > +    };
>
> If I've read the code correctly, it appears that we send one of these
> flow stats requests messages for each MAC binding for which we want to
> get updated stats. The cookie is the element that changes with each
> successive flow stats request.
>
> What if instead of doing that, we did a single flow stats request and
> specify the table we are interested in? Then we could iterate over the
> returned flows and find the corresponding mac_binding_aging structure to
> determine what action to take.
>
> My thought here is that with a large number of MAC bindings, it may be
> faster to perform a single request to OVS instead of many.
>
> Since you add a stopwatch in patch 3 of this series, we can use that as
> a way of testing the two implementations to see which is quickest.
>

So I tried to pull the whole table at once and measured the difference. To
my surprise
the difference is pretty big e.g.

Getting the full table at once:
Statistics for 'mac-binding-aging'
  Total samples: 2392
  Maximum: 24 msec
  Minimum: 0 msec
  95th percentile: 6.052561 msec
  Short term average: 0.916053 msec
  Long term average: 4.955343 msec

The v2 implementation with other suggestions:
Statistics for 'mac-binding-aging'
  Total samples: 3798
  Maximum: 39 msec
  Minimum: 0 msec
  95th percentile: 1.001509 msec
  Short term average: 0.822068 msec
  Long term average: 0.666313 msec

The spikes in max are due to MAC bindings being removed at once which I
will hopefully address in a follow up.
The test was done over 1000 MAC bindings residing on the same chassis.
That's not that huge sample but still big enough
to show the difference.

We can always change it if we find out that this is really not the way to
go. I am leaving it as is for now.


> > +
> > +    struct ofputil_flow_stats *fses;
> > +    size_t n_fses;
> > +
> > +    int error =
> > +        vconn_dump_flows(vconn, &fsr, OFPUTIL_P_OF15_OXM, &fses,
> &n_fses);
> > +    if (error) {
> > +        VLOG_WARN("%s: error obtaining flow stats (%s)",
> > +                  vconn_get_name(vconn), ovs_strerror(error));
> > +        goto free;
> > +    }
> > +
> > +    if (n_fses != 2) {
> > +        VLOG_DBG("Unexpected statistics count (%" PRIuSIZE "), "
> > +                 "the flows might not be installed yet or they "
> > +                 "are already removed.", n_fses);
> > +        goto free;
> > +    }
> > +
> > +    mb_aging->idle_age = MIN(fses[0].idle_age, fses[1].idle_age) * 1000;
> > +    mb_aging->last_check = now;
> > +
> > +free:
> > +    for (size_t i = 0; i < n_fses; i++) {
> > +        free(CONST_CAST(struct ofpact *, fses[i].ofpacts));
> > +    }
> > +    free(fses);
> > +}
> > +
> > +static void
> > +mac_binding_aging_update_monitored(struct ovsdb_idl_index
> *mb_by_chassis_index,
> > +                                   const struct sbrec_mac_binding_table
> > +                                   *mac_binding_table,
> > +                                   const struct sbrec_chassis *chassis)
> > +{
> > +    const struct sbrec_mac_binding *mb;
> > +    SBREC_MAC_BINDING_TABLE_FOR_EACH_TRACKED(mb, mac_binding_table) {
> > +        if (sbrec_mac_binding_is_deleted(mb) ||
> > +            (sbrec_mac_binding_is_updated(mb,
> SBREC_MAC_BINDING_COL_CHASSIS) &&
> > +             mb->chassis != chassis)) {
> > +            struct mac_binding_aging *mb_aging =
> > +                mac_binding_aging_find(&mb->header_.uuid);
> > +            if (mb_aging) {
> > +                hmap_remove(&mb_aging_hmap, &mb_aging->hmap_node);
> > +                free(mb_aging);
> > +            }
> > +        }
> > +    }
>
> Using this mechanism is a problem. OVN has an incremental engine built
> into it that you can use to process changes from the southbound database
> incrementally. The reason why an incremental engine exists is,
> partially, so that we can detect scenarios where trying to do an
> incremental change will not be possible and that a full recompute is
> required instead. A simple example of this would be if we disconnect
> from the southbound database and then reconnect. In such a scenario, we
> can't rely on the tracked differences in the database.
>
> As such, using the _TRACKED variants of table traversals should only be
> done within the confines of the incremental engine.  This way, you can
> be sure that you are performing incremental processing in a context
> where it will work properly.
>
> Your best bet would be to re-write this function to be stateless. If we
> deem it necessary, we can potentially add incremental processing at a
> later time.
>
> What I mean by stateless in this case is that you will need to traverse
> through all MAC bindings in the southbound table, find the corresponding
> mac_binding_aging structure, and perform whatever processing is
> necessary. If you find that a mac_binding_aging structure went unvisited
> during the traversal, then that means the corresponding southbound
> MAC_Binding was deleted and we can delete our local mac_binding_aging
> struct.
>
> > +
> > +    struct sbrec_mac_binding *mb_index_row =
> > +        sbrec_mac_binding_index_init_row(mb_by_chassis_index);
> > +    sbrec_mac_binding_index_set_chassis(mb_index_row, chassis);
> > +    SBREC_MAC_BINDING_FOR_EACH_EQUAL(mb, mb_index_row,
> mb_by_chassis_index) {
> > +        if (!mac_binding_aging_find(&mb->header_.uuid)) {
> > +            mac_binding_aging_add(mb);
> > +        }
> > +    }
> > +    sbrec_mac_binding_index_destroy_row(mb_index_row);
> > +}
> > +
> > +static struct vconn *
> > +create_ovs_connection(const char *br_int_name)
> > +{
> > +    struct vconn *vconn;
> > +    char *target = xasprintf("unix:%s/%s.mgmt", ovs_rundir(),
> br_int_name);
> > +    int retval = vconn_open_block(target, 1 << OFP15_VERSION, 0, -1,
> &vconn);
> > +
> > +    if (retval) {
> > +        VLOG_WARN("%s: connection failed (%s)", target,
> ovs_strerror(retval));
> > +    }
> > +    free(target);
> > +
> > +    return vconn;
> > +}
> > +
> > +void
> > +mac_binding_aging_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
> > +                      const char *br_int_name,
> > +                      const struct sbrec_chassis *chassis,
> > +                      const struct sbrec_mac_binding_table
> *mac_binding_table,
> > +                      struct ovsdb_idl_index *mb_by_chassis_index,
> > +                      unsigned long long threshold)
> > +{
> > +    if (!ovnsb_idl_txn) {
> > +        return;
> > +    }
> > +
> > +    struct vconn *vconn = create_ovs_connection(br_int_name);
> > +
> > +    if (!vconn) {
> > +        return;
> > +    }
> > +
> > +    mac_binding_aging_update_monitored(mb_by_chassis_index,
> mac_binding_table,
> > +                                       chassis);
> > +
> > +    long long now = time_msec();
> > +
> > +    struct mac_binding_aging *mb_aging;
> > +
> > +    HMAP_FOR_EACH_SAFE(mb_aging, hmap_node, &mb_aging_hmap) {
> > +        if (mac_binding_aging_needs_update(mb_aging, now, threshold)) {
> > +            mac_binding_aging_update_statistics(vconn, mb_aging, now);
> > +        }
> > +        if (mb_aging->idle_age >= threshold) {
> > +            VLOG_DBG("MAC binding exceeded threshold uuid=" UUID_FMT ",
> "
> > +                     "idle_age=%llu ms",
> > +                     UUID_ARGS(&mb_aging->mb->header_.uuid),
> > +                     mb_aging->idle_age);
> > +            sbrec_mac_binding_delete(mb_aging->mb);
> > +            hmap_remove(&mb_aging_hmap, &mb_aging->hmap_node);
> > +            free(mb_aging);
> > +        }
> > +    }
> > +
> > +    vconn_close(vconn);
> > +}
> > diff --git a/controller/mac-binding-aging.h
> b/controller/mac-binding-aging.h
> > new file mode 100644
> > index 000000000..2228d0ca1
> > --- /dev/null
> > +++ b/controller/mac-binding-aging.h
> > @@ -0,0 +1,32 @@
> > +
> > +/* Copyright (c) 2022, Red Hat, Inc.
> > + *
> > + * Licensed under the Apache License, Version 2.0 (the "License");
> > + * you may not use this file except in compliance with the License.
> > + * You may obtain a copy of the License at:
> > + *
> > + *     http://www.apache.org/licenses/LICENSE-2.0
> > + *
> > + * Unless required by applicable law or agreed to in writing, software
> > + * distributed under the License is distributed on an "AS IS" BASIS,
> > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
> implied.
> > + * See the License for the specific language governing permissions and
> > + * limitations under the License.
> > + */
> > +
> > +#ifndef OVN_MAC_BINDING_AGING_H
> > +#define OVN_MAC_BINDING_AGING_H 1
> > +
> > +#include "ovn-sb-idl.h"
> > +
> > +void mac_binding_aging_init(void);
> > +void mac_binding_aging_destroy(void);
> > +void mac_binding_aging_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
> > +                           const char *br_int_name,
> > +                           const struct sbrec_chassis *chassis,
> > +                           const struct sbrec_mac_binding_table
> > +                           *mac_binding_table,
> > +                           struct ovsdb_idl_index *mb_by_chassis_index,
> > +                           unsigned long long threshold);
> > +
> > +#endif /* controller/mac-binding-aging.h */
> > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> > index 2793c8687..f33eb43d4 100644
> > --- a/controller/ovn-controller.c
> > +++ b/controller/ovn-controller.c
> > @@ -76,6 +76,7 @@
> >   #include "stopwatch.h"
> >   #include "lib/inc-proc-eng.h"
> >   #include "hmapx.h"
> > +#include "mac-binding-aging.h"
> >
> >   VLOG_DEFINE_THIS_MODULE(main);
> >
> > @@ -3306,6 +3307,7 @@ main(int argc, char *argv[])
> >       pinctrl_init();
> >       lflow_init();
> >       vif_plug_provider_initialize();
> > +    mac_binding_aging_init();
> >
> >       /* Connect to OVS OVSDB instance. */
> >       struct ovsdb_idl_loop ovs_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
> > @@ -3378,6 +3380,9 @@ main(int argc, char *argv[])
> >       struct ovsdb_idl_index *sbrec_static_mac_binding_by_datapath
> >           = ovsdb_idl_index_create1(ovnsb_idl_loop.idl,
> >
>  &sbrec_static_mac_binding_col_datapath);
> > +    struct ovsdb_idl_index *sbrec_mac_biding_by_chassis
> > +        = ovsdb_idl_index_create1(ovnsb_idl_loop.idl,
> > +                                  &sbrec_mac_binding_col_chassis);
> >
> >       ovsdb_idl_track_add_all(ovnsb_idl_loop.idl);
> >       ovsdb_idl_omit_alert(ovnsb_idl_loop.idl,
> > @@ -3951,6 +3956,12 @@ main(int argc, char *argv[])
> >                               stopwatch_stop(VIF_PLUG_RUN_STOPWATCH_NAME,
> >                                              time_msec());
> >                           }
> > +                        mac_binding_aging_run(ovnsb_idl_txn
> ,br_int->name,
> > +                                              chassis,
> > +
> sbrec_mac_binding_table_get
> > +                                                  (ovnsb_idl_loop.idl),
> > +
> sbrec_mac_biding_by_chassis,
> > +                                              60000);
> >                           stopwatch_start(PINCTRL_RUN_STOPWATCH_NAME,
> >                                           time_msec());
> >                           pinctrl_run(ovnsb_idl_txn,
> > @@ -4218,6 +4229,7 @@ loop_done:
> >       shash_destroy(&vif_plug_deleted_iface_ids);
> >       shash_destroy(&vif_plug_changed_iface_ids);
> >       vif_plug_provider_destroy_all();
> > +    mac_binding_aging_destroy();
> >
> >       ovsdb_idl_loop_destroy(&ovs_idl_loop);
> >       ovsdb_idl_loop_destroy(&ovnsb_idl_loop);
> >
>
>
Thanks,
Ales

-- 

Ales Musil

Senior Software Engineer - OVN Core

Red Hat EMEA <https://www.redhat.com>

amu...@redhat.com    IM: amusil
<https://red.ht/sig>
_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to