Add an FDB aging mechanism that utilizes the
timestamp column in the FDB table. When the age of an
FDB row exceeds the threshold set for its logical
switch, the row is removed from the SB DB.

The threshold is configurable per logical switch
via other_config "fdb_age_threshold" in seconds.
The default value is 0 which means disabled.

In addition, there is a config option to limit how many
FDB rows can be removed in a single transaction.
This option, called "fdb_removal_limit", can be set
in the NB global table; the default value is 0, which
means unlimited.

Signed-off-by: Ales Musil <amu...@redhat.com>
---
 NEWS                      |   3 +
 northd/aging.c            | 126 ++++++++++++++++++++++++++++++++++++++
 northd/aging.h            |  11 ++++
 northd/en-northd-output.c |   8 +++
 northd/en-northd-output.h |   2 +
 northd/inc-proc-northd.c  |  14 +++++
 ovn-nb.xml                |  15 +++++
 tests/ovn.at              |  65 ++++++++++++++++++++
 8 files changed, 244 insertions(+)

diff --git a/NEWS b/NEWS
index 0f1c5f985..09f4a61a2 100644
--- a/NEWS
+++ b/NEWS
@@ -25,6 +25,9 @@ Post v23.03.0
     databases for ovn-nbctl and ovn-sbctl respectively.  See man ovn-nb and
     man ovn-sb for 'nbctl_probe_interval' and 'sbctl_probe_interval'
     options for more details.
+  - Added FDB aging mechanism, which is disabled by default.
+    It can be enabled per logical switch with other_config
+    "fdb_age_threshold".
 
 OVN v23.03.0 - 03 Mar 2023
 --------------------------
diff --git a/northd/aging.c b/northd/aging.c
index 49f60b663..f626c72c8 100644
--- a/northd/aging.c
+++ b/northd/aging.c
@@ -111,6 +111,7 @@ get_removal_limit(struct engine_node *node, const char *name)
     return smap_get_uint(&nb->options, name, 0);
 }
 
+/* MAC binding aging */
 static void
 mac_binding_aging_run_for_datapath(const struct sbrec_datapath_binding *dp,
                                    struct ovsdb_idl_index *mb_by_datapath,
@@ -238,3 +239,128 @@ void
 en_mac_binding_aging_waker_cleanup(void *data OVS_UNUSED)
 {
 }
+
+/* FDB aging */
+static void
+fdb_run_for_datapath(const struct sbrec_datapath_binding *dp,
+                     struct ovsdb_idl_index *fdb_by_dp_key,
+                     struct aging_context *ctx)
+{
+    if (!ctx->threshold) { /* Zero threshold: aging is off, keep all rows. */
+        return;
+    }
+    /* Select the FDB rows whose dp_key equals this datapath's tunnel key. */
+    struct sbrec_fdb *fdb_index_row = sbrec_fdb_index_init_row(fdb_by_dp_key);
+    sbrec_fdb_index_set_dp_key(fdb_index_row, dp->tunnel_key);
+    /* Delete every row the context flags; stop once it reports its limit. */
+    const struct sbrec_fdb *fdb;
+    SBREC_FDB_FOR_EACH_EQUAL (fdb, fdb_index_row, fdb_by_dp_key) {
+        if (aging_context_handle_timestamp(ctx, fdb->timestamp)) {
+            sbrec_fdb_delete(fdb);
+            if (aging_context_is_at_limit(ctx)) {
+                break;
+            }
+        }
+    }
+    sbrec_fdb_index_destroy_row(fdb_index_row);
+}
+
+void
+en_fdb_aging_run(struct engine_node *node, void *data OVS_UNUSED)
+{
+    const struct engine_context *eng_ctx = engine_get_context();
+    struct northd_data *northd_data = engine_get_input_data("northd", node);
+    struct aging_waker *waker = engine_get_input_data("fdb_aging_waker", node);
+    /* Bail out without an SB txn, the FDB timestamp feature or a due wake. */
+    if (!eng_ctx->ovnsb_idl_txn ||
+        !northd_data->features.fdb_timestamp ||
+        time_msec() < waker->next_wake_msec) {
+        return;
+    }
+
+    uint32_t limit = get_removal_limit(node, "fdb_removal_limit");
+    struct aging_context ctx = aging_context_init(limit);
+
+    struct ovsdb_idl_index *sbrec_fdb_by_dp_key =
+            engine_ovsdb_node_get_index(engine_get_input("SB_fdb", node),
+                                        "fdb_by_dp_key");
+
+    struct ovn_datapath *od;
+    HMAP_FOR_EACH (od, key_node, &northd_data->ls_datapaths.datapaths) {
+        ovs_assert(od->nbs);
+
+        if (!od->sb) {
+            continue; /* No SB datapath whose FDB rows could be scanned. */
+        }
+        /* The option is given in seconds, hence the scaling by 1000. */
+        uint64_t threshold =
+                smap_get_uint(&od->nbs->other_config, "fdb_age_threshold", 0);
+        aging_context_set_threshold(&ctx, threshold * 1000);
+
+        fdb_run_for_datapath(od->sb, sbrec_fdb_by_dp_key, &ctx);
+        if (aging_context_is_at_limit(&ctx)) {
+            /* Schedule the next run after the specified delay. */
+            ctx.next_wake_ms = AGING_BULK_REMOVAL_DELAY_MSEC;
+            break;
+        }
+    }
+    /* Pass the wake-up time collected in the context on to the waker. */
+    aging_waker_schedule_next_wake(waker, ctx.next_wake_ms);
+
+    engine_set_node_state(node, EN_UPDATED);
+}
+
+void *
+en_fdb_aging_init(struct engine_node *node OVS_UNUSED,
+                  struct engine_arg *arg OVS_UNUSED)
+{
+    return NULL; /* No per-node data is allocated for the FDB aging node. */
+}
+
+void
+en_fdb_aging_cleanup(void *data OVS_UNUSED)
+{ /* Nothing to release: en_fdb_aging_init() does not allocate any data. */
+}
+
+/* The waker node is an input node, but the data about when to wake up
+ * the aging node is populated by the aging node itself.
+ * The reason is that the engine periodically runs input nodes to check
+ * whether there are updates, so that it can process the other nodes;
+ * the waker cannot depend on another node, because then it would no
+ * longer be an input node. */
+void
+en_fdb_aging_waker_run(struct engine_node *node, void *data)
+{
+    struct aging_waker *waker = data;
+
+    engine_set_node_state(node, EN_UNCHANGED);
+
+    if (!waker->should_schedule) {
+        return; /* No wake-up is pending, so the node stays unchanged. */
+    }
+
+    if (time_msec() >= waker->next_wake_msec) {
+        waker->should_schedule = false;
+        engine_set_node_state(node, EN_UPDATED);
+        return;
+    }
+    /* Not due yet: have the poll loop wake up at the scheduled time. */
+    poll_timer_wait_until(waker->next_wake_msec);
+}
+
+void *
+en_fdb_aging_waker_init(struct engine_node *node OVS_UNUSED,
+                        struct engine_arg *arg OVS_UNUSED)
+{
+    struct aging_waker *waker = xmalloc(sizeof *waker);
+    /* Start idle: the waker run returns early while nothing is scheduled. */
+    waker->should_schedule = false;
+    waker->next_wake_msec = 0;
+
+    return waker;
+}
+
+void
+en_fdb_aging_waker_cleanup(void *data OVS_UNUSED)
+{ /* NOTE(review): no free() here; presumably the engine frees 'data'. */
+}
diff --git a/northd/aging.h b/northd/aging.h
index 129397d7b..650990e24 100644
--- a/northd/aging.h
+++ b/northd/aging.h
@@ -30,4 +30,15 @@ void *en_mac_binding_aging_waker_init(struct engine_node *node,
                                       struct engine_arg *arg);
 void en_mac_binding_aging_waker_cleanup(void *data);
 
+/* The FDB aging node functions. */
+void en_fdb_aging_run(struct engine_node *node, void *data);
+void *en_fdb_aging_init(struct engine_node *node, struct engine_arg *arg);
+void en_fdb_aging_cleanup(void *data);
+
+/* The FDB aging waker node functions. */
+void en_fdb_aging_waker_run(struct engine_node *node, void *data);
+void *en_fdb_aging_waker_init(struct engine_node *node,
+                              struct engine_arg *arg);
+void en_fdb_aging_waker_cleanup(void *data);
+
 #endif /* northd/aging.h */
diff --git a/northd/en-northd-output.c b/northd/en-northd-output.c
index df7ea2b31..98098d974 100644
--- a/northd/en-northd-output.c
+++ b/northd/en-northd-output.c
@@ -64,3 +64,11 @@ northd_output_mac_binding_aging_handler(struct engine_node *node,
     engine_set_node_state(node, EN_UPDATED);
     return true;
 }
+
+bool
+northd_output_fdb_aging_handler(struct engine_node *node,
+                                void *data OVS_UNUSED)
+{
+    engine_set_node_state(node, EN_UPDATED); /* Unconditionally. */
+    return true; /* Same as the MAC binding aging handler above. */
+}
diff --git a/northd/en-northd-output.h b/northd/en-northd-output.h
index 7165d4b55..5f577b89c 100644
--- a/northd/en-northd-output.h
+++ b/northd/en-northd-output.h
@@ -15,5 +15,7 @@ bool northd_output_lflow_handler(struct engine_node *node,
                                  void *data OVS_UNUSED);
 bool northd_output_mac_binding_aging_handler(struct engine_node *node,
                                              void *data OVS_UNUSED);
+bool northd_output_fdb_aging_handler(struct engine_node *node,
+                                     void *data OVS_UNUSED);
 
 #endif
diff --git a/northd/inc-proc-northd.c b/northd/inc-proc-northd.c
index 63b9650ec..522f4c3ba 100644
--- a/northd/inc-proc-northd.c
+++ b/northd/inc-proc-northd.c
@@ -135,6 +135,8 @@ static ENGINE_NODE(mac_binding_aging_waker, "mac_binding_aging_waker");
 static ENGINE_NODE(northd_output, "northd_output");
 static ENGINE_NODE(sync_to_sb, "sync_to_sb");
 static ENGINE_NODE(sync_to_sb_addr_set, "sync_to_sb_addr_set");
+static ENGINE_NODE(fdb_aging, "fdb_aging");
+static ENGINE_NODE(fdb_aging_waker, "fdb_aging_waker");
 
 void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
                           struct ovsdb_idl_loop *sb)
@@ -175,6 +177,11 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
     engine_add_input(&en_mac_binding_aging, &en_northd, NULL);
     engine_add_input(&en_mac_binding_aging, &en_mac_binding_aging_waker, NULL);
 
+    engine_add_input(&en_fdb_aging, &en_nb_nb_global, NULL);
+    engine_add_input(&en_fdb_aging, &en_sb_fdb, NULL);
+    engine_add_input(&en_fdb_aging, &en_northd, NULL);
+    engine_add_input(&en_fdb_aging, &en_fdb_aging_waker, NULL);
+
     engine_add_input(&en_lflow, &en_nb_bfd, NULL);
     engine_add_input(&en_lflow, &en_sb_bfd, NULL);
     engine_add_input(&en_lflow, &en_sb_logical_flow, NULL);
@@ -200,6 +207,8 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
                      northd_output_lflow_handler);
     engine_add_input(&en_northd_output, &en_mac_binding_aging,
                      northd_output_mac_binding_aging_handler);
+    engine_add_input(&en_northd_output, &en_fdb_aging,
+                     northd_output_fdb_aging_handler);
 
     struct engine_arg engine_arg = {
         .nb_idl = nb->idl,
@@ -220,6 +229,8 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
         = static_mac_binding_index_create(sb->idl);
     struct ovsdb_idl_index *sbrec_mac_binding_by_datapath
         = mac_binding_by_datapath_index_create(sb->idl);
+    struct ovsdb_idl_index *fdb_by_dp_key =
+        ovsdb_idl_index_create1(sb->idl, &sbrec_fdb_col_dp_key);
 
     engine_init(&en_northd_output, &engine_arg);
 
@@ -244,6 +255,9 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
     engine_ovsdb_node_add_index(&en_sb_mac_binding,
                                 "sbrec_mac_binding_by_datapath",
                                 sbrec_mac_binding_by_datapath);
+    engine_ovsdb_node_add_index(&en_sb_fdb,
+                                "fdb_by_dp_key",
+                                fdb_by_dp_key);
 
     struct ovsdb_idl_index *sbrec_address_set_by_name
         = ovsdb_idl_index_create1(sb->idl, &sbrec_address_set_col_name);
diff --git a/ovn-nb.xml b/ovn-nb.xml
index d8114aa48..0c02db002 100644
--- a/ovn-nb.xml
+++ b/ovn-nb.xml
@@ -170,6 +170,14 @@
         5 s.
       </column>
 
+      <column name="options" key="fdb_removal_limit"
+              type='{"type": "integer", "minInteger": 0, "maxInteger": 4294967295}'>
+        FDB aging bulk removal limit. This limits how many rows
+        can expire in a single transaction. The default value is 0, which
+        means unlimited. When the limit is hit, the next batch removal is
+        delayed by 5 s.
+      </column>
+
       <column name="options" key="controller_event" type='{"type": "boolean"}'>
         Value set by the CMS to enable/disable ovn-controller event reporting.
         Traffic into OVS can raise a 'controller' event that results in a
@@ -663,6 +671,13 @@
         Value used to request to assign L2 address only if neither subnet
         nor ipv6_prefix are specified
       </column>
+
+      <column name="other_config" key="fdb_age_threshold"
+              type='{"type": "integer", "minInteger": 0, "maxInteger": 4294967295}'>
+        FDB aging <code>threshold</code> value in seconds. FDB entries
+        older than this threshold will be automatically removed. The value
+        defaults to 0, which means disabled.
+      </column>
     </group>
 
     <group title="IP Multicast Snooping Options">
diff --git a/tests/ovn.at b/tests/ovn.at
index 2ef370c26..f5754067f 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -35623,3 +35623,68 @@ check test "$current_id2" = "$prev_id2"
 OVN_CLEANUP([hv1])
 AT_CLEANUP
 ])
+
+OVN_FOR_EACH_NORTHD([
+AT_SETUP([FDB aging])
+ovn_start
+
+net_add n1
+
+check ovn-nbctl ls-add ls0
+
+check ovn-nbctl lsp-add ls0 ln_port -- \
+      lsp-set-addresses ln_port unknown -- \
+      lsp-set-type ln_port localnet -- \
+      lsp-set-options ln_port network_name=physnet1 -- \
+      set logical_switch_port ln_port options:localnet_learn_fdb=true
+
+check ovn-nbctl lsp-add ls0 vif1 -- \
+      lsp-set-addresses vif1 "00:00:00:00:10:10 192.168.10.10"
+
+sim_add hv1
+as hv1
+ovs-vsctl add-br br-underlay
+ovn_attach n1 br-underlay 192.168.0.1
+ovs-vsctl add-br br-phys
+ovs-vsctl -- add-port br-int vif1 -- \
+    set interface vif1 external-ids:iface-id=vif1 \
+    options:tx_pcap=hv1/vif1-tx.pcap \
+    options:rxq_pcap=hv1/vif1-rx.pcap \
+    ofport-request=1
+ovs-vsctl -- add-port br-phys ext0 -- \
+    set interface ext0 \
+    options:tx_pcap=hv1/ext0-tx.pcap \
+    options:rxq_pcap=hv1/ext0-rx.pcap \
+    ofport-request=2
+ovs-vsctl set open . external_ids:ovn-bridge-mappings=physnet1:br-phys
+
+OVN_POPULATE_ARP
+wait_for_ports_up
+check ovn-nbctl --wait=hv sync
+
+send_packet() {
+    packet=$(fmt_pkt "
+            Ether(dst='00:00:00:00:10:10', src='00:00:00:00:10:${1}') /
+            IP(src='192.168.10.${1}', dst='192.168.10.10') /
+            UDP(sport=1234, dport=1235)
+           ")
+    ovs-appctl netdev-dummy/receive ext0 $packet
+}
+
+# Send packet to create FDB row
+send_packet 20
+wait_row_count fdb 1 mac='"00:00:00:00:10:20"'
+
+# Set the FDB aging threshold
+check ovn-nbctl set logical_switch ls0 other_config:fdb_age_threshold=1
+AT_CHECK([fetch_column nb:logical_switch other_config | grep -q fdb_age_threshold=1])
+check ovn-nbctl --wait=sb sync
+
+# Set the timeout for OVS_WAIT* functions to 5 seconds
+OVS_CTL_TIMEOUT=5
+# Check if the records are removed after some inactivity
+wait_row_count fdb 0 mac='"00:00:00:00:10:20"'
+
+OVN_CLEANUP([hv1])
+AT_CLEANUP
+])
-- 
2.40.1

_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to