A fallback setting can be configured at the Network Function Group level.
The following values are supported:
  - fail-close: All traffic that has to be redirected to an NF is
                dropped when no active NFs are available.
                This is the default setting.
  - fail-open: All traffic that has to be redirected to an NF is
               allowed when no active NFs are available.

Signed-off-by: Naveen Yerramneni <[email protected]>
Acked-by: Sragdhara Datta Chaudhuri <[email protected]>
Acked-by: Aditya Mehakare <[email protected]>
---
v1:
- First patch
 
v2:
- Rebase with latest main

v3:
- Fix format specifier issue

v4:
- Added Acked-by tags Sragdhara, Aditya
---
 NEWS                |   1 +
 northd/northd.c     |  69 +++++++++++++++++++++++++-----
 ovn-nb.ovsschema    |   8 +++-
 ovn-nb.xml          |  27 ++++++++++++
 tests/ovn-northd.at |  44 +++++++++++++++----
 tests/system-ovn.at | 102 +++++++++++++++++++++++++++-----------------
 6 files changed, 192 insertions(+), 59 deletions(-)

diff --git a/NEWS b/NEWS
index 754934b6b..44e2011a8 100644
--- a/NEWS
+++ b/NEWS
@@ -63,6 +63,7 @@ Post v25.09.0
   - Add a new experimental service - ovn-br-controller to program and
     manage OVS bridges (not managed by ovn-controller) using OVN logical flows.
     For more details see man ovn-br(5).
+  - Add fallback support (fail-open/fail-close) for Network Function Groups.
 
 OVN v25.09.0 - xxx xx xxxx
 --------------------------
diff --git a/northd/northd.c b/northd/northd.c
index cdf12ec86..73077710f 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -18161,6 +18161,27 @@ build_lswitch_stateful_nf(struct ovn_port *op,
                   ds_cstr(match), ds_cstr(actions), lflow_ref);
 }
 
+/* Returns the fallback mode configured on 'nfg'.  The 'fallback' member may
+ * be NULL (not configured), in which case the default mode, "fail-close", is
+ * returned instead. */
+static const char *
+network_function_group_get_fallback(
+    const struct nbrec_network_function_group *nfg)
+{
+    return nfg->fallback ? nfg->fallback : "fail-close";
+}
+
+/* Returns true if the effective fallback mode of 'nfg' is "fail-open"
+ * (compared case-insensitively), false for any other mode. */
+static bool
+network_function_group_is_fallback_fail_open(
+    const struct nbrec_network_function_group *nfg)
+{
+    const char *mode = network_function_group_get_fallback(nfg);
+
+    return !strcasecmp(mode, "fail-open");
+}
+
 static struct nbrec_network_function *
 nf_get_active(const struct nbrec_network_function_group *nfg)
 {
@@ -18237,21 +18258,23 @@ network_function_update_active(const struct 
nbrec_network_function_group *nfg,
             }
         }
     } else {
-        /* No healthy NFs, keep nf_active_prev if set, else select first one */
-        nf_active = nf_active_prev ? nf_active_prev : nfg->network_function[0];
+        /* No healthy NFs, clear nf_active to apply fallback */
+        nf_active = NULL;
         static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
-        VLOG_WARN_RL(&rl, "NetworkFunction: No healthy network_function found "
-                     "in network_function_group %s, "
-                     "selected network_function %s as active", nfg->name,
-                     nf_active->name);
+        VLOG_WARN_RL(&rl, "NetworkFunction: No healthy network_function "
+                     "found in network_function_group %s, "
+                     "fallback to %s", nfg->name,
+                     network_function_group_get_fallback(nfg));
     }
     free(healthy_nfs);
 
     if (nf_active_prev != nf_active) {
-        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
-        VLOG_INFO_RL(&rl, "NetworkFunction: Update active network_function %s "
-                     "in network_function_group %s",
-                     nf_active->name, nfg->name);
+        if (nf_active) {
+            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
+            VLOG_INFO_RL(&rl, "NetworkFunction: Update active network_function"
+                         " %s in network_function_group %s",
+                         nf_active->name, nfg->name);
+        }
         nbrec_network_function_group_set_network_function_active(nfg,
                                                                  nf_active);
     }
@@ -18273,6 +18296,23 @@ static void build_network_function_active(
     }
 }
 
+/* Adds priority-10 "next;" flows matching NF group 'nfg_id' to the ingress
+ * and egress NF stages of 'od'.  Only the low 8 bits of 'nfg_id' are used. */
+static void
+network_function_configure_fail_open_flows(struct lflow_table *lflows,
+              const struct ovn_datapath *od, struct lflow_ref *lflow_ref,
+              uint64_t nfg_id)
+{
+    const uint8_t gid = (uint8_t) nfg_id;
+    struct ds match = DS_EMPTY_INITIALIZER;
+    ds_put_format(&match, REG_NF_GROUP_ID " == %"PRIu8" || "
+                  "(ct.trk && ct_label.nf_group_id == %"PRIu8")", gid, gid);
+    const char *cond = ds_cstr(&match);
+    ovn_lflow_add(lflows, od, S_SWITCH_IN_NF, 10, cond, "next;", lflow_ref);
+    ovn_lflow_add(lflows, od, S_SWITCH_OUT_NF, 10, cond, "next;", lflow_ref);
+    ds_destroy(&match);
+}
+
 static void
 consider_network_function(struct lflow_table *lflows,
                           const struct ovn_datapath *od,
@@ -18283,6 +18323,15 @@ consider_network_function(struct lflow_table *lflows,
     struct ds action = DS_EMPTY_INITIALIZER;
     static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
 
+    /* If the NFG is in fail-open mode, configure flows with a higher
+     * priority than the default drop rule to allow the traffic when there
+     * is no active NF available.
+     */
+    if (network_function_group_is_fallback_fail_open(nfg)) {
+        network_function_configure_fail_open_flows(lflows, od, lflow_ref,
+                                                   nfg->id);
+    }
+
     /* Currently we support only one active port-pair in a group.
      * If there are multiple active pairs, take the first one.
      * Load balancing would be added in future. */
diff --git a/ovn-nb.ovsschema b/ovn-nb.ovsschema
index cbb4f98e7..8c2c1d861 100644
--- a/ovn-nb.ovsschema
+++ b/ovn-nb.ovsschema
@@ -1,7 +1,7 @@
 {
     "name": "OVN_Northbound",
-    "version": "7.14.0",
-    "cksum": "3428479461 43444",
+    "version": "7.15.0",
+    "cksum": "4060410729 43708",
     "tables": {
         "NB_Global": {
             "columns": {
@@ -221,6 +221,10 @@
         "Network_Function_Group": {
             "columns": {
                 "name": {"type": "string"},
+                "fallback": {"type": {"key": {"type": "string",
+                                      "enum": ["set", ["fail-open",
+                                                       "fail-close"]]},
+                                      "min": 0, "max": 1}},
                 "network_function": {"type":
                                   {"key": {"type": "uuid",
                                            "refTable": "Network_Function",
diff --git a/ovn-nb.xml b/ovn-nb.xml
index b5fe44e53..1c04a1107 100644
--- a/ovn-nb.xml
+++ b/ovn-nb.xml
@@ -6261,6 +6261,33 @@ or
       <code>Network_Function_Group</code>.
     </column>
 
+    <column name="fallback">
+      Fallback setting when no active network functions are available.
+      <p>
+        The following fallback mechanisms are supported.  If not specified,
+        <code>fail-close</code> is applied when no active Network Functions
+        are available.
+      </p>
+
+      <dl>
+        <dt><code>fail-open</code></dt>
+        <dd>
+          <p>
+            Traffic bypasses the Network Function and is allowed when there
+            is no active Network Function available.
+          </p>
+        </dd>
+
+        <dt><code>fail-close</code></dt>
+        <dd>
+          <p>
+            Traffic is dropped when there is no active Network Function
+            available.
+          </p>
+        </dd>
+      </dl>
+    </column>
+
     <column name="network_function">
       A list of network functions which belong to this group.
     </column>
diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
index 448bc66ae..eb7b21ef2 100644
--- a/tests/ovn-northd.at
+++ b/tests/ovn-northd.at
@@ -18000,6 +18000,8 @@ OVN_FOR_EACH_NORTHD_NO_HV([
 AT_SETUP([Check network function])
 ovn_start
 
+AS_BOX([Create a NF and add it to a from-lport ACL])
+
 # Create a NF and add it to a from-lport ACL.
 check ovn-nbctl ls-add sw0
 check ovn-nbctl lsp-add sw0 sw0-nf-p1
@@ -18020,6 +18022,11 @@ check ovn-nbctl lsp-add sw0 sw0-p3 -- 
lsp-set-addresses sw0-p3 "00:00:00:00:00:0
 check ovn-nbctl pg-add pg0 sw0-p1
 check ovn-nbctl acl-add pg0 from-lport 1002 "inport == @pg0 && ip4.dst == 
10.0.0.3" allow-related nfg0
 
+# Add hypervisor and bind NF ports
+check ovn-sbctl chassis-add hv1 geneve 127.0.0.1
+check ovn-sbctl lsp-bind sw0-nf-p1 hv1
+check ovn-sbctl lsp-bind sw0-nf-p2 hv1
+
 check ovn-nbctl --wait=sb sync
 
 ovn-sbctl dump-flows sw0 > sw0flows
@@ -18107,6 +18114,8 @@ ct_next(ct_state=new|trk) {
 };
 ])
 
+AS_BOX([Create another NF and add it to a to-lport ACL.])
+
 # Create another NF and add it to a to-lport ACL.
 check ovn-nbctl lsp-add sw0 sw0-nf-p3
 check ovn-nbctl lsp-add sw0 sw0-nf-p4
@@ -18119,6 +18128,8 @@ check ovn-nbctl set logical_switch_port sw0-nf-p4 \
 check ovn-nbctl nf-add nf1 sw0-nf-p3 sw0-nf-p4
 check ovn-nbctl nfg-add nfg1 2 inline nf1
 check ovn-nbctl acl-add pg0 to-lport 1003 "outport == @pg0 && ip4.src == 
10.0.0.4" allow-related nfg1
+check ovn-sbctl lsp-bind sw0-nf-p3 hv1
+check ovn-sbctl lsp-bind sw0-nf-p4 hv1
 check ovn-nbctl --wait=sb sync
 
 ovn-sbctl dump-flows sw0 > sw0flows
@@ -18412,7 +18423,7 @@ AT_CHECK(
 ])
 
 # Set the service monitor for nf0 to offline and nf1 to offline
-# and verify nf1 is still the active.
+# and verify fail-close is applied.
 
 AS_BOX([Set the service monitor for nf0 to offline and nf1 to offline])
 check ovn-sbctl set service_monitor $nfsw-p2 status=offline
@@ -18428,18 +18439,33 @@ AT_CHECK(
   [grep -E 'ls_(in|out)_network_function' lflows | ovn_strip_lflows | sort], 
[0], [dnl
   table=??(ls_in_network_function), priority=0    , match=(1), action=(next;)
   table=??(ls_in_network_function), priority=1    , match=(reg8[[21]] == 1), 
action=(drop;)
-  table=??(ls_in_network_function), priority=100  , match=(inport == 
"child-3"), action=(reg5[[16..31]] = ct_label.tun_if_id; next;)
-  table=??(ls_in_network_function), priority=100  , match=(inport == 
"child-4"), action=(reg5[[16..31]] = ct_label.tun_if_id; next;)
   table=??(ls_in_network_function), priority=100  , match=(reg8[[21]] == 1 && 
eth.mcast), action=(next;)
-  table=??(ls_in_network_function), priority=99   , match=(reg8[[21]] == 1 && 
reg8[[22]] == 0 && ct_label.nf_group_id == 1), action=(outport = "child-3"; 
output;)
-  table=??(ls_in_network_function), priority=99   , match=(reg8[[21]] == 1 && 
reg8[[22]] == 1 && reg0[[22..29]] == 1), action=(outport = "child-3"; output;)
   table=??(ls_out_network_function), priority=0    , match=(1), action=(next;)
   table=??(ls_out_network_function), priority=1    , match=(reg8[[21]] == 1), 
action=(drop;)
-  table=??(ls_out_network_function), priority=100  , match=(outport == 
"child-3"), action=(next;)
-  table=??(ls_out_network_function), priority=100  , match=(outport == 
"child-4"), action=(next;)
   table=??(ls_out_network_function), priority=100  , match=(reg8[[21]] == 1 && 
eth.mcast), action=(next;)
-  table=??(ls_out_network_function), priority=99   , match=(reg8[[21]] == 1 && 
reg8[[22]] == 0 && ct_label.nf_group_id == 1), action=(outport = "child-4"; 
reg8[[23]] = 1; next(pipeline=ingress, table=??);)
-  table=??(ls_out_network_function), priority=99   , match=(reg8[[21]] == 1 && 
reg8[[22]] == 1 && reg0[[22..29]] == 1), action=(outport = "child-4"; 
reg8[[23]] = 1; next(pipeline=ingress, table=??);)
+])
+
+AS_BOX([Configure NFG fallback method to fail-open])
+
+# Configure NFG fallback method to fail-open
+nfg_uuid=$(fetch_column nb:network_function_group _uuid name=nfg0)
+check ovn-nbctl set network_function_group $nfg_uuid fallback=fail-open
+
+check ovn-nbctl --wait=sb sync
+
+ovn-sbctl dump-flows $sw > lflows
+AT_CAPTURE_FILE([lflows])
+
+AT_CHECK(
+  [grep -E 'ls_(in|out)_network_function' lflows | ovn_strip_lflows | sort], 
[0], [dnl
+  table=??(ls_in_network_function), priority=0    , match=(1), action=(next;)
+  table=??(ls_in_network_function), priority=1    , match=(reg8[[21]] == 1), 
action=(drop;)
+  table=??(ls_in_network_function), priority=10   , match=(reg0[[22..29]] == 1 
|| (ct.trk && ct_label.nf_group_id == 1)), action=(next;)
+  table=??(ls_in_network_function), priority=100  , match=(reg8[[21]] == 1 && 
eth.mcast), action=(next;)
+  table=??(ls_out_network_function), priority=0    , match=(1), action=(next;)
+  table=??(ls_out_network_function), priority=1    , match=(reg8[[21]] == 1), 
action=(drop;)
+  table=??(ls_out_network_function), priority=10   , match=(reg0[[22..29]] == 
1 || (ct.trk && ct_label.nf_group_id == 1)), action=(next;)
+  table=??(ls_out_network_function), priority=100  , match=(reg8[[21]] == 1 && 
eth.mcast), action=(next;)
 ])
 
 AT_CLEANUP
diff --git a/tests/system-ovn.at b/tests/system-ovn.at
index 5b34e621f..312fa8e9e 100644
--- a/tests/system-ovn.at
+++ b/tests/system-ovn.at
@@ -19020,6 +19020,7 @@ AS_BOX([Test-1: Single NF without health check])
 check ovn-nbctl nf-add nf0 nf-p1 nf-p2
 nf0_uuid=$(fetch_column nb:network_function _uuid name=nf0)
 check ovn-nbctl nfg-add nfg0 1 inline nf0
+nfg_uuid=$(fetch_column nb:network_function_group _uuid name=nfg0)
 
 check ovn-nbctl pg-add pg0 server
 check ovn-nbctl acl-add pg0 from-lport 1001 "inport == @pg0 && ip4.dst == 
192.168.1.10" allow-related nfg0
@@ -19033,7 +19034,40 @@ NS_CHECK_EXEC([nf], [ip link set dev nf-p1 master br0])
 NS_CHECK_EXEC([nf], [ip link set dev nf-p2 master br0])
 NS_CHECK_EXEC([nf], [ip link set dev br0 up])
 
+start_tcp_server_client() {  # Starts server.py, then client.py, as daemons.
+    client_ns=$1                   # arg 1: netns in which client.py runs
+    server_ns=$2                   # arg 2: netns in which server.py runs
+    sip=$3                         # arg 3: address given to both via -i
+    port=${4:-10000}               # arg 4: port given to both via -p
+    fifo_path=${5:-/tmp/nffifo}    # arg 5: FIFO path given to client.py via -f
+    wait_for_server=${6:-yes}      # arg 6: anything but yes skips the wait
+
+    # Truncate output.txt, then start the TCP server; its pid goes to server.pid
+    : > output.txt
+    NETNS_DAEMONIZE($server_ns, [server.py -i $sip -p $port], [server.pid])
+    on_exit 'kill $(cat server.pid)'
+
+    # Wait until output.txt holds exactly the line below.
+    # NOTE(review): presumably server.py writes it once listening - confirm.
+    if [[ "$wait_for_server" == "yes" ]]; then
+        OVS_WAIT_FOR_OUTPUT([cat output.txt], [0], [dnl
+Server Ready
+])
+    fi
+
+    # Create the FIFO only if it does not exist yet, so a repeated call reuses
+    # it; its removal at exit is registered only by the call that created it.
+    if [[ ! -p "$fifo_path" ]]; then
+        mkfifo "$fifo_path"
+        on_exit "rm -rf $fifo_path"
+    fi
+
+    NETNS_DAEMONIZE($client_ns, [client.py -f "$fifo_path" -i $sip -p $port], 
[client.pid])
+    on_exit 'kill $(cat client.pid)'
+}
+
 validate_traffic() {
+    # Empty the file
+    : > output.txt
+
     send_data=$1; recv_data=$2; pkt_cnt=$3;
     AT_CHECK([printf "$send_data\n" > /tmp/nffifo], [0], [dnl
 ])
@@ -19047,8 +19081,6 @@ $recv_data
 ])
     fi
 
-    : > output.txt
-
     OVS_WAIT_UNTIL([
         total_pkts=$(cat pkt.pcap | wc -l)
         test ${total_pkts} -ge ${pkt_cnt}
@@ -19058,22 +19090,7 @@ $recv_data
 validate_single_nf_no_health_check() {
     client_ns=$1; server_ns=$2; sip=$3; direction=$4
 
-    # Start a TCP server
-    NETNS_DAEMONIZE($server_ns, [server.py -i $sip -p 10000], [server.pid])
-    on_exit 'kill $(cat server.pid)'
-
-    # Ensure TCP server is ready for connections
-    OVS_WAIT_FOR_OUTPUT([cat output.txt], [0], [dnl
-Server Ready
-])
-    : > output.txt
-
-    # Make a FIFO and send its output to a server
-    mkfifo /tmp/nffifo
-    on_exit 'rm -rf /tmp/nffifo'
-
-    NETNS_DAEMONIZE($client_ns, [client.py -f "/tmp/nffifo" -i $sip -p 10000], 
[client.pid])
-    on_exit 'kill $(cat client.pid)'
+    start_tcp_server_client $client_ns $server_ns $sip
 
     AS_BOX([$direction: Verify traffic forwarding through single NF without 
health check])
 
@@ -19083,7 +19100,6 @@ Server Ready
     on_exit 'kill $(pidof tcpdump)'
 
     # Verify no service monitors exist when health check is not configured
-    #AT_CHECK([ovn-sbctl list service_monitor | grep -v "^$"], [1])
     AT_CHECK([ovn-sbctl list service_monitor | wc -l], [0], [dnl
 0
 ])
@@ -19135,26 +19151,15 @@ check ovn-nbctl --wait=hv sync
 validate_nf_with_traffic() {
     client_ns=$1; server_ns=$2; sip=$3; direction=$4
 
-    # Start a TCP server
-    NETNS_DAEMONIZE($server_ns, [server.py -i $sip -p 10000], [server.pid])
-    on_exit 'kill $(cat server.pid)'
-
-    # Ensure TCP server is ready for connections
-    OVS_WAIT_FOR_OUTPUT([cat output.txt], [0], [dnl
-Server Ready
-])
-    : > output.txt
-
-    # Make a FIFO and send its output to a server
-    mkfifo /tmp/nffifo
-    on_exit 'rm -rf /tmp/nffifo'
-
-    NETNS_DAEMONIZE($client_ns, [client.py -f "/tmp/nffifo" -i $sip -p 10000], 
[client.pid])
-    on_exit 'kill $(cat client.pid)'
+    start_tcp_server_client $client_ns $server_ns $sip
 
     AS_BOX([$direction: Verify traffic forwarding through NF when nf0 is 
active])
     NS_CHECK_EXEC([nf], [ip link set dev br0 up])
     NS_CHECK_EXEC([nf], [ip link set dev br1 down])
+    # set fallback to fail-close
+    check ovn-nbctl set network_function_group $nfg_uuid fallback=fail-close
+    check ovn-nbctl --wait=hv sync
+
 
     NS_CHECK_EXEC([nf], [tcpdump -l -nvv -i nf-p1 tcp > pkt.pcap 2>tcpdump_err 
&])
     OVS_WAIT_UNTIL([grep "listening" tcpdump_err])
@@ -19186,23 +19191,44 @@ Server Ready
 
     validate_traffic "test" "test" 2
 
-    AS_BOX([$direction: Verify traffic forwarding through NF when nf0 and nf1 
are down])
+    AS_BOX([$direction: Verify traffic forwarding through NF when nf0 and nf1 
are down "
+            "when fallback set to fail-close])
 
     kill $(pidof tcpdump)
     NS_CHECK_EXEC([nf], [tcpdump -l -nvv -i nf-p3 tcp > pkt.pcap 2>tcpdump_err 
&])
     OVS_WAIT_UNTIL([grep "listening" tcpdump_err])
     on_exit 'kill $(pidof tcpdump)'
 
-    # Bring nf0 down and nf1 up
+    # Bring nf1 down
     NS_CHECK_EXEC([nf], [ip link set dev br1 down])
     # sleep to allow service_monitor to detect the state
     sleep 5
 
-    ovn-sbctl dump-flows sw0 > lflows_nf1_active
+    ovn-sbctl dump-flows sw0 > lflows_both_down_fail_close
     ovn-sbctl list service_monitor
 
     validate_traffic "test" "" 0
 
+    AS_BOX(["$direction: Verify traffic forwarding through NF when nf0 and nf1 
are down "
+            "when fallback set to fail-open"])
+
+    # Restart the client and server before the fail-open test
+    kill $(cat client.pid)
+    kill $(cat server.pid)
+
+    start_tcp_server_client $client_ns $server_ns $sip 10000 /tmp/nffifo no
+
+    check ovn-nbctl set network_function_group $nfg_uuid fallback=fail-open
+    check ovn-nbctl --wait=hv sync
+
+    kill $(cat tcpdump-nf.pid)
+    NETNS_START_TCPDUMP([nf], [-nvv -i nf-p3 tcp], [tcpdump-nf])
+
+    ovn-sbctl dump-flows sw0 > lflows_both_down_fail_open
+    ovn-sbctl list service_monitor
+
+    validate_traffic "test" "test" 0
+
     kill $(cat client.pid)
     kill $(cat server.pid)
     rm -f client.pid
-- 
2.43.5

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to