Thanks Naveen! I merged this to main.
On Mon, Nov 17, 2025 at 11:13 AM Naveen Yerramneni <[email protected]> wrote: > > Fallback setting can be configured at Network Function Group level. > Following values are supported: > - fail-close: All traffic that has to be redirected to NF > gets dropped when no active NFs are available. > This is the default setting. > - fail-open: All traffic that has to be redirected to NF > is allowed when no active NFs are available. > > Signed-off-by: Naveen Yerramneni <[email protected]> > Acked-by: Sragdhara Datta Chaudhuri <[email protected]> > Acked-by: Aditya Mehakare <[email protected]> > --- > v1: > - First patch > > v2: > - Rebase with latest main > > v3: > - Fix format specifier issue > > v4: > - Added Acked-by tags Sragdhara, Aditya > --- > NEWS | 1 + > northd/northd.c | 69 +++++++++++++++++++++++++----- > ovn-nb.ovsschema | 8 +++- > ovn-nb.xml | 27 ++++++++++++ > tests/ovn-northd.at | 44 +++++++++++++++---- > tests/system-ovn.at | 102 +++++++++++++++++++++++++++----------------- > 6 files changed, 192 insertions(+), 59 deletions(-) > > diff --git a/NEWS b/NEWS > index 754934b6b..44e2011a8 100644 > --- a/NEWS > +++ b/NEWS > @@ -63,6 +63,7 @@ Post v25.09.0 > - Add a new experimental service - ovn-br-controller to program and > manage OVS bridges (not managed by ovn-controller) using OVN logical > flows. > For more details see man ovn-br(5). > + - Add fallback support for Network Function. 
> > OVN v25.09.0 - xxx xx xxxx > -------------------------- > diff --git a/northd/northd.c b/northd/northd.c > index cdf12ec86..73077710f 100644 > --- a/northd/northd.c > +++ b/northd/northd.c > @@ -18161,6 +18161,27 @@ build_lswitch_stateful_nf(struct ovn_port *op, > ds_cstr(match), ds_cstr(actions), lflow_ref); > } > > +static const char* > +network_function_group_get_fallback( > + const struct nbrec_network_function_group *nfg) > +{ > + if (nfg->fallback) { > + return nfg->fallback; > + } > + return "fail-close"; > +} > + > +static bool > +network_function_group_is_fallback_fail_open( > + const struct nbrec_network_function_group *nfg) > +{ > + const char *fallback = network_function_group_get_fallback(nfg); > + if (!strcasecmp(fallback, "fail-open")) { > + return true; > + } > + return false; > +} > + > static struct nbrec_network_function * > nf_get_active(const struct nbrec_network_function_group *nfg) > { > @@ -18237,21 +18258,23 @@ network_function_update_active(const struct > nbrec_network_function_group *nfg, > } > } > } else { > - /* No healthy NFs, keep nf_active_prev if set, else select first one > */ > - nf_active = nf_active_prev ? 
nf_active_prev : > nfg->network_function[0]; > + /* No healthy NFs, clear nf_active to apply fallback */ > + nf_active = NULL; > static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); > - VLOG_WARN_RL(&rl, "NetworkFunction: No healthy network_function > found " > - "in network_function_group %s, " > - "selected network_function %s as active", nfg->name, > - nf_active->name); > + VLOG_WARN_RL(&rl, "NetworkFunction: No healthy network_function " > + "found in network_function_group %s, " > + "fallback to %s", nfg->name, > + network_function_group_get_fallback(nfg)); > } > free(healthy_nfs); > > if (nf_active_prev != nf_active) { > - static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); > - VLOG_INFO_RL(&rl, "NetworkFunction: Update active network_function > %s " > - "in network_function_group %s", > - nf_active->name, nfg->name); > + if (nf_active) { > + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); > + VLOG_INFO_RL(&rl, "NetworkFunction: Update active > network_function" > + " %s in network_function_group %s", > + nf_active->name, nfg->name); > + } > nbrec_network_function_group_set_network_function_active(nfg, > nf_active); > } > @@ -18273,6 +18296,23 @@ static void build_network_function_active( > } > } > > +static void > +network_function_configure_fail_open_flows(struct lflow_table *lflows, > + const struct ovn_datapath *od, struct lflow_ref *lflow_ref, > + uint64_t nfg_id) > +{ > + struct ds match = DS_EMPTY_INITIALIZER; > + ds_put_format(&match, > + REG_NF_GROUP_ID " == %"PRIu8" || " > + "(ct.trk && ct_label.nf_group_id == %"PRIu8")", > + (uint8_t) nfg_id, (uint8_t) nfg_id); > + ovn_lflow_add(lflows, od, S_SWITCH_IN_NF, 10, > + ds_cstr(&match), "next;", lflow_ref); > + ovn_lflow_add(lflows, od, S_SWITCH_OUT_NF, 10, > + ds_cstr(&match), "next;", lflow_ref); > + ds_destroy(&match); > +} > + > static void > consider_network_function(struct lflow_table *lflows, > const struct ovn_datapath *od, > @@ -18283,6 +18323,15 @@ 
consider_network_function(struct lflow_table *lflows, > struct ds action = DS_EMPTY_INITIALIZER; > static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); > > + /* If NFG is in fail-open mode then, configure flows with higher > + * priority than default drop rule to allow the traffic when there is no > + * active NF available. > + */ > + if (network_function_group_is_fallback_fail_open(nfg)) { > + network_function_configure_fail_open_flows(lflows, od, lflow_ref, > + nfg->id); > + } > + > /* Currently we support only one active port-pair in a group. > * If there are multiple active pairs, take the first one. > * Load balancing would be added in future. */ > diff --git a/ovn-nb.ovsschema b/ovn-nb.ovsschema > index cbb4f98e7..8c2c1d861 100644 > --- a/ovn-nb.ovsschema > +++ b/ovn-nb.ovsschema > @@ -1,7 +1,7 @@ > { > "name": "OVN_Northbound", > - "version": "7.14.0", > - "cksum": "3428479461 43444", > + "version": "7.15.0", > + "cksum": "4060410729 43708", > "tables": { > "NB_Global": { > "columns": { > @@ -221,6 +221,10 @@ > "Network_Function_Group": { > "columns": { > "name": {"type": "string"}, > + "fallback": {"type": {"key": {"type": "string", > + "enum": ["set", ["fail-open", > + "fail-close"]]}, > + "min": 0, "max": 1}}, > "network_function": {"type": > {"key": {"type": "uuid", > "refTable": "Network_Function", > diff --git a/ovn-nb.xml b/ovn-nb.xml > index b5fe44e53..1c04a1107 100644 > --- a/ovn-nb.xml > +++ b/ovn-nb.xml > @@ -6261,6 +6261,33 @@ or > <code>Network_Function_Group</code>. > </column> > > + <column name="fallback"> > + Fallback setting when no active network functions are available. > + <p> > + Supports following fallback mechanisms. > + If not specified, fail-close will be applied when no active Network > + Functions are available. > + </p> > + > + <dl> > + <dt><code>fail-open</code></dt> > + <dd> > + <p> > + Traffic bypasses Network Function and gets allowed when there is > + no active Network Function available. 
> + </p> > + </dd> > + > + <dt><code>fail-close</code></dt> > + <dd> > + <p> > + Traffic gets dropped when there is no active Network Function > + available. > + </p> > + </dd> > + </dl> > + </column> > + > <column name="network_function"> > A list of network functions which belong to this group. > </column> > diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at > index 448bc66ae..eb7b21ef2 100644 > --- a/tests/ovn-northd.at > +++ b/tests/ovn-northd.at > @@ -18000,6 +18000,8 @@ OVN_FOR_EACH_NORTHD_NO_HV([ > AT_SETUP([Check network function]) > ovn_start > > +AS_BOX([Create a NF and add it to a from-lport ACL]) > + > # Create a NF and add it to a from-lport ACL. > check ovn-nbctl ls-add sw0 > check ovn-nbctl lsp-add sw0 sw0-nf-p1 > @@ -18020,6 +18022,11 @@ check ovn-nbctl lsp-add sw0 sw0-p3 -- > lsp-set-addresses sw0-p3 "00:00:00:00:00:0 > check ovn-nbctl pg-add pg0 sw0-p1 > check ovn-nbctl acl-add pg0 from-lport 1002 "inport == @pg0 && ip4.dst == > 10.0.0.3" allow-related nfg0 > > +# Add hypervisor and bind NF ports > +check ovn-sbctl chassis-add hv1 geneve 127.0.0.1 > +check ovn-sbctl lsp-bind sw0-nf-p1 hv1 > +check ovn-sbctl lsp-bind sw0-nf-p2 hv1 > + > check ovn-nbctl --wait=sb sync > > ovn-sbctl dump-flows sw0 > sw0flows > @@ -18107,6 +18114,8 @@ ct_next(ct_state=new|trk) { > }; > ]) > > +AS_BOX([Create another NF and add it to a to-lport ACL.]) > + > # Create another NF and add it to a to-lport ACL. 
> check ovn-nbctl lsp-add sw0 sw0-nf-p3 > check ovn-nbctl lsp-add sw0 sw0-nf-p4 > @@ -18119,6 +18128,8 @@ check ovn-nbctl set logical_switch_port sw0-nf-p4 \ > check ovn-nbctl nf-add nf1 sw0-nf-p3 sw0-nf-p4 > check ovn-nbctl nfg-add nfg1 2 inline nf1 > check ovn-nbctl acl-add pg0 to-lport 1003 "outport == @pg0 && ip4.src == > 10.0.0.4" allow-related nfg1 > +check ovn-sbctl lsp-bind sw0-nf-p3 hv1 > +check ovn-sbctl lsp-bind sw0-nf-p4 hv1 > check ovn-nbctl --wait=sb sync > > ovn-sbctl dump-flows sw0 > sw0flows > @@ -18412,7 +18423,7 @@ AT_CHECK( > ]) > > # Set the service monitor for nf0 to offline and nf1 to offline > -# and verify nf1 is still the active. > +# and verify fail-close is applied. > > AS_BOX([Set the service monitor for nf0 to offline and nf1 to offline]) > check ovn-sbctl set service_monitor $nfsw-p2 status=offline > @@ -18428,18 +18439,33 @@ AT_CHECK( > [grep -E 'ls_(in|out)_network_function' lflows | ovn_strip_lflows | sort], > [0], [dnl > table=??(ls_in_network_function), priority=0 , match=(1), action=(next;) > table=??(ls_in_network_function), priority=1 , match=(reg8[[21]] == 1), > action=(drop;) > - table=??(ls_in_network_function), priority=100 , match=(inport == > "child-3"), action=(reg5[[16..31]] = ct_label.tun_if_id; next;) > - table=??(ls_in_network_function), priority=100 , match=(inport == > "child-4"), action=(reg5[[16..31]] = ct_label.tun_if_id; next;) > table=??(ls_in_network_function), priority=100 , match=(reg8[[21]] == 1 > && eth.mcast), action=(next;) > - table=??(ls_in_network_function), priority=99 , match=(reg8[[21]] == 1 > && reg8[[22]] == 0 && ct_label.nf_group_id == 1), action=(outport = > "child-3"; output;) > - table=??(ls_in_network_function), priority=99 , match=(reg8[[21]] == 1 > && reg8[[22]] == 1 && reg0[[22..29]] == 1), action=(outport = "child-3"; > output;) > table=??(ls_out_network_function), priority=0 , match=(1), > action=(next;) > table=??(ls_out_network_function), priority=1 , match=(reg8[[21]] == > 1), 
action=(drop;) > - table=??(ls_out_network_function), priority=100 , match=(outport == > "child-3"), action=(next;) > - table=??(ls_out_network_function), priority=100 , match=(outport == > "child-4"), action=(next;) > table=??(ls_out_network_function), priority=100 , match=(reg8[[21]] == 1 > && eth.mcast), action=(next;) > - table=??(ls_out_network_function), priority=99 , match=(reg8[[21]] == 1 > && reg8[[22]] == 0 && ct_label.nf_group_id == 1), action=(outport = > "child-4"; reg8[[23]] = 1; next(pipeline=ingress, table=??);) > - table=??(ls_out_network_function), priority=99 , match=(reg8[[21]] == 1 > && reg8[[22]] == 1 && reg0[[22..29]] == 1), action=(outport = "child-4"; > reg8[[23]] = 1; next(pipeline=ingress, table=??);) > +]) > + > +AS_BOX([Configure NFG fallback method to fail-open]) > + > +# Configure NFG fallback method to fail-open > +nfg_uuid=$(fetch_column nb:network_function_group _uuid name=nfg0) > +check ovn-nbctl set network_function_group $nfg_uuid fallback=fail-open > + > +check ovn-nbctl --wait=sb sync > + > +ovn-sbctl dump-flows $sw > lflows > +AT_CAPTURE_FILE([lflows]) > + > +AT_CHECK( > + [grep -E 'ls_(in|out)_network_function' lflows | ovn_strip_lflows | sort], > [0], [dnl > + table=??(ls_in_network_function), priority=0 , match=(1), action=(next;) > + table=??(ls_in_network_function), priority=1 , match=(reg8[[21]] == 1), > action=(drop;) > + table=??(ls_in_network_function), priority=10 , match=(reg0[[22..29]] == > 1 || (ct.trk && ct_label.nf_group_id == 1)), action=(next;) > + table=??(ls_in_network_function), priority=100 , match=(reg8[[21]] == 1 > && eth.mcast), action=(next;) > + table=??(ls_out_network_function), priority=0 , match=(1), > action=(next;) > + table=??(ls_out_network_function), priority=1 , match=(reg8[[21]] == > 1), action=(drop;) > + table=??(ls_out_network_function), priority=10 , match=(reg0[[22..29]] > == 1 || (ct.trk && ct_label.nf_group_id == 1)), action=(next;) > + table=??(ls_out_network_function), priority=100 
, match=(reg8[[21]] == 1 > && eth.mcast), action=(next;) > ]) > > AT_CLEANUP > diff --git a/tests/system-ovn.at b/tests/system-ovn.at > index 5b34e621f..312fa8e9e 100644 > --- a/tests/system-ovn.at > +++ b/tests/system-ovn.at > @@ -19020,6 +19020,7 @@ AS_BOX([Test-1: Single NF without health check]) > check ovn-nbctl nf-add nf0 nf-p1 nf-p2 > nf0_uuid=$(fetch_column nb:network_function _uuid name=nf0) > check ovn-nbctl nfg-add nfg0 1 inline nf0 > +nfg_uuid=$(fetch_column nb:network_function_group _uuid name=nfg0) > > check ovn-nbctl pg-add pg0 server > check ovn-nbctl acl-add pg0 from-lport 1001 "inport == @pg0 && ip4.dst == > 192.168.1.10" allow-related nfg0 > @@ -19033,7 +19034,40 @@ NS_CHECK_EXEC([nf], [ip link set dev nf-p1 master > br0]) > NS_CHECK_EXEC([nf], [ip link set dev nf-p2 master br0]) > NS_CHECK_EXEC([nf], [ip link set dev br0 up]) > > +start_tcp_server_client() { > + client_ns=$1 > + server_ns=$2 > + sip=$3 > + port=${4:-10000} > + fifo_path=${5:-/tmp/nffifo} > + wait_for_server=${6:-yes} > + > + # Start a TCP server > + : > output.txt > + NETNS_DAEMONIZE($server_ns, [server.py -i $sip -p $port], [server.pid]) > + on_exit 'kill $(cat server.pid)' > + > + # Ensure TCP server is ready for connections > + if [[ "$wait_for_server" == "yes" ]]; then > + OVS_WAIT_FOR_OUTPUT([cat output.txt], [0], [dnl > +Server Ready > +]) > + fi > + > + # Make a FIFO and send its output to a server > + if [[ ! 
-p "$fifo_path" ]]; then > + mkfifo "$fifo_path" > + on_exit "rm -rf $fifo_path" > + fi > + > + NETNS_DAEMONIZE($client_ns, [client.py -f "$fifo_path" -i $sip -p > $port], [client.pid]) > + on_exit 'kill $(cat client.pid)' > +} > + > validate_traffic() { > + # Empty the file > + : > output.txt > + > send_data=$1; recv_data=$2; pkt_cnt=$3; > AT_CHECK([printf "$send_data\n" > /tmp/nffifo], [0], [dnl > ]) > @@ -19047,8 +19081,6 @@ $recv_data > ]) > fi > > - : > output.txt > - > OVS_WAIT_UNTIL([ > total_pkts=$(cat pkt.pcap | wc -l) > test ${total_pkts} -ge ${pkt_cnt} > @@ -19058,22 +19090,7 @@ $recv_data > validate_single_nf_no_health_check() { > client_ns=$1; server_ns=$2; sip=$3; direction=$4 > > - # Start a TCP server > - NETNS_DAEMONIZE($server_ns, [server.py -i $sip -p 10000], [server.pid]) > - on_exit 'kill $(cat server.pid)' > - > - # Ensure TCP server is ready for connections > - OVS_WAIT_FOR_OUTPUT([cat output.txt], [0], [dnl > -Server Ready > -]) > - : > output.txt > - > - # Make a FIFO and send its output to a server > - mkfifo /tmp/nffifo > - on_exit 'rm -rf /tmp/nffifo' > - > - NETNS_DAEMONIZE($client_ns, [client.py -f "/tmp/nffifo" -i $sip -p > 10000], [client.pid]) > - on_exit 'kill $(cat client.pid)' > + start_tcp_server_client $client_ns $server_ns $sip > > AS_BOX([$direction: Verify traffic forwarding through single NF without > health check]) > > @@ -19083,7 +19100,6 @@ Server Ready > on_exit 'kill $(pidof tcpdump)' > > # Verify no service monitors exist when health check is not configured > - #AT_CHECK([ovn-sbctl list service_monitor | grep -v "^$"], [1]) > AT_CHECK([ovn-sbctl list service_monitor | wc -l], [0], [dnl > 0 > ]) > @@ -19135,26 +19151,15 @@ check ovn-nbctl --wait=hv sync > validate_nf_with_traffic() { > client_ns=$1; server_ns=$2; sip=$3; direction=$4 > > - # Start a TCP server > - NETNS_DAEMONIZE($server_ns, [server.py -i $sip -p 10000], [server.pid]) > - on_exit 'kill $(cat server.pid)' > - > - # Ensure TCP server is ready for 
connections > - OVS_WAIT_FOR_OUTPUT([cat output.txt], [0], [dnl > -Server Ready > -]) > - : > output.txt > - > - # Make a FIFO and send its output to a server > - mkfifo /tmp/nffifo > - on_exit 'rm -rf /tmp/nffifo' > - > - NETNS_DAEMONIZE($client_ns, [client.py -f "/tmp/nffifo" -i $sip -p > 10000], [client.pid]) > - on_exit 'kill $(cat client.pid)' > + start_tcp_server_client $client_ns $server_ns $sip > > AS_BOX([$direction: Verify traffic forwarding through NF when nf0 is > active]) > NS_CHECK_EXEC([nf], [ip link set dev br0 up]) > NS_CHECK_EXEC([nf], [ip link set dev br1 down]) > + # set fallback to fail-close > + check ovn-nbctl set network_function_group $nfg_uuid fallback=fail-close > + check ovn-nbctl --wait=hv sync > + > > NS_CHECK_EXEC([nf], [tcpdump -l -nvv -i nf-p1 tcp > pkt.pcap > 2>tcpdump_err &]) > OVS_WAIT_UNTIL([grep "listening" tcpdump_err]) > @@ -19186,23 +19191,44 @@ Server Ready > > validate_traffic "test" "test" 2 > > - AS_BOX([$direction: Verify traffic forwarding through NF when nf0 and > nf1 are down]) > + AS_BOX([$direction: Verify traffic forwarding through NF when nf0 and > nf1 are down " > + "when fallback set to fail-close]) > > kill $(pidof tcpdump) > NS_CHECK_EXEC([nf], [tcpdump -l -nvv -i nf-p3 tcp > pkt.pcap > 2>tcpdump_err &]) > OVS_WAIT_UNTIL([grep "listening" tcpdump_err]) > on_exit 'kill $(pidof tcpdump)' > > - # Bring nf0 down and nf1 up > + # Bring nf1 down > NS_CHECK_EXEC([nf], [ip link set dev br1 down]) > # sleep to allow service_monitor to detect the state > sleep 5 > > - ovn-sbctl dump-flows sw0 > lflows_nf1_active > + ovn-sbctl dump-flows sw0 > lflows_both_down_fail_close > ovn-sbctl list service_monitor > > validate_traffic "test" "" 0 > > + AS_BOX(["$direction: Verify traffic forwarding through NF when nf0 and > nf1 are down " > + "when fallback set to fail-open"]) > + > + # Restart the client before fail-open test > + kill $(cat client.pid) > + kill $(cat server.pid) > + > + start_tcp_server_client $client_ns 
$server_ns $sip 10000 /tmp/nffifo no > + > + check ovn-nbctl set network_function_group $nfg_uuid fallback=fail-open > + check ovn-nbctl --wait=hv sync > + > + kill $(cat tcpdump-nf.pid) > + NETNS_START_TCPDUMP([nf], [-nvv -i nf-p3 tcp], [tcpdump-nf]) > + > + ovn-sbctl dump-flows sw0 > lflows_both_down_fail_open > + ovn-sbctl list service_monitor > + > + validate_traffic "test" "test" 0 > + > kill $(cat client.pid) > kill $(cat server.pid) > rm -f client.pid > -- > 2.43.5 > > _______________________________________________ > dev mailing list > [email protected] > https://mail.openvswitch.org/mailman/listinfo/ovs-dev > _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
