Thanks Naveen!

I merged this to main.

On Mon, Nov 17, 2025 at 11:13 AM Naveen Yerramneni
<[email protected]> wrote:
>
> A fallback setting can be configured at the Network Function Group level.
> The following values are supported:
>   - fail-close: All traffic that has to be redirected to NF
>                 gets dropped when no active NFs are available.
>                 This is the default setting.
>   - fail-open: All traffic that has to be redirected to NF
>                is allowed when no active NFs are available.
>
> Signed-off-by: Naveen Yerramneni <[email protected]>
> Acked-by: Sragdhara Datta Chaudhuri <[email protected]>
> Acked-by: Aditya Mehakare <[email protected]>
> ---
> v1:
> - First patch
>
> v2:
> - Rebase with latest main
>
> v3:
> - Fix format specifier issue
>
> v4:
> - Added Acked-by tags Sragdhara, Aditya
> ---
>  NEWS                |   1 +
>  northd/northd.c     |  69 +++++++++++++++++++++++++-----
>  ovn-nb.ovsschema    |   8 +++-
>  ovn-nb.xml          |  27 ++++++++++++
>  tests/ovn-northd.at |  44 +++++++++++++++----
>  tests/system-ovn.at | 102 +++++++++++++++++++++++++++-----------------
>  6 files changed, 192 insertions(+), 59 deletions(-)
>
> diff --git a/NEWS b/NEWS
> index 754934b6b..44e2011a8 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -63,6 +63,7 @@ Post v25.09.0
>    - Add a new experimental service - ovn-br-controller to program and
>      manage OVS bridges (not managed by ovn-controller) using OVN logical 
> flows.
>      For more details see man ovn-br(5).
> +   - Add fallback support for Network Function.
>
>  OVN v25.09.0 - xxx xx xxxx
>  --------------------------
> diff --git a/northd/northd.c b/northd/northd.c
> index cdf12ec86..73077710f 100644
> --- a/northd/northd.c
> +++ b/northd/northd.c
> @@ -18161,6 +18161,27 @@ build_lswitch_stateful_nf(struct ovn_port *op,
>                    ds_cstr(match), ds_cstr(actions), lflow_ref);
>  }
>
> +static const char*
> +network_function_group_get_fallback(
> +    const struct nbrec_network_function_group *nfg)
> +{
> +    if (nfg->fallback) {
> +        return nfg->fallback;
> +    }
> +    return "fail-close";
> +}
> +
> +static bool
> +network_function_group_is_fallback_fail_open(
> +    const struct nbrec_network_function_group *nfg)
> +{
> +    const char *fallback = network_function_group_get_fallback(nfg);
> +    if (!strcasecmp(fallback, "fail-open")) {
> +        return true;
> +    }
> +    return false;
> +}
> +
>  static struct nbrec_network_function *
>  nf_get_active(const struct nbrec_network_function_group *nfg)
>  {
> @@ -18237,21 +18258,23 @@ network_function_update_active(const struct 
> nbrec_network_function_group *nfg,
>              }
>          }
>      } else {
> -        /* No healthy NFs, keep nf_active_prev if set, else select first one 
> */
> -        nf_active = nf_active_prev ? nf_active_prev : 
> nfg->network_function[0];
> +        /* No healthy NFs, clear nf_active to apply fallback */
> +        nf_active = NULL;
>          static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
> -        VLOG_WARN_RL(&rl, "NetworkFunction: No healthy network_function 
> found "
> -                     "in network_function_group %s, "
> -                     "selected network_function %s as active", nfg->name,
> -                     nf_active->name);
> +        VLOG_WARN_RL(&rl, "NetworkFunction: No healthy network_function "
> +                     "found in network_function_group %s, "
> +                     "fallback to %s", nfg->name,
> +                     network_function_group_get_fallback(nfg));
>      }
>      free(healthy_nfs);
>
>      if (nf_active_prev != nf_active) {
> -        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
> -        VLOG_INFO_RL(&rl, "NetworkFunction: Update active network_function 
> %s "
> -                     "in network_function_group %s",
> -                     nf_active->name, nfg->name);
> +        if (nf_active) {
> +            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
> +            VLOG_INFO_RL(&rl, "NetworkFunction: Update active 
> network_function"
> +                         " %s in network_function_group %s",
> +                         nf_active->name, nfg->name);
> +        }
>          nbrec_network_function_group_set_network_function_active(nfg,
>                                                                   nf_active);
>      }
> @@ -18273,6 +18296,23 @@ static void build_network_function_active(
>      }
>  }
>
> +static void
> +network_function_configure_fail_open_flows(struct lflow_table *lflows,
> +              const struct ovn_datapath *od, struct lflow_ref *lflow_ref,
> +              uint64_t nfg_id)
> +{
> +    struct ds match = DS_EMPTY_INITIALIZER;
> +    ds_put_format(&match,
> +                  REG_NF_GROUP_ID " == %"PRIu8" || "
> +                  "(ct.trk && ct_label.nf_group_id == %"PRIu8")",
> +                  (uint8_t) nfg_id, (uint8_t) nfg_id);
> +    ovn_lflow_add(lflows, od, S_SWITCH_IN_NF, 10,
> +        ds_cstr(&match), "next;", lflow_ref);
> +    ovn_lflow_add(lflows, od, S_SWITCH_OUT_NF, 10,
> +        ds_cstr(&match), "next;", lflow_ref);
> +    ds_destroy(&match);
> +}
> +
>  static void
>  consider_network_function(struct lflow_table *lflows,
>                            const struct ovn_datapath *od,
> @@ -18283,6 +18323,15 @@ consider_network_function(struct lflow_table *lflows,
>      struct ds action = DS_EMPTY_INITIALIZER;
>      static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
>
> +    /* If the NFG is in fail-open mode, configure flows with a higher
> +     * priority than the default drop rule to allow the traffic when there
> +     * is no active NF available.
> +     */
> +    if (network_function_group_is_fallback_fail_open(nfg)) {
> +        network_function_configure_fail_open_flows(lflows, od, lflow_ref,
> +                                                   nfg->id);
> +    }
> +
>      /* Currently we support only one active port-pair in a group.
>       * If there are multiple active pairs, take the first one.
>       * Load balancing would be added in future. */
> diff --git a/ovn-nb.ovsschema b/ovn-nb.ovsschema
> index cbb4f98e7..8c2c1d861 100644
> --- a/ovn-nb.ovsschema
> +++ b/ovn-nb.ovsschema
> @@ -1,7 +1,7 @@
>  {
>      "name": "OVN_Northbound",
> -    "version": "7.14.0",
> -    "cksum": "3428479461 43444",
> +    "version": "7.15.0",
> +    "cksum": "4060410729 43708",
>      "tables": {
>          "NB_Global": {
>              "columns": {
> @@ -221,6 +221,10 @@
>          "Network_Function_Group": {
>              "columns": {
>                  "name": {"type": "string"},
> +                "fallback": {"type": {"key": {"type": "string",
> +                                      "enum": ["set", ["fail-open",
> +                                                       "fail-close"]]},
> +                                      "min": 0, "max": 1}},
>                  "network_function": {"type":
>                                    {"key": {"type": "uuid",
>                                             "refTable": "Network_Function",
> diff --git a/ovn-nb.xml b/ovn-nb.xml
> index b5fe44e53..1c04a1107 100644
> --- a/ovn-nb.xml
> +++ b/ovn-nb.xml
> @@ -6261,6 +6261,33 @@ or
>        <code>Network_Function_Group</code>.
>      </column>
>
> +    <column name="fallback">
> +      Fallback setting when no active network functions are available.
> +      <p>
> +        The following fallback mechanisms are supported.
> +        If not specified, fail-close is applied when no active Network
> +        Functions are available.
> +      </p>
> +
> +      <dl>
> +        <dt><code>fail-open</code></dt>
> +        <dd>
> +          <p>
> +            Traffic bypasses Network Function and gets allowed when there is
> +            no active Network Function available.
> +          </p>
> +        </dd>
> +
> +        <dt><code>fail-close</code></dt>
> +        <dd>
> +          <p>
> +           Traffic gets dropped when there is no active Network Function
> +           available.
> +          </p>
> +        </dd>
> +      </dl>
> +    </column>
> +
>      <column name="network_function">
>        A list of network functions which belong to this group.
>      </column>
> diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
> index 448bc66ae..eb7b21ef2 100644
> --- a/tests/ovn-northd.at
> +++ b/tests/ovn-northd.at
> @@ -18000,6 +18000,8 @@ OVN_FOR_EACH_NORTHD_NO_HV([
>  AT_SETUP([Check network function])
>  ovn_start
>
> +AS_BOX([Create a NF and add it to a from-lport ACL])
> +
>  # Create a NF and add it to a from-lport ACL.
>  check ovn-nbctl ls-add sw0
>  check ovn-nbctl lsp-add sw0 sw0-nf-p1
> @@ -18020,6 +18022,11 @@ check ovn-nbctl lsp-add sw0 sw0-p3 -- 
> lsp-set-addresses sw0-p3 "00:00:00:00:00:0
>  check ovn-nbctl pg-add pg0 sw0-p1
>  check ovn-nbctl acl-add pg0 from-lport 1002 "inport == @pg0 && ip4.dst == 
> 10.0.0.3" allow-related nfg0
>
> +# Add hypervisor and bind NF ports
> +check ovn-sbctl chassis-add hv1 geneve 127.0.0.1
> +check ovn-sbctl lsp-bind sw0-nf-p1 hv1
> +check ovn-sbctl lsp-bind sw0-nf-p2 hv1
> +
>  check ovn-nbctl --wait=sb sync
>
>  ovn-sbctl dump-flows sw0 > sw0flows
> @@ -18107,6 +18114,8 @@ ct_next(ct_state=new|trk) {
>  };
>  ])
>
> +AS_BOX([Create another NF and add it to a to-lport ACL.])
> +
>  # Create another NF and add it to a to-lport ACL.
>  check ovn-nbctl lsp-add sw0 sw0-nf-p3
>  check ovn-nbctl lsp-add sw0 sw0-nf-p4
> @@ -18119,6 +18128,8 @@ check ovn-nbctl set logical_switch_port sw0-nf-p4 \
>  check ovn-nbctl nf-add nf1 sw0-nf-p3 sw0-nf-p4
>  check ovn-nbctl nfg-add nfg1 2 inline nf1
>  check ovn-nbctl acl-add pg0 to-lport 1003 "outport == @pg0 && ip4.src == 
> 10.0.0.4" allow-related nfg1
> +check ovn-sbctl lsp-bind sw0-nf-p3 hv1
> +check ovn-sbctl lsp-bind sw0-nf-p4 hv1
>  check ovn-nbctl --wait=sb sync
>
>  ovn-sbctl dump-flows sw0 > sw0flows
> @@ -18412,7 +18423,7 @@ AT_CHECK(
>  ])
>
>  # Set the service monitor for nf0 to offline and nf1 to offline
> -# and verify nf1 is still the active.
> +# and verify fail-close is applied.
>
>  AS_BOX([Set the service monitor for nf0 to offline and nf1 to offline])
>  check ovn-sbctl set service_monitor $nfsw-p2 status=offline
> @@ -18428,18 +18439,33 @@ AT_CHECK(
>    [grep -E 'ls_(in|out)_network_function' lflows | ovn_strip_lflows | sort], 
> [0], [dnl
>    table=??(ls_in_network_function), priority=0    , match=(1), action=(next;)
>    table=??(ls_in_network_function), priority=1    , match=(reg8[[21]] == 1), 
> action=(drop;)
> -  table=??(ls_in_network_function), priority=100  , match=(inport == 
> "child-3"), action=(reg5[[16..31]] = ct_label.tun_if_id; next;)
> -  table=??(ls_in_network_function), priority=100  , match=(inport == 
> "child-4"), action=(reg5[[16..31]] = ct_label.tun_if_id; next;)
>    table=??(ls_in_network_function), priority=100  , match=(reg8[[21]] == 1 
> && eth.mcast), action=(next;)
> -  table=??(ls_in_network_function), priority=99   , match=(reg8[[21]] == 1 
> && reg8[[22]] == 0 && ct_label.nf_group_id == 1), action=(outport = 
> "child-3"; output;)
> -  table=??(ls_in_network_function), priority=99   , match=(reg8[[21]] == 1 
> && reg8[[22]] == 1 && reg0[[22..29]] == 1), action=(outport = "child-3"; 
> output;)
>    table=??(ls_out_network_function), priority=0    , match=(1), 
> action=(next;)
>    table=??(ls_out_network_function), priority=1    , match=(reg8[[21]] == 
> 1), action=(drop;)
> -  table=??(ls_out_network_function), priority=100  , match=(outport == 
> "child-3"), action=(next;)
> -  table=??(ls_out_network_function), priority=100  , match=(outport == 
> "child-4"), action=(next;)
>    table=??(ls_out_network_function), priority=100  , match=(reg8[[21]] == 1 
> && eth.mcast), action=(next;)
> -  table=??(ls_out_network_function), priority=99   , match=(reg8[[21]] == 1 
> && reg8[[22]] == 0 && ct_label.nf_group_id == 1), action=(outport = 
> "child-4"; reg8[[23]] = 1; next(pipeline=ingress, table=??);)
> -  table=??(ls_out_network_function), priority=99   , match=(reg8[[21]] == 1 
> && reg8[[22]] == 1 && reg0[[22..29]] == 1), action=(outport = "child-4"; 
> reg8[[23]] = 1; next(pipeline=ingress, table=??);)
> +])
> +
> +AS_BOX([Configure NFG fallback method to fail-open])
> +
> +# Configure NFG fallback method to fail-open
> +nfg_uuid=$(fetch_column nb:network_function_group _uuid name=nfg0)
> +check ovn-nbctl set network_function_group $nfg_uuid fallback=fail-open
> +
> +check ovn-nbctl --wait=sb sync
> +
> +ovn-sbctl dump-flows $sw > lflows
> +AT_CAPTURE_FILE([lflows])
> +
> +AT_CHECK(
> +  [grep -E 'ls_(in|out)_network_function' lflows | ovn_strip_lflows | sort], 
> [0], [dnl
> +  table=??(ls_in_network_function), priority=0    , match=(1), action=(next;)
> +  table=??(ls_in_network_function), priority=1    , match=(reg8[[21]] == 1), 
> action=(drop;)
> +  table=??(ls_in_network_function), priority=10   , match=(reg0[[22..29]] == 
> 1 || (ct.trk && ct_label.nf_group_id == 1)), action=(next;)
> +  table=??(ls_in_network_function), priority=100  , match=(reg8[[21]] == 1 
> && eth.mcast), action=(next;)
> +  table=??(ls_out_network_function), priority=0    , match=(1), 
> action=(next;)
> +  table=??(ls_out_network_function), priority=1    , match=(reg8[[21]] == 
> 1), action=(drop;)
> +  table=??(ls_out_network_function), priority=10   , match=(reg0[[22..29]] 
> == 1 || (ct.trk && ct_label.nf_group_id == 1)), action=(next;)
> +  table=??(ls_out_network_function), priority=100  , match=(reg8[[21]] == 1 
> && eth.mcast), action=(next;)
>  ])
>
>  AT_CLEANUP
> diff --git a/tests/system-ovn.at b/tests/system-ovn.at
> index 5b34e621f..312fa8e9e 100644
> --- a/tests/system-ovn.at
> +++ b/tests/system-ovn.at
> @@ -19020,6 +19020,7 @@ AS_BOX([Test-1: Single NF without health check])
>  check ovn-nbctl nf-add nf0 nf-p1 nf-p2
>  nf0_uuid=$(fetch_column nb:network_function _uuid name=nf0)
>  check ovn-nbctl nfg-add nfg0 1 inline nf0
> +nfg_uuid=$(fetch_column nb:network_function_group _uuid name=nfg0)
>
>  check ovn-nbctl pg-add pg0 server
>  check ovn-nbctl acl-add pg0 from-lport 1001 "inport == @pg0 && ip4.dst == 
> 192.168.1.10" allow-related nfg0
> @@ -19033,7 +19034,40 @@ NS_CHECK_EXEC([nf], [ip link set dev nf-p1 master 
> br0])
>  NS_CHECK_EXEC([nf], [ip link set dev nf-p2 master br0])
>  NS_CHECK_EXEC([nf], [ip link set dev br0 up])
>
> +start_tcp_server_client() {
> +    client_ns=$1
> +    server_ns=$2
> +    sip=$3
> +    port=${4:-10000}
> +    fifo_path=${5:-/tmp/nffifo}
> +    wait_for_server=${6:-yes}
> +
> +    # Start a TCP server
> +    : > output.txt
> +    NETNS_DAEMONIZE($server_ns, [server.py -i $sip -p $port], [server.pid])
> +    on_exit 'kill $(cat server.pid)'
> +
> +    # Ensure TCP server is ready for connections
> +    if [[ "$wait_for_server" == "yes" ]]; then
> +        OVS_WAIT_FOR_OUTPUT([cat output.txt], [0], [dnl
> +Server Ready
> +])
> +    fi
> +
> +    # Make a FIFO and send its output to a server
> +    if [[ ! -p "$fifo_path" ]]; then
> +        mkfifo "$fifo_path"
> +        on_exit "rm -rf $fifo_path"
> +    fi
> +
> +    NETNS_DAEMONIZE($client_ns, [client.py -f "$fifo_path" -i $sip -p 
> $port], [client.pid])
> +    on_exit 'kill $(cat client.pid)'
> +}
> +
>  validate_traffic() {
> +    # Empty the file
> +    : > output.txt
> +
>      send_data=$1; recv_data=$2; pkt_cnt=$3;
>      AT_CHECK([printf "$send_data\n" > /tmp/nffifo], [0], [dnl
>  ])
> @@ -19047,8 +19081,6 @@ $recv_data
>  ])
>      fi
>
> -    : > output.txt
> -
>      OVS_WAIT_UNTIL([
>          total_pkts=$(cat pkt.pcap | wc -l)
>          test ${total_pkts} -ge ${pkt_cnt}
> @@ -19058,22 +19090,7 @@ $recv_data
>  validate_single_nf_no_health_check() {
>      client_ns=$1; server_ns=$2; sip=$3; direction=$4
>
> -    # Start a TCP server
> -    NETNS_DAEMONIZE($server_ns, [server.py -i $sip -p 10000], [server.pid])
> -    on_exit 'kill $(cat server.pid)'
> -
> -    # Ensure TCP server is ready for connections
> -    OVS_WAIT_FOR_OUTPUT([cat output.txt], [0], [dnl
> -Server Ready
> -])
> -    : > output.txt
> -
> -    # Make a FIFO and send its output to a server
> -    mkfifo /tmp/nffifo
> -    on_exit 'rm -rf /tmp/nffifo'
> -
> -    NETNS_DAEMONIZE($client_ns, [client.py -f "/tmp/nffifo" -i $sip -p 
> 10000], [client.pid])
> -    on_exit 'kill $(cat client.pid)'
> +    start_tcp_server_client $client_ns $server_ns $sip
>
>      AS_BOX([$direction: Verify traffic forwarding through single NF without 
> health check])
>
> @@ -19083,7 +19100,6 @@ Server Ready
>      on_exit 'kill $(pidof tcpdump)'
>
>      # Verify no service monitors exist when health check is not configured
> -    #AT_CHECK([ovn-sbctl list service_monitor | grep -v "^$"], [1])
>      AT_CHECK([ovn-sbctl list service_monitor | wc -l], [0], [dnl
>  0
>  ])
> @@ -19135,26 +19151,15 @@ check ovn-nbctl --wait=hv sync
>  validate_nf_with_traffic() {
>      client_ns=$1; server_ns=$2; sip=$3; direction=$4
>
> -    # Start a TCP server
> -    NETNS_DAEMONIZE($server_ns, [server.py -i $sip -p 10000], [server.pid])
> -    on_exit 'kill $(cat server.pid)'
> -
> -    # Ensure TCP server is ready for connections
> -    OVS_WAIT_FOR_OUTPUT([cat output.txt], [0], [dnl
> -Server Ready
> -])
> -    : > output.txt
> -
> -    # Make a FIFO and send its output to a server
> -    mkfifo /tmp/nffifo
> -    on_exit 'rm -rf /tmp/nffifo'
> -
> -    NETNS_DAEMONIZE($client_ns, [client.py -f "/tmp/nffifo" -i $sip -p 
> 10000], [client.pid])
> -    on_exit 'kill $(cat client.pid)'
> +    start_tcp_server_client $client_ns $server_ns $sip
>
>      AS_BOX([$direction: Verify traffic forwarding through NF when nf0 is 
> active])
>      NS_CHECK_EXEC([nf], [ip link set dev br0 up])
>      NS_CHECK_EXEC([nf], [ip link set dev br1 down])
> +    # set fallback to fail-close
> +    check ovn-nbctl set network_function_group $nfg_uuid fallback=fail-close
> +    check ovn-nbctl --wait=hv sync
> +
>
>      NS_CHECK_EXEC([nf], [tcpdump -l -nvv -i nf-p1 tcp > pkt.pcap 
> 2>tcpdump_err &])
>      OVS_WAIT_UNTIL([grep "listening" tcpdump_err])
> @@ -19186,23 +19191,44 @@ Server Ready
>
>      validate_traffic "test" "test" 2
>
> -    AS_BOX([$direction: Verify traffic forwarding through NF when nf0 and 
> nf1 are down])
> +    AS_BOX([$direction: Verify traffic forwarding through NF when nf0 and 
> nf1 are down "
> +            "when fallback set to fail-close])
>
>      kill $(pidof tcpdump)
>      NS_CHECK_EXEC([nf], [tcpdump -l -nvv -i nf-p3 tcp > pkt.pcap 
> 2>tcpdump_err &])
>      OVS_WAIT_UNTIL([grep "listening" tcpdump_err])
>      on_exit 'kill $(pidof tcpdump)'
>
> -    # Bring nf0 down and nf1 up
> +    # Bring nf1 down
>      NS_CHECK_EXEC([nf], [ip link set dev br1 down])
>      # sleep to allow service_monitor to detect the state
>      sleep 5
>
> -    ovn-sbctl dump-flows sw0 > lflows_nf1_active
> +    ovn-sbctl dump-flows sw0 > lflows_both_down_fail_close
>      ovn-sbctl list service_monitor
>
>      validate_traffic "test" "" 0
>
> +    AS_BOX(["$direction: Verify traffic forwarding through NF when nf0 and 
> nf1 are down "
> +            "when fallback set to fail-open"])
> +
> +    # Restart the client before fail-open test
> +    kill $(cat client.pid)
> +    kill $(cat server.pid)
> +
> +    start_tcp_server_client $client_ns $server_ns $sip 10000 /tmp/nffifo no
> +
> +    check ovn-nbctl set network_function_group $nfg_uuid fallback=fail-open
> +    check ovn-nbctl --wait=hv sync
> +
> +    kill $(cat tcpdump-nf.pid)
> +    NETNS_START_TCPDUMP([nf], [-nvv -i nf-p3 tcp], [tcpdump-nf])
> +
> +    ovn-sbctl dump-flows sw0 > lflows_both_down_fail_open
> +    ovn-sbctl list service_monitor
> +
> +    validate_traffic "test" "test" 0
> +
>      kill $(cat client.pid)
>      kill $(cat server.pid)
>      rm -f client.pid
> --
> 2.43.5
>
> _______________________________________________
> dev mailing list
> [email protected]
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to