On 29 Nov 2025, at 0:08, [email protected] wrote:

> From: Numan Siddique <[email protected]>
>
> netdev-offload-tc when offloading a flow to tc, uses the flow's
> recirc_id as flower chain id.  recirc_id is of type 'uint32_t'.
> Kernel tc reserves the upper nibble (4 bits) of the tc flower's
> chain index for extended action opcode [1].  If a flow's recirc_id
> value is greater than 268,435,455 (0x0fffffff), kernel returns
> EINVAL when ovs-vswitchd tries to offload the flow.
>
> This commit fixes this offload issue by setting the maximum
> value of recirc id to 268,435,455 when hw-offload is enabled.
>
> Note: This commit renames a static function - mirror_destroy()
> to mirror_delete() in vswitchd/bridge.c since a public function
> with the same name exists in ofproto/ofproto-dpif-mirror.c.

Hi Numan,

Rather than adding all this infrastructure, should we maybe limit the recirc_id 
to 2^28 - 1?

Others WDYT? Anyhow, some comments below.

//Eelco

>
> [1] - 
> https://elixir.bootlin.com/linux/v6.17.6/source/tools/include/uapi/linux/pkt_cls.h#L50-L64
>
> Reported-at: 
> https://mail.openvswitch.org/pipermail/ovs-dev/2025-November/427485.html
> Suggested-by: Ilya Maximets <[email protected]>
> Signed-off-by: Numan Siddique <[email protected]>
> ---
>  lib/netdev-offload-provider.h |  4 ++++
>  lib/netdev-offload-tc.c       |  6 ++++++
>  lib/netdev-offload.c          | 23 +++++++++++++++++++++++
>  lib/netdev-offload.h          |  1 +
>  ofproto/ofproto-dpif-rid.c    | 18 ++++++++++++++++--
>  ofproto/ofproto-dpif-rid.h    |  2 ++
>  vswitchd/bridge.c             | 12 +++++++-----
>  7 files changed, 59 insertions(+), 7 deletions(-)
>
> diff --git a/lib/netdev-offload-provider.h b/lib/netdev-offload-provider.h
> index 9108856d18..9def5858c7 100644
> --- a/lib/netdev-offload-provider.h
> +++ b/lib/netdev-offload-provider.h
> @@ -121,6 +121,10 @@ struct netdev_flow_api {
>      int (*meter_del)(ofproto_meter_id meter_id,
>                       struct ofputil_meter_stats *stats);
>
> +    /* Returns the maximum value of the flow recirc_id which the provider can
> +     * support, if it uses the flow recirc_id to offload. */
> +    uint32_t (*get_max_recirc_id)(void);
> +
>      /* Initializies the netdev flow api.
>       * Return 0 if successful, otherwise returns a positive errno value. */
>      int (*init_flow_api)(struct netdev *);
> diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
> index d4b6e72f0a..ba157ce2c8 100644
> --- a/lib/netdev-offload-tc.c
> +++ b/lib/netdev-offload-tc.c
> @@ -3430,6 +3430,11 @@ meter_tc_del_policer(ofproto_meter_id meter_id,
>      return err;
>  }
>
> +static uint32_t
> +netdev_tc_get_max_recird_id(void) {

Guess you made a spelling error, should be netdev_tc_get_max_recirc_id()?

> +    return TC_ACT_EXT_VAL_MASK;

We return this value independent of whether offload is enabled or not. Will 
this not always limit the number of recirculation IDs?

> +}
> +
>  const struct netdev_flow_api netdev_offload_tc = {
>     .type = "linux_tc",
>     .flow_flush = netdev_tc_flow_flush,
> @@ -3443,5 +3448,6 @@ const struct netdev_flow_api netdev_offload_tc = {
>     .meter_set = meter_tc_set_policer,
>     .meter_get = meter_tc_get_policer,
>     .meter_del = meter_tc_del_policer,
> +   .get_max_recirc_id = netdev_tc_get_max_recird_id,
>     .init_flow_api = netdev_tc_init_flow_api,
>  };
> diff --git a/lib/netdev-offload.c b/lib/netdev-offload.c
> index 8a9d365559..c6c5ae486d 100644
> --- a/lib/netdev-offload.c
> +++ b/lib/netdev-offload.c
> @@ -908,3 +908,26 @@ netdev_set_flow_api_enabled(const struct smap 
> *ovs_other_config)
>          }
>      }
>  }
> +
> +/* Returns the minimum of the max recirc id supported by each registered 
> flow,
> + * API, if it supports get_max_recirc_id().
> + * Returns 0, if none of the registered flow API supports 
> get_max_recirc_id().
> + */
> +uint32_t
> +netdev_get_max_recird_id(void)
> +{
> +    uint32_t max_recirc_id = 0;
> +
> +    struct netdev_registered_flow_api *rfa;
> +
> +    CMAP_FOR_EACH (rfa, cmap_node, &netdev_flow_apis) {
> +        if (rfa->flow_api->get_max_recirc_id) {

Maybe if (!rfa->flow_api->get_max_recirc_id) {
    continue
} To save one indent level.

> +            uint32_t rfa_max_recirc_id = rfa->flow_api->get_max_recirc_id();

Needs a new line here.

> +            if (!max_recirc_id || rfa_max_recirc_id < max_recirc_id) {
> +                max_recirc_id = rfa_max_recirc_id;
> +            }
> +        }
> +    }
> +
> +    return max_recirc_id;
> +}
> diff --git a/lib/netdev-offload.h b/lib/netdev-offload.h
> index 7fc30a4243..f62a6f5d94 100644
> --- a/lib/netdev-offload.h
> +++ b/lib/netdev-offload.h
> @@ -127,6 +127,7 @@ bool netdev_is_flow_api_enabled(void);
>  void netdev_set_flow_api_enabled(const struct smap *ovs_other_config);
>  bool netdev_is_offload_rebalance_policy_enabled(void);
>  int netdev_flow_get_n_flows(struct netdev *netdev, uint64_t *n_flows);
> +uint32_t netdev_get_max_recird_id(void);
>
>  struct dpif_port;
>  int netdev_ports_insert(struct netdev *, struct dpif_port *);
> diff --git a/ofproto/ofproto-dpif-rid.c b/ofproto/ofproto-dpif-rid.c
> index f01468025c..f7bef09c3d 100644
> --- a/ofproto/ofproto-dpif-rid.c
> +++ b/ofproto/ofproto-dpif-rid.c
> @@ -36,6 +36,8 @@ static struct ovs_list expired OVS_GUARDED_BY(mutex)
>
>  static uint32_t next_id OVS_GUARDED_BY(mutex) = 1; /* Possible next free id. 
> */
>
> +static uint32_t max_recirc_id;
> +
>  #define RECIRC_POOL_STATIC_IDS 1024
>
>  static void recirc_id_node_free(struct recirc_id_node *);
> @@ -83,6 +85,16 @@ recirc_run(void)
>      ovs_mutex_unlock(&mutex);
>  }
>
> +/* Sets the max recirc id.  Caller cannnot reduce the max recirc id once set.

Three nnn’s

> + */
> +void
> +recirc_set_max_recirc_id(uint32_t max_recirc_id_)
> +{
> +    if (max_recirc_id_ > max_recirc_id) {
> +        max_recirc_id = max_recirc_id_;
> +    }
> +}
> +
>  /* We use the id as the hash value, which works due to cmap internal 
> rehashing.
>   * We also only insert nodes with unique IDs, so all possible hash collisions
>   * remain internal to the cmap. */
> @@ -228,7 +240,8 @@ frozen_state_free(struct frozen_state *state)
>
>  /* Allocate a unique recirculation id for the given set of flow metadata.
>   * The ID space is 2^^32, so there should never be a situation in which all
> - * the IDs are used up.  We loop until we find a free one. */
> + * the IDs are used up.  Although, the ID space could be limited to less
> + * than 2^^32, if max_recirc_id is set.  We loop until we find a free one. */
>  static struct recirc_id_node *
>  recirc_alloc_id__(const struct frozen_state *state, uint32_t hash)
>  {
> @@ -247,7 +260,8 @@ recirc_alloc_id__(const struct frozen_state *state, 
> uint32_t hash)
>             RECIRC_POOL_STATIC_IDS IDs on the later rounds, though, as some of
>             the initial allocations may be for long term uses (like bonds). */
>          node->id = next_id++;
> -        if (OVS_UNLIKELY(!node->id)) {
> +        if (OVS_UNLIKELY(!node->id ||
> +                         (max_recirc_id && node->id >= max_recirc_id))) {
>              next_id = RECIRC_POOL_STATIC_IDS + 1;
>              node->id = next_id++;
>          }
> diff --git a/ofproto/ofproto-dpif-rid.h b/ofproto/ofproto-dpif-rid.h
> index 4df630c62b..712f5b81e8 100644
> --- a/ofproto/ofproto-dpif-rid.h
> +++ b/ofproto/ofproto-dpif-rid.h
> @@ -212,6 +212,8 @@ static inline bool recirc_id_node_try_ref_rcu(const 
> struct recirc_id_node *n_)
>
>  void recirc_id_node_unref(const struct recirc_id_node *);
>
> +void recirc_set_max_recirc_id(uint32_t max_recirc_id_);
> +
>  void recirc_run(void);
>
>  /* Recirculation IDs on which references are held. */
> diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
> index 475eefefaa..c00ee4b7c9 100644
> --- a/vswitchd/bridge.c
> +++ b/vswitchd/bridge.c
> @@ -43,6 +43,7 @@
>  #include "odp-execute.h"
>  #include "ofproto/bond.h"
>  #include "ofproto/ofproto.h"
> +#include "ofproto/ofproto-dpif-rid.h"
>  #include "openvswitch/dynamic-string.h"
>  #include "openvswitch/list.h"
>  #include "openvswitch/meta-flow.h"
> @@ -328,7 +329,7 @@ static void run_system_stats(void);
>  static void bridge_configure_mirrors(struct bridge *);
>  static struct mirror *mirror_create(struct bridge *,
>                                      const struct ovsrec_mirror *);
> -static void mirror_destroy(struct mirror *);
> +static void mirror_delete(struct mirror *);
>  static bool mirror_configure(struct mirror *);
>  static void mirror_refresh_stats(struct mirror *);
>
> @@ -3398,6 +3399,7 @@ bridge_run(void)
>          netdev_set_flow_api_enabled(&cfg->other_config);
>          dpdk_init(&cfg->other_config);
>          userspace_tso_init(&cfg->other_config);
> +        recirc_set_max_recirc_id(netdev_get_max_recird_id());
>      }
>
>      /* Initialize the ofproto library.  This only needs to run once, but
> @@ -3708,7 +3710,7 @@ bridge_destroy(struct bridge *br, bool del)
>              port_destroy(port);
>          }
>          HMAP_FOR_EACH_SAFE (mirror, hmap_node, &br->mirrors) {
> -            mirror_destroy(mirror);
> +            mirror_delete(mirror);
>          }
>
>          hmap_remove(&all_bridges, &br->node);
> @@ -5153,7 +5155,7 @@ bridge_configure_mirrors(struct bridge *br)
>
>          atom.uuid = m->uuid;
>          if (!ovsdb_datum_find_key(mc, &atom, OVSDB_TYPE_UUID, NULL)) {
> -            mirror_destroy(m);
> +            mirror_delete(m);
>          }
>      }
>
> @@ -5166,7 +5168,7 @@ bridge_configure_mirrors(struct bridge *br)
>          }
>          m->cfg = cfg;
>          if (!mirror_configure(m)) {
> -            mirror_destroy(m);
> +            mirror_delete(m);
>          }
>      }
>
> @@ -5192,7 +5194,7 @@ mirror_create(struct bridge *br, const struct 
> ovsrec_mirror *cfg)
>  }
>
>  static void
> -mirror_destroy(struct mirror *m)
> +mirror_delete(struct mirror *m)
>  {
>      if (m) {
>          struct bridge *br = m->bridge;
> -- 
> 2.52.0
>
> _______________________________________________
> dev mailing list
> [email protected]
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to