On 3/3/26 9:33 PM, Mike Pattrick via dev wrote:
> The source address parameter in ovs_router_lookup is both an input and
> an output. However, the interface was complex. The caller could
> inadvertently set a search over v4 or v6 rules based on if the source
> address was initialized to in6addr_any or in6addr_v4mapped_any. The
> lookup function even used these two values interchangeably.
> 
> This patch uses dst address to determine if the lookup is v4 or v6, and
> considers both v6_any and v4mapped_any to be the null value equally. Now
> if the caller just wants src as output, they can initialize it to v6_any
> and lookup will still work correctly.
> 
> Fixes: dc14e92bcc25 ("route-table: Introduce multi-table route lookup.")
> Signed-off-by: Mike Pattrick <[email protected]>
> ---
> v2:
>  - Split src parameter into in and out versions in lookup function.
>  - Big refactor of lookup function.
>  - Added comment to explain their use.
>  - Changed unit test formatting.
>  - Added address family to router classifier.
> ---
>  lib/flow.c                   |   2 +-
>  lib/netdev-vport.c           |   2 +-
>  lib/ovs-router.c             | 115 +++++++++++++++++++----------------
>  lib/ovs-router.h             |   2 +-
>  ofproto/ofproto-dpif-sflow.c |   2 +-
>  ofproto/ofproto-dpif-xlate.c |  30 +++++----
>  tests/ovs-router.at          |  22 +++++++
>  7 files changed, 108 insertions(+), 67 deletions(-)
> 
> diff --git a/lib/flow.c b/lib/flow.c
> index a59a25c46..d14603fe3 100644
> --- a/lib/flow.c
> +++ b/lib/flow.c
> @@ -3731,7 +3731,7 @@ flow_get_tunnel_netdev(struct flow_tnl *tunnel)
>          return NULL;
>      }
>  
> -    if (!ovs_router_lookup(0, &ip6, iface, NULL, &gw)) {
> +    if (!ovs_router_lookup(0, &ip6, NULL, iface, NULL, &gw)) {
>          return NULL;
>      }
>  
> diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c
> index d11269d00..aa5223128 100644
> --- a/lib/netdev-vport.c
> +++ b/lib/netdev-vport.c
> @@ -300,7 +300,7 @@ tunnel_check_status_change__(struct netdev_vport *netdev)
>      iface[0] = '\0';
>      route = &tnl_cfg->ipv6_dst;
>      mark = tnl_cfg->egress_pkt_mark;
> -    if (ovs_router_lookup(mark, route, iface, NULL, &gw)) {
> +    if (ovs_router_lookup(mark, route, NULL, iface, NULL, &gw)) {
>          struct netdev *egress_netdev;
>  
>          if (!netdev_open(iface, NULL, &egress_netdev)) {
> diff --git a/lib/ovs-router.c b/lib/ovs-router.c
> index c11a52764..30113592f 100644
> --- a/lib/ovs-router.c
> +++ b/lib/ovs-router.c
> @@ -169,90 +169,94 @@ ovs_router_lookup_fallback(const struct in6_addr 
> *ip6_dst,
>      return true;
>  }
>  
> +/* If the caller does not want to match on source prefix, src_in must be 
> NULL.
> + * If src_in is not NULL, src_out will be set based on it instead of the 
> route
> + * entry. */

This comment is confusing.  Can we just say that these two are mutually 
exclusive?
And maybe assert that they are not passed in together?

>  bool
>  ovs_router_lookup(uint32_t mark, const struct in6_addr *ip6_dst,
> +                  const struct in6_addr *src_in,

Maybe callit ip6_src to make it more clear that it has the same
semantics as the ip6_dst here?

>                    char output_netdev[],
> -                  struct in6_addr *src, struct in6_addr *gw)
> +                  struct in6_addr *src_out, struct in6_addr *gw)
>  {
> -    struct flow flow = {.ipv6_dst = *ip6_dst, .pkt_mark = mark};
> -    const struct in6_addr *from_src = src;
> -    const struct cls_rule *cr = NULL;
> +    bool is_ipv4 = IN6_IS_ADDR_V4MAPPED(ip6_dst);
> +    ovs_be16 dl_type = is_ipv4 ? htons(ETH_TYPE_IP) : htons(ETH_TYPE_IPV6);
> +    const struct cls_rule *cr;
>      struct router_rule *rule;
> +    struct classifier *cls;
> +    struct flow flow;
>  
> -    if (src && ipv6_addr_is_set(src)) {
> -        struct flow flow_src = {.ipv6_dst = *src, .pkt_mark = mark};
> -        struct classifier *cls_local = cls_find(CLS_LOCAL);
> -        const struct cls_rule *cr_src;
> +    if (src_in) {
> +        flow = (struct flow) {.ipv6_dst = *src_in, .pkt_mark = mark,
> +                              .dl_type = dl_type};

This feels wrong.  In the flow structure, if we're setting ipv6_dst,
then the dl_type must be ipv6.  This may not be enforced by the
classifier today, but it can be in the future.  The content of the
struct is internally inconsistent if the dl_type doesn't match what
is actually set.  If we want to add dl_type into a mix, then we
need to properly set nw_dst vs ipv6_dst.

> +        ovs_assert(is_ipv4 == IN6_IS_ADDR_V4MAPPED(src_in));
>  
> -        if (!cls_local) {
> +        cls = cls_find(CLS_LOCAL);
> +
> +        if (!cls) {
>              return false;
>          }
>  
> -        cr_src = classifier_lookup(cls_local, OVS_VERSION_MAX, &flow_src,
> +        cr = classifier_lookup(cls, OVS_VERSION_MAX, &flow,
>                                     NULL, NULL);

Indentaion is off after the change.

> -        if (!cr_src) {
> +        if (!cr) {
>              return false;
>          }
> -    }
> -
> -    if (!from_src) {
> -        if (IN6_IS_ADDR_V4MAPPED(ip6_dst)) {
> -            from_src = &in6addr_v4mapped_any;
> +        if (src_out) {
> +            *src_out = *src_in;
> +            src_out = NULL;
> +        }
> +    } else {
> +        if (is_ipv4) {
> +            src_in = &in6addr_v4mapped_any;
>          } else {
> -            from_src = &in6addr_any;
> +            src_in = &in6addr_any;
>          }
>      }
>  
> +    flow = (struct flow){.ipv6_dst = *ip6_dst, .pkt_mark = mark,
> +                         .dl_type = dl_type};
> +
>      PVECTOR_FOR_EACH (rule, &rules) {
>          uint8_t plen = rule->ipv4 ? rule->src_prefix + 96 : rule->src_prefix;
>          bool matched;
>  
> -        if ((IN6_IS_ADDR_V4MAPPED(from_src) && !rule->ipv4) ||
> -            (!IN6_IS_ADDR_V4MAPPED(from_src) && rule->ipv4)) {
> +        if (is_ipv4 != rule->ipv4) {
>              continue;
>          }
>  
>          matched = (!rule->src_prefix ||
> -                   ipv6_addr_equals_masked(&rule->from_addr, from_src, 
> plen));
> +                   ipv6_addr_equals_masked(&rule->from_addr, src_in, plen));
>  
>          if (rule->invert) {
>              matched = !matched;
>          }
>  
> -        if (matched) {
> -            struct classifier *cls = cls_find(rule->lookup_table);
> +        if (!matched) {
> +            continue;
> +        }
> +        cls = cls_find(rule->lookup_table);
>  
> -            if (!cls) {
> -                /* A rule can be added before the table is created. */
> -                continue;
> -            }
> -            cr = classifier_lookup(cls, OVS_VERSION_MAX, &flow, NULL,
> -                                   NULL);
> -            if (cr) {
> -                struct ovs_router_entry *p = ovs_router_entry_cast(cr);
> -                /* Avoid matching mapped IPv4 of a packet against default 
> IPv6
> -                 * route entry.  Either packet dst is IPv6 or both packet and
> -                 * route entry dst are mapped IPv4.
> -                 */
> -                if (!IN6_IS_ADDR_V4MAPPED(ip6_dst) ||
> -                    IN6_IS_ADDR_V4MAPPED(&p->nw_addr)) {
> -                    break;
> -                }
> -            }
> +        if (!cls) {
> +            /* A rule can be added before the table is created. */
> +            continue;
> +        }
> +        cr = classifier_lookup(cls, OVS_VERSION_MAX, &flow, NULL,
> +                               NULL);

Can be on the same line.

> +        if (!cr) {
> +            continue;
>          }
> -    }
>  
> -    if (cr) {
>          struct ovs_router_entry *p = ovs_router_entry_cast(cr);
>  
>          ovs_strlcpy(output_netdev, p->output_netdev, IFNAMSIZ);
>          *gw = p->gw;
> -        if (src && !ipv6_addr_is_set(src)) {
> -            *src = p->src_addr;
> +        if (src_out) {
> +            *src_out = p->src_addr;
>          }
>          return true;
>      }
> -    return ovs_router_lookup_fallback(ip6_dst, output_netdev, src, gw);
> +
> +    return ovs_router_lookup_fallback(ip6_dst, output_netdev, src_out, gw);
>  }
>  
>  static void
> @@ -268,14 +272,23 @@ static void rt_init_match(struct match *match, uint32_t 
> mark,
>  {
>      struct in6_addr dst;
>      struct in6_addr mask;
> +    ovs_be16 dl_type;
> +
> +    if (IN6_IS_ADDR_V4MAPPED(ip6_dst)) {
> +        dl_type = htons(ETH_TYPE_IP);
> +    } else {
> +        dl_type = htons(ETH_TYPE_IPV6);
> +    }
>  
>      mask = ipv6_create_mask(plen);
>  
>      dst = ipv6_addr_bitand(ip6_dst, &mask);
>      memset(match, 0, sizeof *match);
>      match->flow.ipv6_dst = dst;
> +    match->flow.dl_type = dl_type;

Same comment about internal consistency of the struct flow.

>      match->wc.masks.ipv6_dst = mask;
>      match->wc.masks.pkt_mark = UINT32_MAX;
> +    match->wc.masks.dl_type = htons(UINT16_MAX);
>      match->flow.pkt_mark = mark;
>  }
>  
> @@ -1126,7 +1139,8 @@ static void
>  ovs_router_lookup_cmd(struct unixctl_conn *conn, int argc,
>                        const char *argv[], void *aux OVS_UNUSED)
>  {
> -    struct in6_addr gw, src6 = in6addr_any;
> +    struct in6_addr gw, src_out, src6;
> +    struct in6_addr *src_in = NULL;
>      char src6_s[IPV6_SCAN_LEN + 1];
>      char iface[IFNAMSIZ];
>      struct in6_addr ip6;
> @@ -1156,10 +1170,13 @@ ovs_router_lookup_cmd(struct unixctl_conn *conn, int 
> argc,
>          if (is_ipv6) {
>              if (ovs_scan(argv[i], "src="IPV6_SCAN_FMT, src6_s) &&
>                  ipv6_parse(src6_s, &src6)) {
> +                src_in = &src6;
>                  continue;
>              }
>          } else {
>              if (ovs_scan(argv[i], "src="IP_SCAN_FMT, IP_SCAN_ARGS(&src))) {
> +                in6_addr_set_mapped_ipv4(&src6, src);
> +                src_in = &src6;
>                  continue;
>              }
>          }
> @@ -1168,15 +1185,11 @@ ovs_router_lookup_cmd(struct unixctl_conn *conn, int 
> argc,
>          return;
>      }
>  
> -    if (src) {
> -        in6_addr_set_mapped_ipv4(&src6, src);
> -    }
> -
> -    if (ovs_router_lookup(mark, &ip6, iface, &src6, &gw)) {
> +    if (ovs_router_lookup(mark, &ip6, src_in, iface, &src_out, &gw)) {
>          struct ds ds = DS_EMPTY_INITIALIZER;
>  
>          ds_put_format(&ds, "src ");
> -        ipv6_format_mapped(&src6, &ds);
> +        ipv6_format_mapped(&src_out, &ds);
>          ds_put_format(&ds, "\ngateway ");
>          ipv6_format_mapped(&gw, &ds);
>          ds_put_format(&ds, "\ndev %s\n", iface);
> diff --git a/lib/ovs-router.h b/lib/ovs-router.h
> index bd1ab7a9a..ff465a7c7 100644
> --- a/lib/ovs-router.h
> +++ b/lib/ovs-router.h
> @@ -34,7 +34,7 @@ enum {
>  };
>  
>  bool ovs_router_lookup(uint32_t mark, const struct in6_addr *ip_dst,
> -                       char output_netdev[],
> +                       const struct in6_addr *src_in, char output_netdev[],
>                         struct in6_addr *src, struct in6_addr *gw);
>  void ovs_router_init(void);
>  bool ovs_router_is_referenced(uint32_t table);
> diff --git a/ofproto/ofproto-dpif-sflow.c b/ofproto/ofproto-dpif-sflow.c
> index e043d7cbc..27e5955a6 100644
> --- a/ofproto/ofproto-dpif-sflow.c
> +++ b/ofproto/ofproto-dpif-sflow.c
> @@ -482,7 +482,7 @@ sflow_choose_agent_address(const char *agent_device,
>  
>              struct in6_addr gw, src = in6addr_any;
>              char name[IFNAMSIZ];
> -            if (ovs_router_lookup(0, &target_ip, name, &src, &gw)) {
> +            if (ovs_router_lookup(0, &target_ip, NULL, name, &src, &gw)) {
>                  ip = src;
>                  goto success;
>              }
> diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
> index f2db4723b..cfeca24a3 100644
> --- a/ofproto/ofproto-dpif-xlate.c
> +++ b/ofproto/ofproto-dpif-xlate.c
> @@ -3679,16 +3679,28 @@ process_special(struct xlate_ctx *ctx, const struct 
> xport *xport)
>  static int
>  tnl_route_lookup_flow(const struct xlate_ctx *ctx,
>                        const struct flow *oflow,
> -                      struct in6_addr *ip, struct in6_addr *src,
> +                      struct in6_addr *ip, struct in6_addr *out_src,
>                        struct xport **out_port)
>  {
> -    char out_dev[IFNAMSIZ];
>      struct xbridge *xbridge;
> -    struct in6_addr gw;
> +    char out_dev[IFNAMSIZ];
> +    struct in6_addr in_src;
> +    struct in6_addr *_in_src = &in_src;

We don't use underscore prefixes for internal variables.

>      struct in6_addr dst;
> +    struct in6_addr gw;
> +
> +    /* If tunnel src address is set, it overrides route lookup. */

This is a little confusing as well.  It should say "Use source address
for the route lookup, if provided."

> +    if (oflow->tunnel.ip_src) {
> +        in6_addr_set_mapped_ipv4(&in_src, oflow->tunnel.ip_src);
> +    } else if (ipv6_addr_is_set(&oflow->tunnel.ipv6_src)) {
> +        in_src = oflow->tunnel.ipv6_src;
> +    } else {
> +        _in_src = NULL;

I'd suggest to use a boolean for marking if we have the source
ip set or not and then use a ternary as a function argumant below.
It should be easier to read this way.

> +    }
>  
>      dst = flow_tnl_dst(&oflow->tunnel);
> -    if (!ovs_router_lookup(oflow->pkt_mark, &dst, out_dev, src, &gw)) {
> +    if (!ovs_router_lookup(oflow->pkt_mark, &dst, _in_src, out_dev, out_src,
> +                           &gw)) {
>          return -ENOENT;
>      }
>  
> @@ -3878,8 +3890,8 @@ native_tunnel_output(struct xlate_ctx *ctx, const 
> struct xport *xport,
>      struct ovs_action_push_tnl tnl_push_data;
>      struct xport *out_dev = NULL;
>      ovs_be32 s_ip = 0, d_ip = 0;
> -    struct in6_addr s_ip6 = in6addr_any;
> -    struct in6_addr d_ip6 = in6addr_any;
> +    struct in6_addr s_ip6;
> +    struct in6_addr d_ip6;
>      struct eth_addr smac;
>      struct eth_addr dmac;
>      int err;
> @@ -3897,12 +3909,6 @@ native_tunnel_output(struct xlate_ctx *ctx, const 
> struct xport *xport,
>      memcpy(&old_base_flow, &ctx->base_flow, sizeof old_base_flow);
>      memcpy(&old_flow, &ctx->xin->flow, sizeof old_flow);
>  
> -    if (flow->tunnel.ip_src) {
> -        in6_addr_set_mapped_ipv4(&s_ip6, flow->tunnel.ip_src);
> -    } else if (ipv6_addr_is_set(&flow->tunnel.ipv6_src)) {
> -        s_ip6 = flow->tunnel.ipv6_src;
> -    }
> -
>      err = tnl_route_lookup_flow(ctx, flow, &d_ip6, &s_ip6, &out_dev);
>      if (err) {
>          put_cloned_drop_action(ctx->xbridge->ofproto, ctx->odp_actions,
> diff --git a/tests/ovs-router.at b/tests/ovs-router.at
> index b5282fd19..5f37e6c26 100644
> --- a/tests/ovs-router.at
> +++ b/tests/ovs-router.at
> @@ -315,6 +315,28 @@ OVS_VSWITCHD_START([add-port br0 p1 -- set Interface p1 
> type=dummy])
>  AT_CHECK([ovs-appctl netdev-dummy/ip4addr br0 192.0.2.1/24], [0], [OK
>  ])
>  
> +AT_CHECK([ovs-appctl ovs/route/rule/add from=all table=15], [0], [OK
> +])
> +AT_CHECK([ovs-appctl ovs/route/add 2.2.2.3/32 br0 192.0.2.1 table=15], [0], 
> [OK
> +])
> +
> +AT_CHECK([ovs-appctl ovs/route/show table=all | sort], [0], [dnl
> +Cached: 192.0.2.0/24 dev br0 SRC 192.0.2.1
> +Cached: 192.0.2.1/32 dev br0 SRC 192.0.2.1 local
> +User: 2.2.2.3/32 dev br0 GW 192.0.2.1 SRC 192.0.2.1 table 15
> +])
> +
> +AT_CHECK([ovs-appctl ovs/route/lookup 2.2.2.3], [0], [dnl
> +src 192.0.2.1
> +gateway 192.0.2.1
> +dev br0
> +])
> +
> +AT_CHECK([ovs-appctl ovs/route/del 2.2.2.3/32 table=15], [0], [OK
> +])
> +AT_CHECK([ovs-appctl ovs/route/rule/del from=all table=15], [0], [OK
> +])
> +
>  AT_CHECK([ovs-appctl ovs/route/add 10.1.1.0/24 br0 192.0.2.2 table=11], [0], 
> [OK
>  ])
>  AT_CHECK([ovs-appctl ovs/route/add 10.2.2.0/24 br0 192.0.2.2 table=12], [0], 
> [OK

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to