On 10/15/20 9:46 AM, Toke Høiland-Jørgensen wrote:
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index bf5a99d803e4..980cc1363be8 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -3677,15 +3677,19 @@ union bpf_attr {
>   *   Return
>   *           The id is returned or 0 in case the id could not be retrieved.
>   *
> - * long bpf_redirect_neigh(u32 ifindex, u64 flags)
> + * long bpf_redirect_neigh(u32 ifindex, struct bpf_redir_neigh *params, int 
> plen, u64 flags)

why not fold ifindex into params? with params and plen this should be
extensible later if needed.

A couple of nits below that caught me eye.


>   *   Description
>   *           Redirect the packet to another net device of index *ifindex*
>   *           and fill in L2 addresses from neighboring subsystem. This helper
>   *           is somewhat similar to **bpf_redirect**\ (), except that it
>   *           populates L2 addresses as well, meaning, internally, the helper
> - *           performs a FIB lookup based on the skb's networking header to
> - *           get the address of the next hop and then relies on the neighbor
> - *           lookup for the L2 address of the nexthop.
> + *           relies on the neighbor lookup for the L2 address of the nexthop.
> + *
> + *           The helper will perform a FIB lookup based on the skb's
> + *           networking header to get the address of the next hop, unless
> + *           this is supplied by the caller in the *params* argument. The
> + *           *plen* argument indicates the len of *params* and should be set
> + *           to 0 if *params* is NULL.
>   *
>   *           The *flags* argument is reserved and must be 0. The helper is
>   *           currently only supported for tc BPF program types, and enabled
> @@ -4906,6 +4910,17 @@ struct bpf_fib_lookup {
>       __u8    dmac[6];     /* ETH_ALEN */
>  };
>  
> +struct bpf_redir_neigh {
> +     /* network family for lookup (AF_INET, AF_INET6)
> +      */

second line for the comment is not needed.

> +     __u8    nh_family;
> +     /* network address of nexthop; skips fib lookup to find gateway */
> +     union {
> +             __be32          ipv4_nh;
> +             __u32           ipv6_nh[4];  /* in6_addr; network order */
> +     };
> +};
> +
>  enum bpf_task_fd_type {
>       BPF_FD_TYPE_RAW_TRACEPOINT,     /* tp name */
>       BPF_FD_TYPE_TRACEPOINT,         /* tp name */
> diff --git a/net/core/filter.c b/net/core/filter.c
> index c5e2a1c5fd8d..d073031a3a61 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -2165,12 +2165,11 @@ static int __bpf_redirect(struct sk_buff *skb, struct 
> net_device *dev,
>  }
>  
>  #if IS_ENABLED(CONFIG_IPV6)
> -static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb)
> +static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb,
> +                         struct net_device *dev, const struct in6_addr 
> *nexthop)
>  {
> -     struct dst_entry *dst = skb_dst(skb);
> -     struct net_device *dev = dst->dev;
>       u32 hh_len = LL_RESERVED_SPACE(dev);
> -     const struct in6_addr *nexthop;
> +     struct dst_entry *dst = NULL;
>       struct neighbour *neigh;
>  
>       if (dev_xmit_recursion()) {
> @@ -2196,8 +2195,11 @@ static int bpf_out_neigh_v6(struct net *net, struct 
> sk_buff *skb)
>       }
>  
>       rcu_read_lock_bh();
> -     nexthop = rt6_nexthop(container_of(dst, struct rt6_info, dst),
> -                           &ipv6_hdr(skb)->daddr);
> +     if (!nexthop) {
> +             dst = skb_dst(skb);
> +             nexthop = rt6_nexthop(container_of(dst, struct rt6_info, dst),
> +                                   &ipv6_hdr(skb)->daddr);
> +     }
>       neigh = ip_neigh_gw6(dev, nexthop);
>       if (likely(!IS_ERR(neigh))) {
>               int ret;
> @@ -2210,36 +2212,46 @@ static int bpf_out_neigh_v6(struct net *net, struct 
> sk_buff *skb)
>               return ret;
>       }
>       rcu_read_unlock_bh();
> -     IP6_INC_STATS(dev_net(dst->dev),
> -                   ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
> +     if (dst)
> +             IP6_INC_STATS(dev_net(dst->dev),
> +                           ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
>  out_drop:
>       kfree_skb(skb);
>       return -ENETDOWN;
>  }
>  
> -static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device 
> *dev)
> +static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device 
> *dev,
> +                                struct bpf_nh_params *nh)
>  {
>       const struct ipv6hdr *ip6h = ipv6_hdr(skb);
> +     struct in6_addr *nexthop = NULL;
>       struct net *net = dev_net(dev);
>       int err, ret = NET_XMIT_DROP;
> -     struct dst_entry *dst;
> -     struct flowi6 fl6 = {
> -             .flowi6_flags   = FLOWI_FLAG_ANYSRC,
> -             .flowi6_mark    = skb->mark,
> -             .flowlabel      = ip6_flowinfo(ip6h),
> -             .flowi6_oif     = dev->ifindex,
> -             .flowi6_proto   = ip6h->nexthdr,
> -             .daddr          = ip6h->daddr,
> -             .saddr          = ip6h->saddr,
> -     };
>  
> -     dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL);
> -     if (IS_ERR(dst))
> -             goto out_drop;
> +     if (!nh->nh_family) {
> +             struct dst_entry *dst;

reverse xmas tree ordering

> +             struct flowi6 fl6 = {
> +                     .flowi6_flags = FLOWI_FLAG_ANYSRC,
> +                     .flowi6_mark = skb->mark,
> +                     .flowlabel = ip6_flowinfo(ip6h),
> +                     .flowi6_oif = dev->ifindex,
> +                     .flowi6_proto = ip6h->nexthdr,
> +                     .daddr = ip6h->daddr,
> +                     .saddr = ip6h->saddr,
> +             };
> +
> +             dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL);
> +             if (IS_ERR(dst))
> +                     goto out_drop;
>  
> -     skb_dst_set(skb, dst);
> +             skb_dst_set(skb, dst);
> +     } else if (nh->nh_family == AF_INET6) {
> +             nexthop = &nh->ipv6_nh;
> +     } else {
> +             goto out_drop;
> +     }
>  
> -     err = bpf_out_neigh_v6(net, skb);
> +     err = bpf_out_neigh_v6(net, skb, dev, nexthop);
>       if (unlikely(net_xmit_eval(err)))
>               dev->stats.tx_errors++;
>       else


Reply via email to