Re: [RFC v3 2/3] vxlan: add support for underlay in non-default VRF

2018-11-20 Thread Alexis Bauvin
Le 20 nov. 2018 à 16:25, Roopa Prabhu  a écrit :
> 
> On Tue, Nov 20, 2018 at 6:23 AM Alexis Bauvin  wrote:
>> 
>> Creating a VXLAN device with is underlay in the non-default VRF makes
>> egress route lookup fail or incorrect since it will resolve in the
>> default VRF, and ingress fail because the socket listens in the default
>> VRF.
>> 
>> This patch binds the underlying UDP tunnel socket to the l3mdev of the
>> lower device of the VXLAN device. This will listen in the proper VRF and
>> output traffic from said l3mdev, matching l3mdev routing rules and
>> looking up the correct routing table.
>> 
>> When the VXLAN device does not have a lower device, or the lower device
>> is in the default VRF, the socket will not be bound to any interface,
>> keeping the previous behaviour.
>> 
>> The underlay l3mdev is deduced from the VXLAN lower device
>> (IFLA_VXLAN_LINK).
>> 
>> The l3mdev_master_upper_ifindex_by_index function has been added to
>> l3mdev. Its goal is to fetch the effective l3mdev of an interface which
>> is not a direct slave of said l3mdev. It handles the following example,
>> properly resolving the l3mdev of eth0 to vrf-blue:
>> 
>> +--+ +-+
>> |  | | |
>> | vrf-blue | | vrf-red |
>> |  | | |
>> ++-+ +++
>> ||
>> ||
>> ++-+ +++
>> |  | | |
>> | br-blue  | | br-red  |
>> |  | | |
>> ++-+ +---+-+---+
>> |   | |
>> | +-+ +-+
>> | | |
>> ++-++--++   +++
>> |  |  lower device  |   |   | |
>> |   eth0   | <- - - - - - - | vxlan-red |   | tap-red | (... more taps)
>> |  ||   |   | |
>> +--++---+   +-+
>> 
>> Signed-off-by: Alexis Bauvin 
>> Reviewed-by: Amine Kherbouche 
>> Tested-by: Amine Kherbouche 
>> ---
>> drivers/net/vxlan.c  | 32 
>> include/net/l3mdev.h | 22 ++
>> net/l3mdev/l3mdev.c  | 18 ++
>> 3 files changed, 64 insertions(+), 8 deletions(-)
>> 
>> diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
>> index 27bd586b94b0..a3de08122269 100644
>> --- a/drivers/net/vxlan.c
>> +++ b/drivers/net/vxlan.c
>> @@ -212,7 +212,7 @@ static inline struct vxlan_rdst 
>> *first_remote_rtnl(struct vxlan_fdb *fdb)
>>  * and enabled unshareable flags.
>>  */
>> static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t 
>> family,
>> - __be16 port, u32 flags)
>> + __be16 port, u32 flags, int 
>> ifindex)
>> {
>>struct vxlan_sock *vs;
>> 
>> @@ -221,7 +221,8 @@ static struct vxlan_sock *vxlan_find_sock(struct net 
>> *net, sa_family_t family,
>>hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) {
>>if (inet_sk(vs->sock->sk)->inet_sport == port &&
>>vxlan_get_sk_family(vs) == family &&
>> -   vs->flags == flags)
>> +   vs->flags == flags &&
>> +   vs->sock->sk->sk_bound_dev_if == ifindex)
>>return vs;
>>}
>>return NULL;
>> @@ -261,7 +262,7 @@ static struct vxlan_dev *vxlan_find_vni(struct net *net, 
>> int ifindex,
>> {
>>struct vxlan_sock *vs;
>> 
>> -   vs = vxlan_find_sock(net, family, port, flags);
>> +   vs = vxlan_find_sock(net, family, port, flags, ifindex);
>>if (!vs)
>>return NULL;
>> 
>> @@ -2172,6 +2173,9 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct 
>> net_device *dev,
>>struct rtable *rt;
>>__be16 df = 0;
>> 
>> +   if (!ifindex)
>> +   ifindex = sock4->sock->sk->sk_bound_dev_if;
>> +
>>rt = vxlan_get_route(vxlan, dev, sock4, skb, ifindex, tos,
>> dst->sin.sin_addr.s_addr,
>> _ip.sin.sin_addr.s_addr,
>> @@ -2210,6 +2214,9 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct 
>> net_device *dev,
>>} else {
>>struct vxlan_sock *sock6 = rcu_dereference(vxlan->vn6_sock);
>> 
>> +   if (!ifindex)
>> +   ifindex = sock6->sock->sk->sk_bound_dev_if;
>> +
>>ndst = vxlan6_get_route(vxlan, dev, sock6, skb, ifindex, tos,
>>label, >sin6.sin6_addr,
>>

Re: [RFC v3 2/3] vxlan: add support for underlay in non-default VRF

2018-11-20 Thread Alexis Bauvin
Le 20 nov. 2018 à 15:57, David Ahern  a écrit :
> 
> On 11/20/18 7:23 AM, Alexis Bauvin wrote:
>> Creating a VXLAN device with is underlay in the non-default VRF makes
>> egress route lookup fail or incorrect since it will resolve in the
>> default VRF, and ingress fail because the socket listens in the default
>> VRF.
>> 
>> This patch binds the underlying UDP tunnel socket to the l3mdev of the
>> lower device of the VXLAN device. This will listen in the proper VRF and
>> output traffic from said l3mdev, matching l3mdev routing rules and
>> looking up the correct routing table.
>> 
>> When the VXLAN device does not have a lower device, or the lower device
>> is in the default VRF, the socket will not be bound to any interface,
>> keeping the previous behaviour.
>> 
>> The underlay l3mdev is deduced from the VXLAN lower device
>> (IFLA_VXLAN_LINK).
>> 
>> The l3mdev_master_upper_ifindex_by_index function has been added to
>> l3mdev. Its goal is to fetch the effective l3mdev of an interface which
>> is not a direct slave of said l3mdev. It handles the following example,
>> properly resolving the l3mdev of eth0 to vrf-blue:
>> 
>> +--+ +-+
>> |  | | |
>> | vrf-blue | | vrf-red |
>> |  | | |
>> ++-+ +++
>> ||
>> ||
>> ++-+ +++
>> |  | | |
>> | br-blue  | | br-red  |
>> |  | | |
>> ++-+ +---+-+---+
>> |   | |
>> | +-+ +-+
>> | | |
>> ++-++--++   +++
>> |  |  lower device  |   |   | |
>> |   eth0   | <- - - - - - - | vxlan-red |   | tap-red | (... more taps)
>> |  ||   |   | |
>> +--++---+   +-+
> 
> same here. Very helpful diagram.
> 
> 
>> diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
>> index 27bd586b94b0..a3de08122269 100644
>> --- a/drivers/net/vxlan.c
>> +++ b/drivers/net/vxlan.c
> 
> The vxlan changes look ok to me. It would be good for someone who know
> that code better than I do to review it.
> 
> Move the following l3mdev changes to a separate patch - introduce infra
> changes separate from their use:

Sure, will do in next version. Thanks!

>> diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h
>> index 3832099289c5..78fa0ac4613c 100644
>> --- a/include/net/l3mdev.h
>> +++ b/include/net/l3mdev.h
>> @@ -101,6 +101,17 @@ struct net_device *l3mdev_master_dev_rcu(const struct 
>> net_device *_dev)
>>  return master;
>> }
>> 
>> +int l3mdev_master_upper_ifindex_by_index_rcu(struct net *net, int ifindex);
>> +static inline
>> +int l3mdev_master_upper_ifindex_by_index(struct net *net, int ifindex)
>> +{
>> +rcu_read_lock();
>> +ifindex = l3mdev_master_upper_ifindex_by_index_rcu(net, ifindex);
>> +rcu_read_unlock();
>> +
>> +return ifindex;
>> +}
>> +
>> u32 l3mdev_fib_table_rcu(const struct net_device *dev);
>> u32 l3mdev_fib_table_by_index(struct net *net, int ifindex);
>> static inline u32 l3mdev_fib_table(const struct net_device *dev)
>> @@ -207,6 +218,17 @@ static inline int l3mdev_master_ifindex_by_index(struct 
>> net *net, int ifindex)
>>  return 0;
>> }
>> 
>> +static inline
>> +int l3mdev_master_upper_ifindex_by_index_rcu(struct net *net, int ifindex)
>> +{
>> +return 0;
>> +}
>> +static inline
>> +int l3mdev_master_upper_ifindex_by_index(struct net *net, int ifindex)
>> +{
>> +return 0;
>> +}
>> +
>> static inline
>> struct net_device *l3mdev_master_dev_rcu(const struct net_device *dev)
>> {
>> diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c
>> index 8da86ceca33d..309dee76724e 100644
>> --- a/net/l3mdev/l3mdev.c
>> +++ b/net/l3mdev/l3mdev.c
>> @@ -46,6 +46,24 @@ int l3mdev_master_ifindex_rcu(const struct net_device 
>> *dev)
>> }
>> EXPORT_SYMBOL_GPL(l3mdev_master_ifindex_rcu);
>> 
>> +/**
>> + *  l3mdev_master_upper_ifindex_by_index - get index of upper l3 master
>> + * device
>> + *  @net: network namespace for device index lookup
>> + *  @ifindex: targeted interface
>> + */
>> +int l3mdev_master_upper_ifindex_by_index_rcu(struct net *net, int ifindex)
>> +{
>> +struct net_device *dev;
>> +
>> +dev = dev_get_by_index_rcu(net, ifindex);
>> +while (dev && !netif_is_l3_master(dev))
>> +dev = netdev_master_upper_dev_get(dev);
>> +
>> +return dev ? dev->ifindex : 0;
>> +}
>> +EXPORT_SYMBOL_GPL(l3mdev_master_upper_ifindex_by_index_rcu);
>> +
>> /**
>>  *   l3mdev_fib_table - get FIB table 

Re: [RFC v3 2/3] vxlan: add support for underlay in non-default VRF

2018-11-20 Thread Roopa Prabhu
On Tue, Nov 20, 2018 at 6:23 AM Alexis Bauvin  wrote:
>
> Creating a VXLAN device with is underlay in the non-default VRF makes
> egress route lookup fail or incorrect since it will resolve in the
> default VRF, and ingress fail because the socket listens in the default
> VRF.
>
> This patch binds the underlying UDP tunnel socket to the l3mdev of the
> lower device of the VXLAN device. This will listen in the proper VRF and
> output traffic from said l3mdev, matching l3mdev routing rules and
> looking up the correct routing table.
>
> When the VXLAN device does not have a lower device, or the lower device
> is in the default VRF, the socket will not be bound to any interface,
> keeping the previous behaviour.
>
> The underlay l3mdev is deduced from the VXLAN lower device
> (IFLA_VXLAN_LINK).
>
> The l3mdev_master_upper_ifindex_by_index function has been added to
> l3mdev. Its goal is to fetch the effective l3mdev of an interface which
> is not a direct slave of said l3mdev. It handles the following example,
> properly resolving the l3mdev of eth0 to vrf-blue:
>
> +--+ +-+
> |  | | |
> | vrf-blue | | vrf-red |
> |  | | |
> ++-+ +++
>  ||
>  ||
> ++-+ +++
> |  | | |
> | br-blue  | | br-red  |
> |  | | |
> ++-+ +---+-+---+
>  |   | |
>  | +-+ +-+
>  | | |
> ++-++--++   +++
> |  |  lower device  |   |   | |
> |   eth0   | <- - - - - - - | vxlan-red |   | tap-red | (... more taps)
> |  ||   |   | |
> +--++---+   +-+
>
> Signed-off-by: Alexis Bauvin 
> Reviewed-by: Amine Kherbouche 
> Tested-by: Amine Kherbouche 
> ---
>  drivers/net/vxlan.c  | 32 
>  include/net/l3mdev.h | 22 ++
>  net/l3mdev/l3mdev.c  | 18 ++
>  3 files changed, 64 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
> index 27bd586b94b0..a3de08122269 100644
> --- a/drivers/net/vxlan.c
> +++ b/drivers/net/vxlan.c
> @@ -212,7 +212,7 @@ static inline struct vxlan_rdst *first_remote_rtnl(struct 
> vxlan_fdb *fdb)
>   * and enabled unshareable flags.
>   */
>  static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t 
> family,
> - __be16 port, u32 flags)
> + __be16 port, u32 flags, int ifindex)
>  {
> struct vxlan_sock *vs;
>
> @@ -221,7 +221,8 @@ static struct vxlan_sock *vxlan_find_sock(struct net 
> *net, sa_family_t family,
> hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) {
> if (inet_sk(vs->sock->sk)->inet_sport == port &&
> vxlan_get_sk_family(vs) == family &&
> -   vs->flags == flags)
> +   vs->flags == flags &&
> +   vs->sock->sk->sk_bound_dev_if == ifindex)
> return vs;
> }
> return NULL;
> @@ -261,7 +262,7 @@ static struct vxlan_dev *vxlan_find_vni(struct net *net, 
> int ifindex,
>  {
> struct vxlan_sock *vs;
>
> -   vs = vxlan_find_sock(net, family, port, flags);
> +   vs = vxlan_find_sock(net, family, port, flags, ifindex);
> if (!vs)
> return NULL;
>
> @@ -2172,6 +2173,9 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct 
> net_device *dev,
> struct rtable *rt;
> __be16 df = 0;
>
> +   if (!ifindex)
> +   ifindex = sock4->sock->sk->sk_bound_dev_if;
> +
> rt = vxlan_get_route(vxlan, dev, sock4, skb, ifindex, tos,
>  dst->sin.sin_addr.s_addr,
>  _ip.sin.sin_addr.s_addr,
> @@ -2210,6 +2214,9 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct 
> net_device *dev,
> } else {
> struct vxlan_sock *sock6 = rcu_dereference(vxlan->vn6_sock);
>
> +   if (!ifindex)
> +   ifindex = sock6->sock->sk->sk_bound_dev_if;
> +
> ndst = vxlan6_get_route(vxlan, dev, sock6, skb, ifindex, tos,
> label, >sin6.sin6_addr,
> _ip.sin6.sin6_addr,
> @@ -2813,7 +2820,7 @@ static const struct ethtool_ops vxlan_ethtool_ops = {
>  };
>
>  static struct socket *vxlan_create_sock(struct 

Re: [RFC v3 2/3] vxlan: add support for underlay in non-default VRF

2018-11-20 Thread David Ahern
On 11/20/18 7:23 AM, Alexis Bauvin wrote:
> Creating a VXLAN device with is underlay in the non-default VRF makes
> egress route lookup fail or incorrect since it will resolve in the
> default VRF, and ingress fail because the socket listens in the default
> VRF.
> 
> This patch binds the underlying UDP tunnel socket to the l3mdev of the
> lower device of the VXLAN device. This will listen in the proper VRF and
> output traffic from said l3mdev, matching l3mdev routing rules and
> looking up the correct routing table.
> 
> When the VXLAN device does not have a lower device, or the lower device
> is in the default VRF, the socket will not be bound to any interface,
> keeping the previous behaviour.
> 
> The underlay l3mdev is deduced from the VXLAN lower device
> (IFLA_VXLAN_LINK).
> 
> The l3mdev_master_upper_ifindex_by_index function has been added to
> l3mdev. Its goal is to fetch the effective l3mdev of an interface which
> is not a direct slave of said l3mdev. It handles the following example,
> properly resolving the l3mdev of eth0 to vrf-blue:
> 
> +--+ +-+
> |  | | |
> | vrf-blue | | vrf-red |
> |  | | |
> ++-+ +++
>  ||
>  ||
> ++-+ +++
> |  | | |
> | br-blue  | | br-red  |
> |  | | |
> ++-+ +---+-+---+
>  |   | |
>  | +-+ +-+
>  | | |
> ++-++--++   +++
> |  |  lower device  |   |   | |
> |   eth0   | <- - - - - - - | vxlan-red |   | tap-red | (... more taps)
> |  ||   |   | |
> +--++---+   +-+

same here. Very helpful diagram.


> diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
> index 27bd586b94b0..a3de08122269 100644
> --- a/drivers/net/vxlan.c
> +++ b/drivers/net/vxlan.c

The vxlan changes look ok to me. It would be good for someone who know
that code better than I do to review it.

Move the following l3mdev changes to a separate patch - introduce infra
changes separate from their use:

> diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h
> index 3832099289c5..78fa0ac4613c 100644
> --- a/include/net/l3mdev.h
> +++ b/include/net/l3mdev.h
> @@ -101,6 +101,17 @@ struct net_device *l3mdev_master_dev_rcu(const struct 
> net_device *_dev)
>   return master;
>  }
>  
> +int l3mdev_master_upper_ifindex_by_index_rcu(struct net *net, int ifindex);
> +static inline
> +int l3mdev_master_upper_ifindex_by_index(struct net *net, int ifindex)
> +{
> + rcu_read_lock();
> + ifindex = l3mdev_master_upper_ifindex_by_index_rcu(net, ifindex);
> + rcu_read_unlock();
> +
> + return ifindex;
> +}
> +
>  u32 l3mdev_fib_table_rcu(const struct net_device *dev);
>  u32 l3mdev_fib_table_by_index(struct net *net, int ifindex);
>  static inline u32 l3mdev_fib_table(const struct net_device *dev)
> @@ -207,6 +218,17 @@ static inline int l3mdev_master_ifindex_by_index(struct 
> net *net, int ifindex)
>   return 0;
>  }
>  
> +static inline
> +int l3mdev_master_upper_ifindex_by_index_rcu(struct net *net, int ifindex)
> +{
> + return 0;
> +}
> +static inline
> +int l3mdev_master_upper_ifindex_by_index(struct net *net, int ifindex)
> +{
> + return 0;
> +}
> +
>  static inline
>  struct net_device *l3mdev_master_dev_rcu(const struct net_device *dev)
>  {
> diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c
> index 8da86ceca33d..309dee76724e 100644
> --- a/net/l3mdev/l3mdev.c
> +++ b/net/l3mdev/l3mdev.c
> @@ -46,6 +46,24 @@ int l3mdev_master_ifindex_rcu(const struct net_device *dev)
>  }
>  EXPORT_SYMBOL_GPL(l3mdev_master_ifindex_rcu);
>  
> +/**
> + *   l3mdev_master_upper_ifindex_by_index - get index of upper l3 master
> + *  device
> + *   @net: network namespace for device index lookup
> + *   @ifindex: targeted interface
> + */
> +int l3mdev_master_upper_ifindex_by_index_rcu(struct net *net, int ifindex)
> +{
> + struct net_device *dev;
> +
> + dev = dev_get_by_index_rcu(net, ifindex);
> + while (dev && !netif_is_l3_master(dev))
> + dev = netdev_master_upper_dev_get(dev);
> +
> + return dev ? dev->ifindex : 0;
> +}
> +EXPORT_SYMBOL_GPL(l3mdev_master_upper_ifindex_by_index_rcu);
> +
>  /**
>   *   l3mdev_fib_table - get FIB table id associated with an L3
>   * master interface
> 



[RFC v3 2/3] vxlan: add support for underlay in non-default VRF

2018-11-20 Thread Alexis Bauvin
Creating a VXLAN device with is underlay in the non-default VRF makes
egress route lookup fail or incorrect since it will resolve in the
default VRF, and ingress fail because the socket listens in the default
VRF.

This patch binds the underlying UDP tunnel socket to the l3mdev of the
lower device of the VXLAN device. This will listen in the proper VRF and
output traffic from said l3mdev, matching l3mdev routing rules and
looking up the correct routing table.

When the VXLAN device does not have a lower device, or the lower device
is in the default VRF, the socket will not be bound to any interface,
keeping the previous behaviour.

The underlay l3mdev is deduced from the VXLAN lower device
(IFLA_VXLAN_LINK).

The l3mdev_master_upper_ifindex_by_index function has been added to
l3mdev. Its goal is to fetch the effective l3mdev of an interface which
is not a direct slave of said l3mdev. It handles the following example,
properly resolving the l3mdev of eth0 to vrf-blue:

+--+ +-+
|  | | |
| vrf-blue | | vrf-red |
|  | | |
++-+ +++
 ||
 ||
++-+ +++
|  | | |
| br-blue  | | br-red  |
|  | | |
++-+ +---+-+---+
 |   | |
 | +-+ +-+
 | | |
++-++--++   +++
|  |  lower device  |   |   | |
|   eth0   | <- - - - - - - | vxlan-red |   | tap-red | (... more taps)
|  ||   |   | |
+--++---+   +-+

Signed-off-by: Alexis Bauvin 
Reviewed-by: Amine Kherbouche 
Tested-by: Amine Kherbouche 
---
 drivers/net/vxlan.c  | 32 
 include/net/l3mdev.h | 22 ++
 net/l3mdev/l3mdev.c  | 18 ++
 3 files changed, 64 insertions(+), 8 deletions(-)

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 27bd586b94b0..a3de08122269 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -212,7 +212,7 @@ static inline struct vxlan_rdst *first_remote_rtnl(struct 
vxlan_fdb *fdb)
  * and enabled unshareable flags.
  */
 static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family,
- __be16 port, u32 flags)
+ __be16 port, u32 flags, int ifindex)
 {
struct vxlan_sock *vs;
 
@@ -221,7 +221,8 @@ static struct vxlan_sock *vxlan_find_sock(struct net *net, 
sa_family_t family,
hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) {
if (inet_sk(vs->sock->sk)->inet_sport == port &&
vxlan_get_sk_family(vs) == family &&
-   vs->flags == flags)
+   vs->flags == flags &&
+   vs->sock->sk->sk_bound_dev_if == ifindex)
return vs;
}
return NULL;
@@ -261,7 +262,7 @@ static struct vxlan_dev *vxlan_find_vni(struct net *net, 
int ifindex,
 {
struct vxlan_sock *vs;
 
-   vs = vxlan_find_sock(net, family, port, flags);
+   vs = vxlan_find_sock(net, family, port, flags, ifindex);
if (!vs)
return NULL;
 
@@ -2172,6 +2173,9 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct 
net_device *dev,
struct rtable *rt;
__be16 df = 0;
 
+   if (!ifindex)
+   ifindex = sock4->sock->sk->sk_bound_dev_if;
+
rt = vxlan_get_route(vxlan, dev, sock4, skb, ifindex, tos,
 dst->sin.sin_addr.s_addr,
 _ip.sin.sin_addr.s_addr,
@@ -2210,6 +2214,9 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct 
net_device *dev,
} else {
struct vxlan_sock *sock6 = rcu_dereference(vxlan->vn6_sock);
 
+   if (!ifindex)
+   ifindex = sock6->sock->sk->sk_bound_dev_if;
+
ndst = vxlan6_get_route(vxlan, dev, sock6, skb, ifindex, tos,
label, >sin6.sin6_addr,
_ip.sin6.sin6_addr,
@@ -2813,7 +2820,7 @@ static const struct ethtool_ops vxlan_ethtool_ops = {
 };
 
 static struct socket *vxlan_create_sock(struct net *net, bool ipv6,
-   __be16 port, u32 flags)
+   __be16 port, u32 flags, int ifindex)
 {
struct socket *sock;
struct udp_port_cfg udp_conf;
@@ -2831,6 +2838,7 @@ static struct