Le 20 nov. 2018 à 16:04, David Ahern <d...@cumulusnetworks.com> a écrit : > > On 11/20/18 7:23 AM, Alexis Bauvin wrote: >> When underlay VRF changes, either because the lower device itself changed, >> or its VRF changed, this patch releases the current socket of the VXLAN >> device and recreates another one in the right VRF. This allows for >> on-the-fly change of the underlay VRF of a VXLAN device. >> >> Signed-off-by: Alexis Bauvin <abau...@scaleway.com> >> Reviewed-by: Amine Kherbouche <akherbou...@scaleway.com> >> Tested-by: Amine Kherbouche <akherbou...@scaleway.com> >> --- >> drivers/net/vxlan.c | 94 +++++++++++++++++++++++++++++++++++++++++++++ >> 1 file changed, 94 insertions(+) >> >> diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c >> index a3de08122269..1e6ccad6df6a 100644 >> --- a/drivers/net/vxlan.c >> +++ b/drivers/net/vxlan.c >> @@ -208,6 +208,18 @@ static inline struct vxlan_rdst >> *first_remote_rtnl(struct vxlan_fdb *fdb) >> return list_first_entry(&fdb->remotes, struct vxlan_rdst, list); >> } >> >> +static int vxlan_is_in_l3mdev_chain(struct net_device *chain, >> + struct net_device *dev) >> +{ >> + if (!chain) >> + return 0; >> + >> + if (chain->ifindex == dev->ifindex) >> + return 1; >> + return vxlan_is_in_l3mdev_chain(netdev_master_upper_dev_get(chain), >> + dev); >> +} > > This should return bool and true/false.
Will change in v4. > Also, why l3mdev in the name? None of the checks look at whether it is > an l3mdev master. The original intention was to detect if, through a master change, the l3mdev of the lower device changed. However, it is right it is much more generic than that. It will tell if dev is a master (direct or indirect) of chain. A rename to vxlan_is_upper_master would be preferred, maybe even move the function to net/core/dev.c, with other master-related functions. What do you think? > And again here, someone more familiar with the vxlan code should review it. > >> + >> /* Find VXLAN socket based on network namespace, address family and UDP port >> * and enabled unshareable flags. >> */ >> @@ -3720,6 +3732,33 @@ struct net_device *vxlan_dev_create(struct net *net, >> const char *name, >> } >> EXPORT_SYMBOL_GPL(vxlan_dev_create); >> >> +static int vxlan_reopen(struct vxlan_net *vn, struct vxlan_dev *vxlan) >> +{ >> + int ret = 0; >> + >> + if (vxlan_addr_multicast(&vxlan->default_dst.remote_ip) && >> + !vxlan_group_used(vn, vxlan)) >> + ret = vxlan_igmp_leave(vxlan); >> + vxlan_sock_release(vxlan); >> + >> + if (ret < 0) >> + return ret; >> + >> + ret = vxlan_sock_add(vxlan); >> + if (ret < 0) >> + return ret; >> + >> + if (vxlan_addr_multicast(&vxlan->default_dst.remote_ip)) { >> + ret = vxlan_igmp_join(vxlan); >> + if (ret == -EADDRINUSE) >> + ret = 0; >> + if (ret) >> + vxlan_sock_release(vxlan); >> + } >> + >> + return ret; >> +} >> + >> static void vxlan_handle_lowerdev_unregister(struct vxlan_net *vn, >> struct net_device *dev) >> { >> @@ -3742,6 +3781,55 @@ static void vxlan_handle_lowerdev_unregister(struct >> vxlan_net *vn, >> unregister_netdevice_many(&list_kill); >> } >> >> +static void vxlan_handle_change_upper(struct vxlan_net *vn, >> + struct net_device *dev) >> +{ >> + struct vxlan_dev *vxlan, *next; >> + >> + list_for_each_entry_safe(vxlan, next, &vn->vxlan_list, next) { >> + struct net_device *lower; >> + int err; >> + >> + lower = __dev_get_by_index(vxlan->net, >> + vxlan->cfg.remote_ifindex); >> + if (!vxlan_is_in_l3mdev_chain(lower, dev)) >> + continue; >> + >> + err = vxlan_reopen(vn, vxlan); >> + if (err < 0) >> + netdev_err(vxlan->dev, "Failed to reopen socket: %d\n", >> + err); >> + } >> +} >> + >> +static void vxlan_handle_change(struct vxlan_net *vn, struct net_device >> *dev) >> +{ >> + struct vxlan_dev *vxlan = netdev_priv(dev); >> + struct vxlan_sock *sock; >> + int l3mdev_index; >> + >> +#if IS_ENABLED(CONFIG_IPV6) >> + bool metadata = vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA; >> + bool ipv6 = vxlan->cfg.flags & VXLAN_F_IPV6 || metadata; >> + >> + sock = ipv6 ? rcu_dereference(vxlan->vn6_sock) >> + : rcu_dereference(vxlan->vn4_sock); >> +#else >> + sock = rcu_dereference(vxlan->vn4_sock); >> +#endif >> + >> + l3mdev_index = >> + l3mdev_master_upper_ifindex_by_index(vxlan->net, >> + vxlan->cfg.remote_ifindex); >> + if (sock->sock->sk->sk_bound_dev_if != l3mdev_index) { >> + int ret = vxlan_reopen(vn, vxlan); >> + >> + if (ret < 0) >> + netdev_err(vxlan->dev, "Failed to reopen socket: %d\n", >> + ret); >> + } >> +} >> + >> static int vxlan_netdevice_event(struct notifier_block *unused, >> unsigned long event, void *ptr) >> { >> @@ -3756,6 +3844,12 @@ static int vxlan_netdevice_event(struct >> notifier_block *unused, >> } else if (event == NETDEV_UDP_TUNNEL_PUSH_INFO || >> event == NETDEV_UDP_TUNNEL_DROP_INFO) { >> vxlan_offload_rx_ports(dev, event == >> NETDEV_UDP_TUNNEL_PUSH_INFO); >> + } else if (event == NETDEV_CHANGEUPPER) { >> + vxlan_handle_change_upper(vn, dev); >> + } else if (event == NETDEV_CHANGE) { >> + if (dev->rtnl_link_ops && >> + !strcmp(dev->rtnl_link_ops->kind, vxlan_link_ops.kind)) >> + vxlan_handle_change(vn, dev); >> } >> >> return NOTIFY_DONE;