This commit allows the creation of multiple VTI tunnels with the same src+dst pair, via a new VTI_KEYED flag. This makes it possible to maintain multiple IPsec tunnels to the same security gateway, with the tunnels distinguished by SPI.
The new semantics are as follows: - The output path is the same as existing VTIs. A routing lookup matches a VTI interface. The VTI uses its o_key as the mark to select an XFRM state. The state transforms the packet. - Input works as follows: 1. Attempt to match a regular VTI by IP addresses only. If that succeeds, use the i_key as the mark to look up the XFRM state. 2. If the match failed, do an XFRM state lookup that ignores the mark. If that finds a state, then use the state match's mark to find the tunnel by its i_key. - ICMP errors are similar to input, except the search is for the outbound XFRM state, because the only data that is available is the outbound SPI. Thus, ICMP errors are only processed if the i_key is the same as the o_key. AFAICS this is consistent with GRE tunnels, but not with existing VTI behaviour. Tested: https://android-review.googlesource.com/571524 Signed-off-by: Lorenzo Colitti <lore...@google.com> --- include/uapi/linux/if_tunnel.h | 3 ++ net/ipv4/ip_vti.c | 75 +++++++++++++++++++++++-------- net/ipv6/ip6_vti.c | 100 +++++++++++++++++++++++++++++++---------- 3 files changed, 136 insertions(+), 42 deletions(-) diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h index 1b3d148c4560..c2ec509cbc9e 100644 --- a/include/uapi/linux/if_tunnel.h +++ b/include/uapi/linux/if_tunnel.h @@ -147,6 +147,8 @@ enum { /* VTI-mode i_flags */ #define VTI_ISVTI ((__force __be16)0x0001) +#define VTI_KEYED ((__force __be16)0x0002) +#define VTI_IFLAG_MASK ((__force __be16)0x0003) enum { IFLA_VTI_UNSPEC, @@ -156,6 +158,7 @@ enum { IFLA_VTI_LOCAL, IFLA_VTI_REMOTE, IFLA_VTI_FWMARK, + IFLA_VTI_IFLAGS, __IFLA_VTI_MAX, }; diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 850625598187..f5793782c418 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -63,6 +63,17 @@ vti4_find_tunnel(struct sk_buff *skb, __be32 spi, struct xfrm_state **x) *x = xfrm_state_lookup(net, be32_to_cpu(tunnel->parms.i_key), (xfrm_address_t 
*)&iph->daddr, spi, iph->protocol, AF_INET); + } else { + *x = xfrm_state_lookup_loose(net, skb->mark, + (xfrm_address_t *) &iph->daddr, + spi, iph->protocol, AF_INET); + if (!*x) + return NULL; + tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_KEY, + iph->saddr, iph->daddr, + cpu_to_be32((*x)->mark.v)); + if (!tunnel) + xfrm_state_put(*x); } return tunnel; @@ -302,7 +313,6 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) static int vti4_err(struct sk_buff *skb, u32 info) { __be32 spi; - __u32 mark; struct xfrm_state *x; struct ip_tunnel *tunnel; struct ip_esp_hdr *esph; @@ -313,13 +323,6 @@ static int vti4_err(struct sk_buff *skb, u32 info) int protocol = iph->protocol; struct ip_tunnel_net *itn = net_generic(net, vti_net_id); - tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, - iph->daddr, iph->saddr, 0); - if (!tunnel) - return -1; - - mark = be32_to_cpu(tunnel->parms.o_key); - switch (protocol) { case IPPROTO_ESP: esph = (struct ip_esp_hdr *)(skb->data+(iph->ihl<<2)); @@ -347,18 +350,46 @@ static int vti4_err(struct sk_buff *skb, u32 info) return 0; } - x = xfrm_state_lookup(net, mark, (const xfrm_address_t *)&iph->daddr, - spi, protocol, AF_INET); - if (!x) - return 0; + tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, + iph->daddr, iph->saddr, 0); + if (tunnel) { + x = xfrm_state_lookup(net, be32_to_cpu(tunnel->parms.o_key), + (xfrm_address_t *)&iph->daddr, + spi, iph->protocol, AF_INET); + } else { + x = xfrm_state_lookup_loose(net, skb->mark, + (xfrm_address_t *)&iph->daddr, + spi, iph->protocol, AF_INET); + if (!x) + goto out; + tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_KEY, + iph->daddr, iph->saddr, + cpu_to_be32(x->mark.v)); + } + + if (!tunnel || !x) + goto out; if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) ipv4_update_pmtu(skb, net, info, 0, 0, protocol, 0); else ipv4_redirect(skb, net, 0, 0, protocol, 0); - xfrm_state_put(x); - return 0; +out: + if (x) + 
xfrm_state_put(x); + + return tunnel ? 0 : -1; +} + +static __be16 vti_flags_to_tnl_flags(__be16 i_flags) +{ + return VTI_ISVTI | ((i_flags & VTI_KEYED) ? GRE_KEY : 0); +} + +static __be16 tnl_flags_to_vti_flags(__be16 i_flags) +{ + return VTI_ISVTI | ((i_flags & GRE_KEY) ? VTI_KEYED : 0); } static int @@ -381,7 +412,7 @@ vti_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (!(p.o_flags & GRE_KEY)) p.o_key = 0; - p.i_flags = VTI_ISVTI; + p.i_flags = vti_flags_to_tnl_flags(p.i_flags); err = ip_tunnel_ioctl(dev, &p, cmd); if (err) @@ -508,8 +539,6 @@ static void vti_netlink_parms(struct nlattr *data[], if (!data) return; - parms->i_flags = VTI_ISVTI; - if (data[IFLA_VTI_LINK]) parms->link = nla_get_u32(data[IFLA_VTI_LINK]); @@ -527,6 +556,11 @@ static void vti_netlink_parms(struct nlattr *data[], if (data[IFLA_VTI_FWMARK]) *fwmark = nla_get_u32(data[IFLA_VTI_FWMARK]); + + if (data[IFLA_VTI_IFLAGS]) + parms->i_flags = nla_get_be16(data[IFLA_VTI_IFLAGS]); + + parms->i_flags = vti_flags_to_tnl_flags(parms->i_flags); } static int vti_newlink(struct net *src_net, struct net_device *dev, @@ -567,6 +601,8 @@ static size_t vti_get_size(const struct net_device *dev) nla_total_size(4) + /* IFLA_VTI_FWMARK */ nla_total_size(4) + + /* IFLA_VTI_IFLAGS */ + nla_total_size(2) + 0; } @@ -580,7 +616,9 @@ static int vti_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_be32(skb, IFLA_VTI_OKEY, p->o_key) || nla_put_in_addr(skb, IFLA_VTI_LOCAL, p->iph.saddr) || nla_put_in_addr(skb, IFLA_VTI_REMOTE, p->iph.daddr) || - nla_put_u32(skb, IFLA_VTI_FWMARK, t->fwmark)) + nla_put_u32(skb, IFLA_VTI_FWMARK, t->fwmark) || + nla_put_be16(skb, IFLA_VTI_IFLAGS, + tnl_flags_to_vti_flags(p->i_flags))) return -EMSGSIZE; return 0; @@ -593,6 +631,7 @@ static const struct nla_policy vti_policy[IFLA_VTI_MAX + 1] = { [IFLA_VTI_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) }, [IFLA_VTI_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) }, [IFLA_VTI_FWMARK] = { 
.type = NLA_U32 }, + [IFLA_VTI_IFLAGS] = { .type = NLA_U16 }, }; static struct rtnl_link_ops vti_link_ops __read_mostly = { diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index d0676f2f99eb..3797738c828f 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -54,9 +54,10 @@ #define IP6_VTI_HASH_SIZE_SHIFT 5 #define IP6_VTI_HASH_SIZE (1 << IP6_VTI_HASH_SIZE_SHIFT) -static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2) +static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2, + __be32 i_key) { - u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2); + u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2) ^ i_key; return hash_32(hash, IP6_VTI_HASH_SIZE_SHIFT); } @@ -78,11 +79,17 @@ struct vti6_net { #define for_each_vti6_tunnel_rcu(start) \ for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) +static __be32 vti6_get_hash_key(const struct __ip6_tnl_parm *p) +{ + return (p->i_flags & GRE_KEY) ? p->i_key : 0; +} + /** - * vti6_tnl_lookup - fetch tunnel matching the end-point addresses + * vti6_tnl_lookup - fetch tunnel matching the end-point addresses and i_key * @net: network namespace * @remote: the address of the tunnel exit-point * @local: the address of the tunnel entry-point + * @i_key: the i_key of the tunnel * * Return: * tunnel matching given end-points if found, @@ -91,9 +98,9 @@ struct vti6_net { **/ static struct ip6_tnl * vti6_tnl_lookup(struct net *net, const struct in6_addr *remote, - const struct in6_addr *local) + const struct in6_addr *local, __be32 i_key) { - unsigned int hash = HASH(remote, local); + unsigned int hash = HASH(remote, local, i_key); struct ip6_tnl *t; struct vti6_net *ip6n = net_generic(net, vti6_net_id); struct in6_addr any; @@ -101,21 +108,24 @@ vti6_tnl_lookup(struct net *net, const struct in6_addr *remote, for_each_vti6_tunnel_rcu(ip6n->tnls_r_l[hash]) { if (ipv6_addr_equal(local, &t->parms.laddr) && ipv6_addr_equal(remote, &t->parms.raddr) && + 
vti6_get_hash_key(&t->parms) == i_key && (t->dev->flags & IFF_UP)) return t; } memset(&any, 0, sizeof(any)); - hash = HASH(&any, local); + hash = HASH(&any, local, i_key); for_each_vti6_tunnel_rcu(ip6n->tnls_r_l[hash]) { if (ipv6_addr_equal(local, &t->parms.laddr) && + vti6_get_hash_key(&t->parms) == i_key && (t->dev->flags & IFF_UP)) return t; } - hash = HASH(remote, &any); + hash = HASH(remote, &any, i_key); for_each_vti6_tunnel_rcu(ip6n->tnls_r_l[hash]) { if (ipv6_addr_equal(remote, &t->parms.raddr) && + vti6_get_hash_key(&t->parms) == i_key && (t->dev->flags & IFF_UP)) return t; } @@ -147,7 +157,7 @@ vti6_tnl_bucket(struct vti6_net *ip6n, const struct __ip6_tnl_parm *p) if (!ipv6_addr_any(remote) || !ipv6_addr_any(local)) { prio = 1; - h = HASH(remote, local); + h = HASH(remote, local, vti6_get_hash_key(p)); } return &ip6n->tnls[prio][h]; } @@ -266,7 +276,8 @@ static struct ip6_tnl *vti6_locate(struct net *net, struct __ip6_tnl_parm *p, (t = rtnl_dereference(*tp)) != NULL; tp = &t->next) { if (ipv6_addr_equal(local, &t->parms.laddr) && - ipv6_addr_equal(remote, &t->parms.raddr)) { + ipv6_addr_equal(remote, &t->parms.raddr) && + vti6_get_hash_key(&t->parms) == vti6_get_hash_key(p)) { if (create) return NULL; @@ -304,11 +315,21 @@ vti6_find_tunnel(struct sk_buff *skb, __be32 spi, struct xfrm_state **x) struct net *net = dev_net(skb->dev); struct ip6_tnl *t; - t = vti6_tnl_lookup(net, &ipv6h->saddr, &ipv6h->daddr); + t = vti6_tnl_lookup(net, &ipv6h->saddr, &ipv6h->daddr, 0); if (t) { *x = xfrm_state_lookup(net, be32_to_cpu(t->parms.i_key), (xfrm_address_t *)&ipv6h->daddr, spi, ipv6h->nexthdr, AF_INET6); + } else { + *x = xfrm_state_lookup_loose(net, skb->mark, + (xfrm_address_t *) &ipv6h->daddr, + spi, ipv6h->nexthdr, AF_INET6); + if (!*x) + return NULL; + t = vti6_tnl_lookup(net, &ipv6h->saddr, &ipv6h->daddr, + cpu_to_be32((*x)->mark.v)); + if (!t) + xfrm_state_put(*x); } return t; @@ -613,7 +634,6 @@ static int vti6_err(struct sk_buff *skb, struct inet6_skb_parm 
*opt, u8 type, u8 code, int offset, __be32 info) { __be32 spi; - __u32 mark; struct xfrm_state *x; struct ip6_tnl *t; struct ip_esp_hdr *esph; @@ -623,12 +643,6 @@ static int vti6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, const struct ipv6hdr *iph = (const struct ipv6hdr *)skb->data; int protocol = iph->nexthdr; - t = vti6_tnl_lookup(dev_net(skb->dev), &iph->daddr, &iph->saddr); - if (!t) - return -1; - - mark = be32_to_cpu(t->parms.o_key); - switch (protocol) { case IPPROTO_ESP: esph = (struct ip_esp_hdr *)(skb->data + offset); @@ -650,19 +664,35 @@ static int vti6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, type != NDISC_REDIRECT) return 0; - x = xfrm_state_lookup(net, mark, (const xfrm_address_t *)&iph->daddr, - spi, protocol, AF_INET6); - if (!x) - return 0; + t = vti6_tnl_lookup(net, &iph->daddr, &iph->saddr, 0); + if (t) { + x = xfrm_state_lookup(net, be32_to_cpu(t->parms.o_key), + (xfrm_address_t *)&iph->daddr, + spi, protocol, AF_INET6); + } else { + x = xfrm_state_lookup_loose(net, skb->mark, + (xfrm_address_t *) &iph->daddr, + spi, protocol, AF_INET6); + if (!x) + goto out; + t = vti6_tnl_lookup(net, &iph->daddr, &iph->saddr, + cpu_to_be32(x->mark.v)); + } + + if (!t || !x) + goto out; if (type == NDISC_REDIRECT) ip6_redirect(skb, net, skb->dev->ifindex, 0, sock_net_uid(net, NULL)); else ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); - xfrm_state_put(x); - return 0; +out: + if (x) + xfrm_state_put(x); + + return t ? 0 : -1; } static void vti6_link_config(struct ip6_tnl *t) @@ -957,9 +987,21 @@ static int vti6_validate(struct nlattr *tb[], struct nlattr *data[], return 0; } +static __be16 vti_flags_to_tnl_flags(__be16 i_flags) +{ + return VTI_ISVTI | ((i_flags & VTI_KEYED) ? GRE_KEY : 0); +} + +static __be16 tnl_flags_to_vti_flags(__be16 i_flags) +{ + return VTI_ISVTI | ((i_flags & GRE_KEY) ? 
VTI_KEYED : 0); +} + static void vti6_netlink_parms(struct nlattr *data[], struct __ip6_tnl_parm *parms) { + __be16 i_flags = 0; + memset(parms, 0, sizeof(*parms)); if (!data) @@ -982,6 +1024,11 @@ static void vti6_netlink_parms(struct nlattr *data[], if (data[IFLA_VTI_FWMARK]) parms->fwmark = nla_get_u32(data[IFLA_VTI_FWMARK]); + + if (data[IFLA_VTI_IFLAGS]) + i_flags = nla_get_be16(data[IFLA_VTI_IFLAGS]); + + parms->i_flags = vti_flags_to_tnl_flags(i_flags); } static int vti6_newlink(struct net *src_net, struct net_device *dev, @@ -1051,6 +1098,8 @@ static size_t vti6_get_size(const struct net_device *dev) nla_total_size(4) + /* IFLA_VTI_FWMARK */ nla_total_size(4) + + /* IFLA_VTI_IFLAGS */ + nla_total_size(2) + 0; } @@ -1064,7 +1113,9 @@ static int vti6_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_in6_addr(skb, IFLA_VTI_REMOTE, &parm->raddr) || nla_put_be32(skb, IFLA_VTI_IKEY, parm->i_key) || nla_put_be32(skb, IFLA_VTI_OKEY, parm->o_key) || - nla_put_u32(skb, IFLA_VTI_FWMARK, parm->fwmark)) + nla_put_u32(skb, IFLA_VTI_FWMARK, parm->fwmark) || + nla_put_be16(skb, IFLA_VTI_IFLAGS, + tnl_flags_to_vti_flags(parm->i_flags))) goto nla_put_failure; return 0; @@ -1079,6 +1130,7 @@ static const struct nla_policy vti6_policy[IFLA_VTI_MAX + 1] = { [IFLA_VTI_IKEY] = { .type = NLA_U32 }, [IFLA_VTI_OKEY] = { .type = NLA_U32 }, [IFLA_VTI_FWMARK] = { .type = NLA_U32 }, + [IFLA_VTI_IFLAGS] = { .type = NLA_U16 }, }; static struct rtnl_link_ops vti6_link_ops __read_mostly = { -- 2.15.1.504.g5279b80103-goog