Signed-off-by: Yifeng Sun <pkusunyif...@gmail.com>
---
.travis.yml | 2 +
NEWS | 2 +
acinclude.m4 | 20 ++++-
datapath/conntrack.c | 72 ++++++++++++++++-
.../include/net/netfilter/nf_conntrack_core.h | 6 ++
.../net/netfilter/nf_conntrack_count.h | 2 +
datapath/linux/compat/nf_conncount.c | 6 +-
datapath/linux/compat/nf_conntrack_core.c | 80 +++++++++++++++++++
datapath/linux/compat/nf_conntrack_proto.c | 3 +
9 files changed, 186 insertions(+), 7 deletions(-)
diff --git a/.travis.yml b/.travis.yml
index 32d5f1918..dc7a20b6e 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -35,6 +35,8 @@ env:
- KERNEL=3.16.54 TESTSUITE=1 DPDK=1
- KERNEL=3.16.54 DPDK_SHARED=1
- KERNEL=3.16.54 DPDK_SHARED=1 OPTS="--enable-shared"
+ - KERNEL=4.20.16
+ - KERNEL=4.19.29
- KERNEL=4.18.20
- KERNEL=4.17.19
- KERNEL=4.16.18
diff --git a/NEWS b/NEWS
index 1e4744dbd..af5b5222f 100644
--- a/NEWS
+++ b/NEWS
@@ -25,6 +25,8 @@ Post-v2.11.0
- OVN:
* Select IPAM mac_prefix in a random manner if not provided by the user
- New QoS type "linux-netem" on Linux.
+ - Linux datapath:
+ * Support for the kernel versions 4.19.x, 4.20.x.
v2.11.0 - 19 Feb 2019
---------------------
diff --git a/acinclude.m4 b/acinclude.m4
index 3cd6ea730..7b11b7740 100644
--- a/acinclude.m4
+++ b/acinclude.m4
@@ -151,10 +151,10 @@ AC_DEFUN([OVS_CHECK_LINUX], [
AC_MSG_RESULT([$kversion])
if test "$version" -ge 4; then
- if test "$version" = 4 && test "$patchlevel" -le 18; then
+ if test "$version" = 4 && test "$patchlevel" -le 20; then
: # Linux 4.x
else
- AC_ERROR([Linux kernel in $KBUILD is version $kversion, but version
newer than 4.18.x is not supported (please refer to the FAQ for advice)])
+ AC_ERROR([Linux kernel in $KBUILD is version $kversion, but version
newer than 4.20.x is not supported (please refer to the FAQ for advice)])
fi
elif test "$version" = 3 && test "$patchlevel" -ge 10; then
: # Linux 3.x
@@ -583,6 +583,22 @@ AC_DEFUN([OVS_CHECK_LINUX_COMPAT], [
[OVS_DEFINE([HAVE_VOID_INET_FRAGS_INIT])])
OVS_GREP_IFELSE([$KSRC/include/net/inetpeer.h], [vif],
[OVS_DEFINE([HAVE_INETPEER_VIF_SUPPORT])])
+ OVS_FIND_PARAM_IFELSE([$KSRC/include/net/netfilter/nf_conntrack_helper.h],
+ [nf_ct_helper_ext_add], [nf_conntrack_helper],
+ [OVS_DEFINE([HAVE_NF_CT_HELPER_EXT_ADD_TAKES_HELPER])])
+ OVS_FIND_PARAM_IFELSE([$KSRC/include/net/netfilter/nf_conntrack_core.h],
+ [nf_ct_invert_tuple], [l3proto],
+ [OVS_DEFINE([HAVE_NF_CT_INVERT_TUPLE_TAKES_L3PROTO])])
+ OVS_GREP_IFELSE([$KSRC/include/net/netfilter/nf_conntrack_core.h],
[nf_ct_get_tuple],
+ [OVS_DEFINE([HAVE_NF_CT_GET_TUPLE])])
+ OVS_FIND_PARAM_IFELSE([$KSRC/include/net/netfilter/nf_conntrack_core.h],
+ [nf_conntrack_in], [net],
+ [OVS_DEFINE([HAVE_NF_CONNTRACK_IN_TAKES_NET])])
+ OVS_GREP_IFELSE([$KSRC/include/net/ipv6_frag.h], [IP6_DEFRAG_CONNTRACK_IN],
+ [OVS_DEFINE([HAVE_IPV6_FRAG_H])])
+ OVS_FIND_PARAM_IFELSE([$KSRC/include/net/netfilter/nf_conntrack_l4proto.h],
+ [__nf_ct_l4proto_find], [l3proto],
+ [OVS_DEFINE([HAVE_NF_CT_L4PROTO_FIND_TAKES_L3PROTO])])
dnl Check for dst_cache and ipv6 lable to use backported tunnel infrastructure.
dnl OVS does not really need ipv6 label field, but its presence signifies
that
diff --git a/datapath/conntrack.c b/datapath/conntrack.c
index a7dc9e0c3..b9b79e2cc 100644
--- a/datapath/conntrack.c
+++ b/datapath/conntrack.c
@@ -38,6 +38,10 @@
#include <net/netfilter/nf_nat_l3proto.h>
#endif
+#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) && defined(HAVE_IPV6_FRAG_H)
+#include <net/ipv6_frag.h>
+#endif
+
#include "datapath.h"
#include "conntrack.h"
#include "flow.h"
@@ -645,32 +649,62 @@ static struct nf_conn *
ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone,
u8 l3num, struct sk_buff *skb, bool natted)
{
- const struct nf_conntrack_l3proto *l3proto;
const struct nf_conntrack_l4proto *l4proto;
struct nf_conntrack_tuple tuple;
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
- unsigned int dataoff;
u8 protonum;
+#ifdef HAVE_NF_CT_INVERT_TUPLE_TAKES_L3PROTO
+ const struct nf_conntrack_l3proto *l3proto;
+ unsigned int dataoff;
+
l3proto = __nf_ct_l3proto_find(l3num);
if (l3proto->get_l4proto(skb, skb_network_offset(skb), &dataoff,
&protonum) <= 0) {
pr_debug("ovs_ct_find_existing: Can't get protonum\n");
return NULL;
}
+#else
+ int protooff;
+
+ protooff = get_l4proto(skb, skb_network_offset(skb),
+ l3num, &protonum);
+ if (protooff <= 0) {
+ pr_warn("ovs_ct_find_existing: Can't get protonum\n");
+ return NULL;
+ }
+#endif
+
+#ifdef HAVE_NF_CT_L4PROTO_FIND_TAKES_L3PROTO
l4proto = __nf_ct_l4proto_find(l3num, protonum);
- if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num,
- protonum, net, &tuple, l3proto, l4proto)) {
+#else
+ l4proto = __nf_ct_l4proto_find(protonum);
+#endif
+
+#ifdef HAVE_NF_CT_GET_TUPLE
+ if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
+ l3num, net, &tuple)) {
+ pr_debug("ovs_ct_find_existing: Can't get tuple\n");
+ return NULL;
+ }
+#else
+ if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
+ l3num, net, &tuple)) {
pr_debug("ovs_ct_find_existing: Can't get tuple\n");
return NULL;
}
+#endif
/* Must invert the tuple if skb has been transformed by NAT. */
if (natted) {
struct nf_conntrack_tuple inverse;
+#ifdef HAVE_NF_CT_INVERT_TUPLE_TAKES_L3PROTO
if (!nf_ct_invert_tuple(&inverse, &tuple, l3proto, l4proto)) {
+#else
+ if (!nf_ct_invert_tuple(&inverse, &tuple, l4proto)) {
+#endif
pr_debug("ovs_ct_find_existing: Inversion failed!\n");
return NULL;
}
@@ -989,6 +1023,9 @@ static int __ovs_ct_lookup(struct net *net, struct
sw_flow_key *key,
if (!cached) {
struct nf_conn *tmpl = info->ct;
int err;
+#ifndef HAVE_NF_CONNTRACK_IN_TAKES_NET
+ struct nf_hook_state state = {};
+#endif
/* Associate skb with specified zone. */
if (tmpl) {
@@ -998,8 +1035,15 @@ static int __ovs_ct_lookup(struct net *net, struct
sw_flow_key *key,
nf_ct_set(skb, tmpl, IP_CT_NEW);
}
+#ifdef HAVE_NF_CONNTRACK_IN_TAKES_NET
err = nf_conntrack_in(net, info->family,
NF_INET_PRE_ROUTING, skb);
+#else
+ state.hook = NF_INET_PRE_ROUTING,
+ state.pf = info->family,
+ state.net = net,
+ err = nf_conntrack_in(skb, &state);
+#endif
if (err != NF_ACCEPT)
return -ENOENT;
@@ -1307,9 +1351,17 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb,
{
int nh_ofs;
int err;
+ /* From kernel 4.19.0+, Function handle_fragments may shrink skb's
+ * headroom, which will result in loss of ethernet header data.
+ * This buf is used to backup the header data before calling
+ * handle_fragments. */
+ char buf[32];
/* The conntrack module expects to be working at L3. */
nh_ofs = skb_network_offset(skb);
+ if (nh_ofs > sizeof(buf))
+ return -EINVAL;
+ memcpy(buf, skb->data, nh_ofs);
skb_pull_rcsum(skb, nh_ofs);
err = ovs_skb_network_trim(skb);
@@ -1326,8 +1378,16 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb,
err = ovs_ct_commit(net, key, info, skb);
else
err = ovs_ct_lookup(net, key, info, skb);
+ if (err)
+ return err;
+ if (skb_headroom(skb) < nh_ofs) {
+ err = pskb_expand_head(skb, nh_ofs, 0, GFP_ATOMIC);
+ if (err)
+ return err;
+ }
skb_push(skb, nh_ofs);
+ memcpy(skb->data, buf, nh_ofs);
skb_postpush_rcsum(skb, skb->data, nh_ofs);
if (err)
kfree_skb(skb);
@@ -1362,7 +1422,11 @@ static int ovs_ct_add_helper(struct ovs_conntrack_info
*info, const char *name,
return -EINVAL;
}
+#ifdef HAVE_NF_CT_HELPER_EXT_ADD_TAKES_HELPER
help = nf_ct_helper_ext_add(info->ct, helper, GFP_KERNEL);
+#else
+ help = nf_ct_helper_ext_add(info->ct, GFP_KERNEL);
+#endif
if (!help) {
nf_conntrack_helper_put(helper);
return -ENOMEM;
diff --git a/datapath/linux/compat/include/net/netfilter/nf_conntrack_core.h
b/datapath/linux/compat/include/net/netfilter/nf_conntrack_core.h
index 7834c8c25..7fca7dc55 100644
--- a/datapath/linux/compat/include/net/netfilter/nf_conntrack_core.h
+++ b/datapath/linux/compat/include/net/netfilter/nf_conntrack_core.h
@@ -104,4 +104,10 @@ static inline bool rpl_nf_ct_delete(struct nf_conn *ct,
u32 portid, int report)
#define nf_ct_delete rpl_nf_ct_delete
#endif /* HAVE_NF_CONN_TIMER */
+#ifndef HAVE_NF_CT_INVERT_TUPLE_TAKES_L3PROTO
+int rpl_get_l4proto(const struct sk_buff *skb,
+ unsigned int nhoff, u8 pf, u8 *l4num);
+#define get_l4proto rpl_get_l4proto
+#endif
+
#endif /* _NF_CONNTRACK_CORE_WRAPPER_H */
diff --git a/datapath/linux/compat/include/net/netfilter/nf_conntrack_count.h
b/datapath/linux/compat/include/net/netfilter/nf_conntrack_count.h
index fd536f3e1..a26eb9f87 100644
--- a/datapath/linux/compat/include/net/netfilter/nf_conntrack_count.h
+++ b/datapath/linux/compat/include/net/netfilter/nf_conntrack_count.h
@@ -4,6 +4,8 @@
#include <linux/list.h>
#ifdef HAVE_UPSTREAM_NF_CONNCOUNT
+#include <net/netfilter/nf_conntrack_tuple.h>
+#include <net/netfilter/nf_conntrack_zones.h>
#include_next <net/netfilter/nf_conntrack_count.h>
static inline int rpl_nf_conncount_modinit(void)
diff --git a/datapath/linux/compat/nf_conncount.c
b/datapath/linux/compat/nf_conncount.c
index 0bee96274..eeae440f8 100644
--- a/datapath/linux/compat/nf_conncount.c
+++ b/datapath/linux/compat/nf_conncount.c
@@ -13,6 +13,8 @@
* only ignore TIME_WAIT or gone connections
* (C) CC Computer Consultants GmbH, 2007
*/
+#ifndef HAVE_UPSTREAM_NF_CONNCOUNT
+
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/in.h>
#include <linux/in6.h>
@@ -138,7 +140,7 @@ static bool conn_free(struct nf_conncount_list *list,
if (list->count == 0) {
spin_unlock(&list->list_lock);
- return free_entry;
+ return free_entry;
}
list->count--;
@@ -635,3 +637,5 @@ void rpl_nf_conncount_modexit(void)
kmem_cache_destroy(conncount_conn_cachep);
kmem_cache_destroy(conncount_rb_cachep);
}
+
+#endif /* HAVE_UPSTREAM_NF_CONNCOUNT */
diff --git a/datapath/linux/compat/nf_conntrack_core.c
b/datapath/linux/compat/nf_conntrack_core.c
index a7d3d4331..ecfeae1a0 100644
--- a/datapath/linux/compat/nf_conntrack_core.c
+++ b/datapath/linux/compat/nf_conntrack_core.c
@@ -1,4 +1,7 @@
+#include <linux/types.h>
#include <linux/version.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
#ifndef HAVE_NF_CT_ZONE_INIT
@@ -11,3 +14,80 @@ const struct nf_conntrack_zone nf_ct_zone_dflt = {
};
#endif /* HAVE_NF_CT_ZONE_INIT */
+
+#ifndef HAVE_NF_CT_INVERT_TUPLE_TAKES_L3PROTO
+static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
+ u_int8_t *protonum)
+{
+ int dataoff = -1;
+ const struct iphdr *iph;
+ struct iphdr _iph;
+
+ iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
+ if (!iph)
+ return -1;
+
+ /* Conntrack defragments packets, we might still see fragments
+ * inside ICMP packets though.
+ */
+ if (iph->frag_off & htons(IP_OFFSET))
+ return -1;
+
+ dataoff = nhoff + (iph->ihl << 2);
+ *protonum = iph->protocol;
+
+ /* Check bogus IP headers */
+ if (dataoff > skb->len) {
+ pr_debug("bogus IPv4 packet: nhoff %u, ihl %u, skblen %u\n",
+ nhoff, iph->ihl << 2, skb->len);
+ return -1;
+ }
+ return dataoff;
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
+ u8 *protonum)
+{
+ int protoff = -1;
+ unsigned int extoff = nhoff + sizeof(struct ipv6hdr);
+ __be16 frag_off;
+ u8 nexthdr;
+
+ if (skb_copy_bits(skb, nhoff + offsetof(struct ipv6hdr, nexthdr),
+ &nexthdr, sizeof(nexthdr)) != 0) {
+ pr_debug("can't get nexthdr\n");
+ return -1;
+ }
+ protoff = ipv6_skip_exthdr(skb, extoff, &nexthdr, &frag_off);
+ /*
+ * (protoff == skb->len) means the packet has not data, just
+ * IPv6 and possibly extensions headers, but it is tracked anyway
+ */
+ if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
+ pr_debug("can't find proto in pkt\n");
+ return -1;
+ }
+
+ *protonum = nexthdr;
+ return protoff;
+}
+#endif
+
+int rpl_get_l4proto(const struct sk_buff *skb,
+ unsigned int nhoff, u8 pf, u8 *l4num)
+{
+ switch (pf) {
+ case NFPROTO_IPV4:
+ return ipv4_get_l4proto(skb, nhoff, l4num);
+#if IS_ENABLED(CONFIG_IPV6)
+ case NFPROTO_IPV6:
+ return ipv6_get_l4proto(skb, nhoff, l4num);
+#endif
+ default:
+ *l4num = 0;
+ break;
+ }
+ return -1;
+}
+#endif /* HAVE_NF_CT_INVERT_TUPLE_TAKES_L3PROTO */
diff --git a/datapath/linux/compat/nf_conntrack_proto.c
b/datapath/linux/compat/nf_conntrack_proto.c
index 4ac66f61c..89c2f5422 100644
--- a/datapath/linux/compat/nf_conntrack_proto.c
+++ b/datapath/linux/compat/nf_conntrack_proto.c
@@ -1,7 +1,10 @@
#include <linux/types.h>
#include <net/netfilter/nf_conntrack.h>
+
+#ifdef HAVE_NF_CT_INVERT_TUPLE_TAKES_L3PROTO
#include <net/netfilter/nf_conntrack_l3proto.h>
+#endif
/*
* Upstream net-next commmit 7e35ec0e8044