Hi Greg, thanks for the review. I will check on it. Yifeng
On Fri, Mar 29, 2019 at 8:19 AM Gregory Rose <gvrose8...@gmail.com> wrote: > > > On 3/15/2019 7:20 PM, Yifeng Sun wrote: > > This patch introduces changes needed by OVS to support latest > > Linux kernels (4.19.x and 4.20.x). Recent kernels changed many > > APIs used by OVS. One major change is that struct nf_conntrack_l3proto > > became unvisible outside of kernel, so the needed get_l4proto > > function is added in file compact/nf_conntrack_core.c to > > accommodate this issue. > > > > This patch doesn't introduce new failed tests when running > > 'make check-kmod' for kernels listed below: > > 3.10.0-957.5.1.el7.x86_64 > > 4.4.0-142-generic > > 4.17.14 > > 4.18.0-16-generic > > 4.19.26 > > 4.20.13 > > > > Travis passed at > > https://travis-ci.org/yifsun/ovs-travis/builds/507034016 > > Hi Yifeng, > > thanks for doing this work! However, I am seeing an issue when > compiling on 4.20.17 and then installing > the kernel modules: > > make -C /lib/modules/4.20.17/build > M=/home/gvrose/prj/ovs-experimental/_build/datapath/linux modules_install > make[2]: Entering directory '/home/gvrose/prj/linux-4.20.17' > INSTALL > /home/gvrose/prj/ovs-experimental/_build/datapath/linux/openvswitch.ko > INSTALL > /home/gvrose/prj/ovs-experimental/_build/datapath/linux/vport-geneve.ko > INSTALL > /home/gvrose/prj/ovs-experimental/_build/datapath/linux/vport-gre.ko > INSTALL > /home/gvrose/prj/ovs-experimental/_build/datapath/linux/vport-lisp.ko > INSTALL > /home/gvrose/prj/ovs-experimental/_build/datapath/linux/vport-stt.ko > INSTALL > /home/gvrose/prj/ovs-experimental/_build/datapath/linux/vport-vxlan.ko > DEPMOD 4.20.17 > depmod: WARNING: /lib/modules/4.20.17/extra/vport-lisp.ko needs unknown > symbol rpl_rtnl_delete_link > depmod: WARNING: /lib/modules/4.20.17/extra/vport-lisp.ko needs unknown > symbol rpl_lisp_xmit > depmod: WARNING: /lib/modules/4.20.17/extra/vport-lisp.ko needs unknown > symbol rpl_lisp_dev_create_fb > depmod: WARNING: /lib/modules/4.20.17/extra/vport-stt.ko needs unknown > symbol rpl_rtnl_delete_link > depmod: WARNING: /lib/modules/4.20.17/extra/vport-stt.ko needs unknown > symbol ovs_stt_dev_create_fb > depmod: WARNING: /lib/modules/4.20.17/extra/vport-stt.ko needs unknown > symbol ovs_stt_xmit > make[2]: Leaving directory '/home/gvrose/prj/linux-4.20.17' > > Could you check that? Maybe there were changes from 4.20.13 to 4.20.17? > > Thanks, > > - Greg > > > > > Signed-off-by: Yifeng Sun <pkusunyif...@gmail.com> > > --- > > .travis.yml | 2 + > > NEWS | 2 + > > acinclude.m4 | 20 ++++- > > datapath/conntrack.c | 72 ++++++++++++++++- > > .../include/net/netfilter/nf_conntrack_core.h | 6 ++ > > .../net/netfilter/nf_conntrack_count.h | 2 + > > datapath/linux/compat/nf_conncount.c | 6 +- > > datapath/linux/compat/nf_conntrack_core.c | 80 +++++++++++++++++++ > > datapath/linux/compat/nf_conntrack_proto.c | 3 + > > 9 files changed, 186 insertions(+), 7 deletions(-) > > > > diff --git a/.travis.yml b/.travis.yml > > index 32d5f1918..dc7a20b6e 100644 > > --- a/.travis.yml > > +++ b/.travis.yml > > @@ -35,6 +35,8 @@ env: > > - KERNEL=3.16.54 TESTSUITE=1 DPDK=1 > > - KERNEL=3.16.54 DPDK_SHARED=1 > > - KERNEL=3.16.54 DPDK_SHARED=1 OPTS="--enable-shared" > > + - KERNEL=4.20.16 > > + - KERNEL=4.19.29 > > - KERNEL=4.18.20 > > - KERNEL=4.17.19 > > - KERNEL=4.16.18 > > diff --git a/NEWS b/NEWS > > index 1e4744dbd..af5b5222f 100644 > > --- a/NEWS > > +++ b/NEWS > > @@ -25,6 +25,8 @@ Post-v2.11.0 > > - OVN: > > * Select IPAM mac_prefix in a random manner if not provided by the > > user > > - New QoS type "linux-netem" on Linux. > > + - Linux datapath: > > + * Support for the kernel versions 4.19.x, 4.20.x. > > > > v2.11.0 - 19 Feb 2019 > > --------------------- > > diff --git a/acinclude.m4 b/acinclude.m4 > > index 3cd6ea730..7b11b7740 100644 > > --- a/acinclude.m4 > > +++ b/acinclude.m4 > > @@ -151,10 +151,10 @@ AC_DEFUN([OVS_CHECK_LINUX], [ > > AC_MSG_RESULT([$kversion]) > > > > if test "$version" -ge 4; then > > - if test "$version" = 4 && test "$patchlevel" -le 18; then > > + if test "$version" = 4 && test "$patchlevel" -le 20; then > > : # Linux 4.x > > else > > - AC_ERROR([Linux kernel in $KBUILD is version $kversion, but > > version newer than 4.18.x is not supported (please refer to the FAQ for > > advice)]) > > + AC_ERROR([Linux kernel in $KBUILD is version $kversion, but > > version newer than 4.20.x is not supported (please refer to the FAQ for > > advice)]) > > fi > > elif test "$version" = 3 && test "$patchlevel" -ge 10; then > > : # Linux 3.x > > @@ -583,6 +583,22 @@ AC_DEFUN([OVS_CHECK_LINUX_COMPAT], [ > > [OVS_DEFINE([HAVE_VOID_INET_FRAGS_INIT])]) > > OVS_GREP_IFELSE([$KSRC/include/net/inetpeer.h], [vif], > > [OVS_DEFINE([HAVE_INETPEER_VIF_SUPPORT])]) > > + > > OVS_FIND_PARAM_IFELSE([$KSRC/include/net/netfilter/nf_conntrack_helper.h], > > + [nf_ct_helper_ext_add], [nf_conntrack_helper], > > + > > [OVS_DEFINE([HAVE_NF_CT_HELPER_EXT_ADD_TAKES_HELPER])]) > > + OVS_FIND_PARAM_IFELSE([$KSRC/include/net/netfilter/nf_conntrack_core.h], > > + [nf_ct_invert_tuple], [l3proto], > > + > > [OVS_DEFINE([HAVE_NF_CT_INVERT_TUPLE_TAKES_L3PROTO])]) > > + OVS_GREP_IFELSE([$KSRC/include/net/netfilter/nf_conntrack_core.h], > > [nf_ct_get_tuple], > > + [OVS_DEFINE([HAVE_NF_CT_GET_TUPLE])]) > > + OVS_FIND_PARAM_IFELSE([$KSRC/include/net/netfilter/nf_conntrack_core.h], > > + [nf_conntrack_in], [net], > > + [OVS_DEFINE([HAVE_NF_CONNTRACK_IN_TAKES_NET])]) > > + OVS_GREP_IFELSE([$KSRC/include/net/ipv6_frag.h], > > [IP6_DEFRAG_CONNTRACK_IN], > > + [OVS_DEFINE([HAVE_IPV6_FRAG_H])]) > > + > > OVS_FIND_PARAM_IFELSE([$KSRC/include/net/netfilter/nf_conntrack_l4proto.h], > > + [__nf_ct_l4proto_find], [l3proto], > > + > > [OVS_DEFINE([HAVE_NF_CT_L4PROTO_FIND_TAKES_L3PROTO])]) > > > > dnl Check for dst_cache and ipv6 lable to use backported tunnel > > infrastructure. > > dnl OVS does not really need ipv6 label field, but its presence > > signifies that > > diff --git a/datapath/conntrack.c b/datapath/conntrack.c > > index a7dc9e0c3..b9b79e2cc 100644 > > --- a/datapath/conntrack.c > > +++ b/datapath/conntrack.c > > @@ -38,6 +38,10 @@ > > #include <net/netfilter/nf_nat_l3proto.h> > > #endif > > > > +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) && defined(HAVE_IPV6_FRAG_H) > > +#include <net/ipv6_frag.h> > > +#endif > > + > > #include "datapath.h" > > #include "conntrack.h" > > #include "flow.h" > > @@ -645,32 +649,62 @@ static struct nf_conn * > > ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone > > *zone, > > u8 l3num, struct sk_buff *skb, bool natted) > > { > > - const struct nf_conntrack_l3proto *l3proto; > > const struct nf_conntrack_l4proto *l4proto; > > struct nf_conntrack_tuple tuple; > > struct nf_conntrack_tuple_hash *h; > > struct nf_conn *ct; > > - unsigned int dataoff; > > u8 protonum; > > > > +#ifdef HAVE_NF_CT_INVERT_TUPLE_TAKES_L3PROTO > > + const struct nf_conntrack_l3proto *l3proto; > > + unsigned int dataoff; > > + > > l3proto = __nf_ct_l3proto_find(l3num); > > if (l3proto->get_l4proto(skb, skb_network_offset(skb), &dataoff, > > &protonum) <= 0) { > > pr_debug("ovs_ct_find_existing: Can't get protonum\n"); > > return NULL; > > } > > +#else > > + int protooff; > > + > > + protooff = get_l4proto(skb, skb_network_offset(skb), > > + l3num, &protonum); > > + if (protooff <= 0) { > > + pr_warn("ovs_ct_find_existing: Can't get protonum\n"); > > + return NULL; > > + } > > +#endif > > + > > +#ifdef HAVE_NF_CT_L4PROTO_FIND_TAKES_L3PROTO > > l4proto = __nf_ct_l4proto_find(l3num, protonum); > > - if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num, > > - protonum, net, &tuple, l3proto, l4proto)) { > > +#else > > + l4proto = __nf_ct_l4proto_find(protonum); > > +#endif > > + > > +#ifdef HAVE_NF_CT_GET_TUPLE > > + if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), > > + l3num, net, &tuple)) { > > + pr_debug("ovs_ct_find_existing: Can't get tuple\n"); > > + return NULL; > > + } > > +#else > > + if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), > > + l3num, net, &tuple)) { > > pr_debug("ovs_ct_find_existing: Can't get tuple\n"); > > return NULL; > > } > > +#endif > > > > /* Must invert the tuple if skb has been transformed by NAT. */ > > if (natted) { > > struct nf_conntrack_tuple inverse; > > > > +#ifdef HAVE_NF_CT_INVERT_TUPLE_TAKES_L3PROTO > > if (!nf_ct_invert_tuple(&inverse, &tuple, l3proto, l4proto)) { > > +#else > > + if (!nf_ct_invert_tuple(&inverse, &tuple, l4proto)) { > > +#endif > > pr_debug("ovs_ct_find_existing: Inversion failed!\n"); > > return NULL; > > } > > @@ -989,6 +1023,9 @@ static int __ovs_ct_lookup(struct net *net, struct > > sw_flow_key *key, > > if (!cached) { > > struct nf_conn *tmpl = info->ct; > > int err; > > +#ifndef HAVE_NF_CONNTRACK_IN_TAKES_NET > > + struct nf_hook_state state = {}; > > +#endif > > > > /* Associate skb with specified zone. */ > > if (tmpl) { > > @@ -998,8 +1035,15 @@ static int __ovs_ct_lookup(struct net *net, struct > > sw_flow_key *key, > > nf_ct_set(skb, tmpl, IP_CT_NEW); > > } > > > > +#ifdef HAVE_NF_CONNTRACK_IN_TAKES_NET > > err = nf_conntrack_in(net, info->family, > > NF_INET_PRE_ROUTING, skb); > > +#else > > + state.hook = NF_INET_PRE_ROUTING, > > + state.pf = info->family, > > + state.net = net, > > + err = nf_conntrack_in(skb, &state); > > +#endif > > if (err != NF_ACCEPT) > > return -ENOENT; > > > > @@ -1307,9 +1351,17 @@ int ovs_ct_execute(struct net *net, struct sk_buff > > *skb, > > { > > int nh_ofs; > > int err; > > + /* From kernel 4.19.0+, Function handle_fragments may shrink skb's > > + * headroom, which will result in loss of ethernet header data. > > + * This buf is used to backup the header data before calling > > + * handle_fragments. */ > > + char buf[32]; > > > > /* The conntrack module expects to be working at L3. */ > > nh_ofs = skb_network_offset(skb); > > + if (nh_ofs > sizeof(buf)) > > + return -EINVAL; > > + memcpy(buf, skb->data, nh_ofs); > > skb_pull_rcsum(skb, nh_ofs); > > > > err = ovs_skb_network_trim(skb); > > @@ -1326,8 +1378,16 @@ int ovs_ct_execute(struct net *net, struct sk_buff > > *skb, > > err = ovs_ct_commit(net, key, info, skb); > > else > > err = ovs_ct_lookup(net, key, info, skb); > > + if (err) > > + return err; > > > > + if (skb_headroom(skb) < nh_ofs) { > > + err = pskb_expand_head(skb, nh_ofs, 0, GFP_ATOMIC); > > + if (err) > > + return err; > > + } > > skb_push(skb, nh_ofs); > > + memcpy(skb->data, buf, nh_ofs); > > skb_postpush_rcsum(skb, skb->data, nh_ofs); > > if (err) > > kfree_skb(skb); > > @@ -1362,7 +1422,11 @@ static int ovs_ct_add_helper(struct > > ovs_conntrack_info *info, const char *name, > > return -EINVAL; > > } > > > > +#ifdef HAVE_NF_CT_HELPER_EXT_ADD_TAKES_HELPER > > help = nf_ct_helper_ext_add(info->ct, helper, GFP_KERNEL); > > +#else > > + help = nf_ct_helper_ext_add(info->ct, GFP_KERNEL); > > +#endif > > if (!help) { > > nf_conntrack_helper_put(helper); > > return -ENOMEM; > > diff --git > > a/datapath/linux/compat/include/net/netfilter/nf_conntrack_core.h > > b/datapath/linux/compat/include/net/netfilter/nf_conntrack_core.h > > index 7834c8c25..7fca7dc55 100644 > > --- a/datapath/linux/compat/include/net/netfilter/nf_conntrack_core.h > > +++ b/datapath/linux/compat/include/net/netfilter/nf_conntrack_core.h > > @@ -104,4 +104,10 @@ static inline bool rpl_nf_ct_delete(struct nf_conn > > *ct, u32 portid, int report) > > #define nf_ct_delete rpl_nf_ct_delete > > #endif /* HAVE_NF_CONN_TIMER */ > > > > +#ifndef HAVE_NF_CT_INVERT_TUPLE_TAKES_L3PROTO > > +int rpl_get_l4proto(const struct sk_buff *skb, > > + unsigned int nhoff, u8 pf, u8 *l4num); > > +#define get_l4proto rpl_get_l4proto > > +#endif > > + > > #endif /* _NF_CONNTRACK_CORE_WRAPPER_H */ > > diff --git > > a/datapath/linux/compat/include/net/netfilter/nf_conntrack_count.h > > b/datapath/linux/compat/include/net/netfilter/nf_conntrack_count.h > > index fd536f3e1..a26eb9f87 100644 > > --- a/datapath/linux/compat/include/net/netfilter/nf_conntrack_count.h > > +++ b/datapath/linux/compat/include/net/netfilter/nf_conntrack_count.h > > @@ -4,6 +4,8 @@ > > #include <linux/list.h> > > > > #ifdef HAVE_UPSTREAM_NF_CONNCOUNT > > +#include <net/netfilter/nf_conntrack_tuple.h> > > +#include <net/netfilter/nf_conntrack_zones.h> > > #include_next <net/netfilter/nf_conntrack_count.h> > > > > static inline int rpl_nf_conncount_modinit(void) > > diff --git a/datapath/linux/compat/nf_conncount.c > > b/datapath/linux/compat/nf_conncount.c > > index 0bee96274..eeae440f8 100644 > > --- a/datapath/linux/compat/nf_conncount.c > > +++ b/datapath/linux/compat/nf_conncount.c > > @@ -13,6 +13,8 @@ > > * only ignore TIME_WAIT or gone connections > > * (C) CC Computer Consultants GmbH, 2007 > > */ > > +#ifndef HAVE_UPSTREAM_NF_CONNCOUNT > > + > > #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt > > #include <linux/in.h> > > #include <linux/in6.h> > > @@ -138,7 +140,7 @@ static bool conn_free(struct nf_conncount_list *list, > > > > if (list->count == 0) { > > spin_unlock(&list->list_lock); > > - return free_entry; > > + return free_entry; > > } > > > > list->count--; > > @@ -635,3 +637,5 @@ void rpl_nf_conncount_modexit(void) > > kmem_cache_destroy(conncount_conn_cachep); > > kmem_cache_destroy(conncount_rb_cachep); > > } > > + > > +#endif /* HAVE_UPSTREAM_NF_CONNCOUNT */ > > diff --git a/datapath/linux/compat/nf_conntrack_core.c > > b/datapath/linux/compat/nf_conntrack_core.c > > index a7d3d4331..ecfeae1a0 100644 > > --- a/datapath/linux/compat/nf_conntrack_core.c > > +++ b/datapath/linux/compat/nf_conntrack_core.c > > @@ -1,4 +1,7 @@ > > +#include <linux/types.h> > > #include <linux/version.h> > > +#include <net/ip.h> > > +#include <net/ipv6.h> > > > > #ifndef HAVE_NF_CT_ZONE_INIT > > > > @@ -11,3 +14,80 @@ const struct nf_conntrack_zone nf_ct_zone_dflt = { > > }; > > > > #endif /* HAVE_NF_CT_ZONE_INIT */ > > + > > +#ifndef HAVE_NF_CT_INVERT_TUPLE_TAKES_L3PROTO > > +static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, > > + u_int8_t *protonum) > > +{ > > + int dataoff = -1; > > + const struct iphdr *iph; > > + struct iphdr _iph; > > + > > + iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); > > + if (!iph) > > + return -1; > > + > > + /* Conntrack defragments packets, we might still see fragments > > + * inside ICMP packets though. > > + */ > > + if (iph->frag_off & htons(IP_OFFSET)) > > + return -1; > > + > > + dataoff = nhoff + (iph->ihl << 2); > > + *protonum = iph->protocol; > > + > > + /* Check bogus IP headers */ > > + if (dataoff > skb->len) { > > + pr_debug("bogus IPv4 packet: nhoff %u, ihl %u, skblen %u\n", > > + nhoff, iph->ihl << 2, skb->len); > > + return -1; > > + } > > + return dataoff; > > +} > > + > > +#if IS_ENABLED(CONFIG_IPV6) > > +static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, > > + u8 *protonum) > > +{ > > + int protoff = -1; > > + unsigned int extoff = nhoff + sizeof(struct ipv6hdr); > > + __be16 frag_off; > > + u8 nexthdr; > > + > > + if (skb_copy_bits(skb, nhoff + offsetof(struct ipv6hdr, nexthdr), > > + &nexthdr, sizeof(nexthdr)) != 0) { > > + pr_debug("can't get nexthdr\n"); > > + return -1; > > + } > > + protoff = ipv6_skip_exthdr(skb, extoff, &nexthdr, &frag_off); > > + /* > > + * (protoff == skb->len) means the packet has not data, just > > + * IPv6 and possibly extensions headers, but it is tracked anyway > > + */ > > + if (protoff < 0 || (frag_off & htons(~0x7)) != 0) { > > + pr_debug("can't find proto in pkt\n"); > > + return -1; > > + } > > + > > + *protonum = nexthdr; > > + return protoff; > > +} > > +#endif > > + > > +int rpl_get_l4proto(const struct sk_buff *skb, > > + unsigned int nhoff, u8 pf, u8 *l4num) > > +{ > > + switch (pf) { > > + case NFPROTO_IPV4: > > + return ipv4_get_l4proto(skb, nhoff, l4num); > > +#if IS_ENABLED(CONFIG_IPV6) > > + case NFPROTO_IPV6: > > + return ipv6_get_l4proto(skb, nhoff, l4num); > > +#endif > > + default: > > + *l4num = 0; > > + break; > > + } > > + return -1; > > +} > > +#endif /* HAVE_NF_CT_INVERT_TUPLE_TAKES_L3PROTO */ > > diff --git a/datapath/linux/compat/nf_conntrack_proto.c > > b/datapath/linux/compat/nf_conntrack_proto.c > > index 4ac66f61c..89c2f5422 100644 > > --- a/datapath/linux/compat/nf_conntrack_proto.c > > +++ b/datapath/linux/compat/nf_conntrack_proto.c > > @@ -1,7 +1,10 @@ > > #include <linux/types.h> > > > > #include <net/netfilter/nf_conntrack.h> > > + > > +#ifdef HAVE_NF_CT_INVERT_TUPLE_TAKES_L3PROTO > > #include <net/netfilter/nf_conntrack_l3proto.h> > > +#endif > > > > /* > > * Upstream net-next commmit 7e35ec0e8044 > _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev