Thanks Yi-Hung! I will look at it and come up with a new version. Yifeng
On Mon, Apr 15, 2019 at 5:39 PM Yi-Hung Wei <yihung....@gmail.com> wrote: > > On Fri, Apr 12, 2019 at 10:23 AM Yifeng Sun <pkusunyif...@gmail.com> wrote: > > > > This patch introduces changes needed by OVS to support latest > > Linux kernels (4.19.x and 4.20.x). Recent kernels changed many > > APIs that are being used by OVS. One major change is that > > struct nf_conntrack_l3proto became invisible outside of kernel, so > > get_l4proto function is added in file compact/nf_conntrack_core.c to > > accommodate this issue. > > > > In addition, if kernel is not compiled with CONFIG_NF_NAT_IPV4 > > or CONFIG_NF_NAT_IPV6, flow action 'ct(nat)' can cause kernel > > to crash. This patch handles this condition. > > > > This patch doesn't introduce new failed tests when running > > 'make check-kmod' for kernels listed below: > > 3.10.0-957.5.1.el7.x86_64 > > 4.4.0-142-generic > > 4.17.14 > > 4.18.0-16-generic > > 4.19.34 > > 4.20.17 > > > > Travis passed at > > https://travis-ci.org/yifsun/ovs-travis/builds/519011670 > > > > Signed-off-by: Yifeng Sun <pkusunyif...@gmail.com> > > v1->v2: Fixed the CONFIG_NF_NAT_IPV4 bug by using Greg's config > > file. Thanks Greg! > > --- > Hi Yifeng, > > Thanks for the patch. > > I think this patch mixes a couple upstream patches backport, 4.19, > 4.20 compilation issues, and the nf_nat issue together so that it may > be hard to keep track of the kernel backport. IMHO, it would be > easier to break this patch down to a couple of them, so that it would > be easier to maintain and review. My detailed comments are as below. > > > diff --git a/datapath/conntrack.c b/datapath/conntrack.c > > index 52208bad3029..ce36a8ddea50 100644 > > --- a/datapath/conntrack.c > > +++ b/datapath/conntrack.c > > @@ -38,6 +38,10 @@ > > #include <net/netfilter/nf_nat_l3proto.h> > > #endif > > > > +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) && defined(HAVE_IPV6_FRAG_H) > > +#include <net/ipv6_frag.h> > > +#endif > > + > I think this is related to an upstream change 70b095c843266 ("ipv6: > remove dependency of nf_defrag_ipv6 on ipv6 module"). Should we split > this out in anther patch? We may be able to hide the following in the > compat layer. > +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) && defined(HAVE_IPV6_FRAG_H) > +#endif > > > > #include "datapath.h" > > #include "conntrack.h" > > #include "flow.h" > > @@ -645,32 +649,62 @@ static struct nf_conn * > > ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone, > > u8 l3num, struct sk_buff *skb, bool natted) > > { > > - const struct nf_conntrack_l3proto *l3proto; > > const struct nf_conntrack_l4proto *l4proto; > > struct nf_conntrack_tuple tuple; > > struct nf_conntrack_tuple_hash *h; > > struct nf_conn *ct; > > - unsigned int dataoff; > > u8 protonum; > > > > +#ifdef HAVE_NF_CT_INVERT_TUPLE_TAKES_L3PROTO > > + const struct nf_conntrack_l3proto *l3proto; > > + unsigned int dataoff; > > + > > l3proto = __nf_ct_l3proto_find(l3num); > > if (l3proto->get_l4proto(skb, skb_network_offset(skb), &dataoff, > > &protonum) <= 0) { > > pr_debug("ovs_ct_find_existing: Can't get protonum\n"); > > return NULL; > > } > > +#else > > + int protooff; > > + > > + protooff = get_l4proto(skb, skb_network_offset(skb), > > + l3num, &protonum); > > + if (protooff <= 0) { > > + pr_warn("ovs_ct_find_existing: Can't get protonum\n"); > > + return NULL; > > + } > > +#endif > > + > > +#ifdef HAVE_NF_CT_L4PROTO_FIND_TAKES_L3PROTO > > l4proto = __nf_ct_l4proto_find(l3num, protonum); > > - if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num, > > - protonum, net, &tuple, l3proto, l4proto)) { > > +#else > > + l4proto = __nf_ct_l4proto_find(protonum); > > +#endif > > + > > +#ifdef HAVE_NF_CT_GET_TUPLE > > + if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), > > + l3num, net, &tuple)) { > > + pr_debug("ovs_ct_find_existing: Can't get tuple\n"); > > + return NULL; > > + } > > +#else > > + if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), > > + l3num, net, &tuple)) { > > pr_debug("ovs_ct_find_existing: Can't get tuple\n"); > > return NULL; > > } > > +#endif > > > > /* Must invert the tuple if skb has been transformed by NAT. */ > > if (natted) { > > struct nf_conntrack_tuple inverse; > > > > +#ifdef HAVE_NF_CT_INVERT_TUPLE_TAKES_L3PROTO > > if (!nf_ct_invert_tuple(&inverse, &tuple, l3proto, > > l4proto)) { > > +#else > > + if (!nf_ct_invert_tuple(&inverse, &tuple, l4proto)) { > > +#endif > > pr_debug("ovs_ct_find_existing: Inversion > > failed!\n"); > > return NULL; > > } > The changes in ovs_ct_find_existing() is due to upstream commit > 60e3be94e6a ("openvswitch: use nf_ct_get_tuplepr, invert_tuplepr"). > Can we split it out as a separate patch? > > From the upstream commit 60e3be94e6a, instead of using > nf_ct_get_tuple() it invokes nf_ct_get_tuplepr(), and it looks like > nf_ct_get_tuplepr() was available in quite old kernel (at least > 2.6.26), and it gets updated to add network namespace support in > a31f1adc09489 ("netfilter: nf_conntrack: Add a struct net parameter to > l4_pkt_to_tuple"). Can we see if we can replace nf_ct_get_tuple() to > nf_ct_get_tuplepr() to avoid the above #ifde #else #endif logic. > > > > @@ -989,6 +1023,9 @@ static int __ovs_ct_lookup(struct net *net, struct > > sw_flow_key *key, > > if (!cached) { > > struct nf_conn *tmpl = info->ct; > > int err; > > +#ifndef HAVE_NF_CONNTRACK_IN_TAKES_NET > > + struct nf_hook_state state = {}; > > +#endif > > > > /* Associate skb with specified zone. */ > > if (tmpl) { > > @@ -998,8 +1035,15 @@ static int __ovs_ct_lookup(struct net *net, struct > > sw_flow_key *key, > > nf_ct_set(skb, tmpl, IP_CT_NEW); > > } > > > > +#ifdef HAVE_NF_CONNTRACK_IN_TAKES_NET > > err = nf_conntrack_in(net, info->family, > > NF_INET_PRE_ROUTING, skb); > > +#else > > + state.hook = NF_INET_PRE_ROUTING, > > + state.pf = info->family, > > + state.net = net, > > + err = nf_conntrack_in(skb, &state); > > +#endif > > if (err != NF_ACCEPT) > > return -ENOENT; > > > > The changes in __ovs_ct_lookup() is related to 93e66024b024 > ("netfilter: conntrack: pass nf_hook_state to packet and error > handlers"). In general, we would like to sychronize our > ./datapatch/*.c code to be as similar as to the upstream > ./net/openvswitch/*.c. In this case, We can try to hide the #if #else > #endif in the compat layer in ./datapath/linux/compat/ > > Here is an example. I only tested it on 4.4 kernel, it may need to be > tested on other kernels. > > diff --git a/acinclude.m4 b/acinclude.m4 > index 3cd6ea7302d5..d6cfbd54e357 100644 > --- a/acinclude.m4 > +++ b/acinclude.m4 > @@ -675,6 +675,9 @@ AC_DEFUN([OVS_CHECK_LINUX_COMPAT], [ > [nf_ct_set]) > OVS_GREP_IFELSE([$KSRC/include/net/netfilter/nf_conntrack.h], > [nf_ct_is_untracked]) > + OVS_FIND_PARAM_IFELSE([$KSRC/include/net/netfilter/nf_conntrack_core.h], > + [nf_conntrack_in], [u_int8_t pf], > + [OVS_DEFINE([HAVE_NF_CONNTRACK_IN_PF])]) > OVS_GREP_IFELSE([$KSRC/include/net/netfilter/nf_conntrack_zones.h], > [nf_ct_zone_init]) > OVS_GREP_IFELSE([$KSRC/include/net/netfilter/nf_conntrack_l3proto.h], > diff --git a/datapath/conntrack.c b/datapath/conntrack.c > index a7dc9e0c3513..5a97f913f0b2 100644 > --- a/datapath/conntrack.c > +++ b/datapath/conntrack.c > @@ -987,6 +987,11 @@ static int __ovs_ct_lookup(struct net *net, > struct sw_flow_key *key, > struct nf_conn *ct; > > if (!cached) { > + struct nf_hook_state state = { > + .hook = NF_INET_PRE_ROUTING, > + .pf = info->family, > + .net = net, > + }; > struct nf_conn *tmpl = info->ct; > int err; > > @@ -998,8 +1003,7 @@ static int __ovs_ct_lookup(struct net *net, > struct sw_flow_key *key, > nf_ct_set(skb, tmpl, IP_CT_NEW); > } > > - err = nf_conntrack_in(net, info->family, > - NF_INET_PRE_ROUTING, skb); > + err = nf_conntrack_in(skb, &state); > if (err != NF_ACCEPT) > return -ENOENT; > > diff --git a/datapath/linux/compat/include/net/netfilter/nf_conntrack_core.h > b/datapath/linux/compat/include/net/netfilter/nf_conntrack_core.h > index 7834c8c25f79..b05a5beda3cc 100644 > --- a/datapath/linux/compat/include/net/netfilter/nf_conntrack_core.h > +++ b/datapath/linux/compat/include/net/netfilter/nf_conntrack_core.h > @@ -104,4 +104,14 @@ static inline bool rpl_nf_ct_delete(struct > nf_conn *ct, u32 portid, int report) > #define nf_ct_delete rpl_nf_ct_delete > #endif /* HAVE_NF_CONN_TIMER */ > > +#ifdef HAVE_NF_CONNTRACK_IN_PF > + > +static inline bool rpl_nf_conntrack_in(struct sk_buff *skb, > + struct nf_hook_state *state) > +{ > + return nf_conntrack_in(state->net, state->pf, state->hook, skb); > +} > +#define nf_conntrack_in rpl_nf_conntrack_in > +#endif /* HAVE_NF_CONNTRACK_IN_PF */ > + > #endif /* _NF_CONNTRACK_CORE_WRAPPER_H */ > > > > @@ -1307,9 +1351,17 @@ int ovs_ct_execute(struct net *net, struct sk_buff > > *skb, > > { > > int nh_ofs; > > int err; > > + /* From kernel 4.19.0+, Function handle_fragments may shrink skb's > > + * headroom, which will result in loss of ethernet header data. > > + * This buf is used to backup the header data before calling > > + * handle_fragments. */ > > + char buf[32]; > > > > /* The conntrack module expects to be working at L3. */ > > nh_ofs = skb_network_offset(skb); > > + if (nh_ofs > sizeof(buf)) > > + return -EINVAL; > > + memcpy(buf, skb->data, nh_ofs); > > skb_pull_rcsum(skb, nh_ofs); > > > > err = ovs_skb_network_trim(skb); > > @@ -1326,8 +1378,16 @@ int ovs_ct_execute(struct net *net, struct sk_buff > > *skb, > > err = ovs_ct_commit(net, key, info, skb); > > else > > err = ovs_ct_lookup(net, key, info, skb); > > + if (err) > > + return err; > > > > + if (skb_headroom(skb) < nh_ofs) { > > + err = pskb_expand_head(skb, nh_ofs, 0, GFP_ATOMIC); > > + if (err) > > + return err; > > + } > > skb_push(skb, nh_ofs); > > + memcpy(skb->data, buf, nh_ofs); > > skb_postpush_rcsum(skb, skb->data, nh_ofs); > > if (err) > > kfree_skb(skb); > The change in ovs_ct_execute() looks like a bug fix in upstream > kernel. According to our backport policy, > http://docs.openvswitch.org/en/latest/internals/contributing/backporting-patches/ > Please upstream it to net-next before bring it back to datapath. > > > > > @@ -1362,7 +1422,11 @@ static int ovs_ct_add_helper(struct > > ovs_conntrack_info *info, const char *name, > > return -EINVAL; > > } > > > > +#ifdef HAVE_NF_CT_HELPER_EXT_ADD_TAKES_HELPER > > help = nf_ct_helper_ext_add(info->ct, helper, GFP_KERNEL); > > +#else > > + help = nf_ct_helper_ext_add(info->ct, GFP_KERNEL); > > +#endif > > if (!help) { > > nf_conntrack_helper_put(helper); > > return -ENOMEM; > The change here is related to upstream patch 440534d3c56b ("netfilter: > Remove useless param helper of nf_ct_helper_ext_add"). Can you try to > hide the #if #else #endif logic in the compat layer as the example in > __ovs_ct_lookup(). > > > > @@ -1387,6 +1451,20 @@ static int parse_nat(const struct nlattr *attr, > > bool have_proto_max = false; > > bool ip_vers = (info->family == NFPROTO_IPV6); > > > > +#ifndef CONFIG_NF_NAT_IPV4 > > + if (info->family == NFPROTO_IPV4) { > > + OVS_NLERR(log, "Flow action ct(nat) not supported without > > nf_nat_ipv4"); > > + return -ENOTSUPP; > > + } > > +#endif > > + > > +#ifndef CONFIG_NF_NAT_IPV6 > > + if (info->family == NFPROTO_IPV6) { > > + OVS_NLERR(log, "Flow action ct(nat) not supported without > > nf_nat_ipv6"); > > + return -ENOTSUPP; > > + } > > +#endif > > + > > nla_for_each_nested(a, attr, rem) { > > static const int ovs_nat_attr_lens[OVS_NAT_ATTR_MAX + 1][2] > > = { > > [OVS_NAT_ATTR_SRC] = {0, 0}, > Is this something that would happen in the upstream kernel? If this is > the case, we should upstream that before backport it to datapath. > > > I did not review the following compat code since they may need to > change accroding with different backport approach. > > > diff --git > > a/datapath/linux/compat/include/net/netfilter/nf_conntrack_core.h > > b/datapath/linux/compat/include/net/netfilter/nf_conntrack_core.h > > index 7834c8c25f79..7fca7dc551c8 100644 > > Thanks, > > -Yi-Hung _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev