The commit is pushed to "branch-rh7-3.10.0-1160.11.1.vz7.172.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git after rh7-3.10.0-1160.11.1.vz7.172.5 ------> commit bd3f66019d759e8d366bee23679092c7cd30f5ae Author: Vasily Averin <v...@virtuozzo.com> Date: Mon Dec 28 09:52:43 2020 +0300
nat: allow nft NAT and iptables NAT work on the same node The netfilter NAT core cannot deal with more than one NAT hook per hook location (prerouting, input ...), because the NAT hooks install a NAT null binding in case the iptables nat table (iptable_nat hooks) or the corresponding nftables chain (nft nat hooks) doesn't specify a nat transformation. Currently iptables NAT hook is called in all netns, even if according iptables NAT tables (vpv4 and ipv6 have separate tables) are empty and does nothing. this block execution of corrsponding nft NAT hook, even if it is present. This is true in reverted direction: if nft NAT hook was called first it blocks execution of iptbles nat hook, because corresponding conntrack already have NAT null binding. This patch allows nft nat hook to be sure if coresponding NAT kind is enabled and is in use in current net namespace. The patch does not allow nft to add new NAT chains if netns already have another one: either iptables nat or an another nft NAT chain with the same priority. Patch does not block the loading of the iptables NAT if nft NAT is already present, because it is quite rare case. In general this patch allows to work NAT both on host and inside iptables-containers and inside centos8 containers where nftables nat is only used without any additional configuration. https://jira.sw.ru/browse/PSBM-123345 Signed-off-by: Vasily Averin <v...@virtuozzo.com> --- include/net/netfilter/nf_nat.h | 24 +++++++++++++++++------- net/ipv4/netfilter/nf_nat_l3proto_ipv4.c | 4 ++-- net/ipv6/netfilter/nf_nat_l3proto_ipv6.c | 4 ++-- net/netfilter/core.c | 13 ++++++++++++- 4 files changed, 33 insertions(+), 12 deletions(-) diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h index c9ca6ed..4308180 100644 --- a/include/net/netfilter/nf_nat.h +++ b/include/net/netfilter/nf_nat.h @@ -79,23 +79,33 @@ static inline bool nf_nat_oif_changed(unsigned int hooknum, } /* - * Check if nft chain's netns fits conntrack netns. + * Check if netns have enabled NAT. * Uses ops->is_nft_ops flag to detect nft ops. - * If ops is not nft-related, the check is considered passed. + * If ops is not nft-related we need to be sure + * that according ipt nat table is not empty and can be in use. */ -#define is_valid_netns(ops, ct) ({ \ +#define netns_nat_check(ops, pf, __net) ({ \ const struct nft_chain *__chain; \ const struct net *__chain_net; \ - const struct net *__net; \ bool __ret; \ \ if (ops->is_nft_ops) { \ __chain = ops->priv; \ __chain_net = read_pnet(&nft_base_chain(__chain)->pnet);\ - __net = nf_ct_net(ct); \ __ret = net_eq(__net, __chain_net); \ - } else \ - __ret = true; \ + } else { \ + struct xt_table_info *__priv = NULL; \ + if (pf == NFPROTO_IPV4 && \ + !IS_ERR_OR_NULL(__net->ipv4.nat_table)) \ + __priv = __net->ipv4.nat_table->private \ + else if (pf == NFPROTO_IPV6 && \ + !IS_ERR_OR_NULL(__net->ipv6.ip6table_nat)) \ + __priv = __net->ipv6.ip6table_nat->private; \ + if (__priv && __priv->number > __priv->initial_entries) \ + __ret = true; \ + else \ + __ret = false; \ + } \ __ret; \ }) #endif diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c index 3a261e2..a202287 100644 --- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c @@ -292,8 +292,8 @@ nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, if (!nf_nat_initialized(ct, maniptype)) { unsigned int ret; - /* Ignore nft chains with wrong netns. */ - if (!is_valid_netns(ops, ct)) + /* Ignore if nft/ipt NAT is not used in this netns */ + if (!netns_nat_check(ops, ops->pf, nf_ct_net(ct))) return NF_ACCEPT; ret = do_chain(ops, skb, state, ct); diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c index fdedb60..99a6799 100644 --- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c @@ -305,8 +305,8 @@ nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, if (!nf_nat_initialized(ct, maniptype)) { unsigned int ret; - /* Ignore nft chains with wrong netns. */ - if (!is_valid_netns(ops, ct)) + /* Ignore if nft/ipt NAT is not used in this netns */ + if (!netns_nat_check(ops, ops->pf, nf_ct_net(ct))) return NF_ACCEPT; ret = do_chain(ops, skb, state, ct); diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 4ad3d13..74dee8c 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -64,7 +64,8 @@ EXPORT_SYMBOL(nf_hooks_needed); #endif static DEFINE_MUTEX(nf_hook_mutex); - +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_nat.h> int nf_register_hook(struct nf_hook_ops *reg) { struct nf_hook_ops *elem; @@ -73,6 +74,16 @@ int nf_register_hook(struct nf_hook_ops *reg) list_for_each_entry(elem, &nf_hooks[reg->pf][reg->hooknum], list) { if (reg->priority < elem->priority) break; + else if ((reg->priority == elem->priority) && reg->is_nft_ops) { + const struct nft_chain *c = reg->priv; + struct net *net = read_pnet(&nft_base_chain(c)->pnet); + + /* fail if netns already have enabled nft/ipt nat */ + if (netns_nat_check(elem, reg->pf, net)) { + mutex_unlock(&nf_hook_mutex); + return -EBUSY; + } + } } list_add_rcu(®->list, elem->list.prev); mutex_unlock(&nf_hook_mutex); _______________________________________________ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel