The netfilter NAT core cannot deal with more than one NAT hook 
per hook location (prerouting, input ...), because the NAT hooks install
a NAT null binding in case the iptables nat table (iptable_nat hooks)
or the corresponding nftables chain (nft nat hooks) doesn't specify a nat
transformation.

Currently iptables NAT hook is called in all netns, even if according
iptables NAT tables (vpv4 and ipv6 have separate tables) are empty and does 
nothing.
this block execution of corrsponding nft NAT hook, even if it is present.
This is true in reverted direction: if nft NAT hook was called first it blocks
execution of iptbles nat hook, because corresponding conntrack already have
NAT null binding.

This patch allows nft nat hook to be sure if coresponding NAT kind is enabled
and is in use in current net namespace.

The patch does not allow nft to add new NAT chains if netns already have 
another one:
either iptables nat or an another nft NAT chain with the same priority.

Patch does not block the loading of the iptables NAT if nft NAT is already 
present,
because it is quite rare case.

In general this patch allows to work NAT both on host and inside 
iptables-containers
and inside centos8 containers where nftables nat is only used without any 
additional
configuration.

https://jira.sw.ru/browse/PSBM-123345
Signed-off-by: Vasily Averin <v...@virtuozzo.com>
---
 include/net/netfilter/nf_nat.h           | 24 +++++++++++++++++-------
 net/ipv4/netfilter/nf_nat_l3proto_ipv4.c |  4 ++--
 net/ipv6/netfilter/nf_nat_l3proto_ipv6.c |  4 ++--
 net/netfilter/core.c                     | 13 ++++++++++++-
 4 files changed, 33 insertions(+), 12 deletions(-)

diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h
index c9ca6ed..4308180 100644
--- a/include/net/netfilter/nf_nat.h
+++ b/include/net/netfilter/nf_nat.h
@@ -79,23 +79,33 @@ static inline bool nf_nat_oif_changed(unsigned int hooknum,
 }
 
 /*
- * Check if nft chain's netns fits conntrack netns.
+ * Check if netns have enabled NAT.
  * Uses ops->is_nft_ops flag to detect nft ops.
- * If ops is not nft-related, the check is considered passed.
+ * If ops is not nft-related we need to be sure
+ * that according ipt nat table is not empty and can be in use.
  */
-#define is_valid_netns(ops, ct) ({                                     \
+#define netns_nat_check(ops, pf, __net) ({                             \
        const struct nft_chain *__chain;                                \
        const struct net *__chain_net;                                  \
-       const struct net *__net;                                        \
        bool __ret;                                                     \
                                                                        \
        if (ops->is_nft_ops) {                                          \
                __chain = ops->priv;                                    \
                __chain_net = read_pnet(&nft_base_chain(__chain)->pnet);\
-               __net = nf_ct_net(ct);                                  \
                __ret = net_eq(__net, __chain_net);                     \
-       } else                                                          \
-               __ret = true;                                           \
+       } else {                                                        \
+               struct xt_table_info *__priv = NULL;                    \
+               if (pf == NFPROTO_IPV4 &&                               \
+                   !IS_ERR_OR_NULL(__net->ipv4.nat_table))             \
+                       __priv = __net->ipv4.nat_table->private         \
+               else if (pf == NFPROTO_IPV6 &&                          \
+                        !IS_ERR_OR_NULL(__net->ipv6.ip6table_nat))     \
+                       __priv = __net->ipv6.ip6table_nat->private;     \
+               if (__priv && __priv->number > __priv->initial_entries) \
+                       __ret = true;                                   \
+               else                                                    \
+                       __ret = false;                                  \
+       }                                                               \
        __ret;                                                          \
 })
 #endif
diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c 
b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
index 3a261e2..a202287 100644
--- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
@@ -292,8 +292,8 @@ nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct 
sk_buff *skb,
                if (!nf_nat_initialized(ct, maniptype)) {
                        unsigned int ret;
 
-                       /* Ignore nft chains with wrong netns. */
-                       if (!is_valid_netns(ops, ct))
+                       /* Ignore if nft/ipt NAT is not used in this netns */
+                       if (!netns_nat_check(ops, ops->pf, nf_ct_net(ct)))
                                return NF_ACCEPT;
 
                        ret = do_chain(ops, skb, state, ct);
diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c 
b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
index fdedb60..99a6799 100644
--- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
@@ -305,8 +305,8 @@ nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct 
sk_buff *skb,
                if (!nf_nat_initialized(ct, maniptype)) {
                        unsigned int ret;
 
-                       /* Ignore nft chains with wrong netns. */
-                       if (!is_valid_netns(ops, ct))
+                       /* Ignore if nft/ipt NAT is not used in this netns */
+                       if (!netns_nat_check(ops, ops->pf, nf_ct_net(ct)))
                                return NF_ACCEPT;
 
                        ret = do_chain(ops, skb, state, ct);
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 4ad3d13..74dee8c 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -64,7 +64,8 @@ EXPORT_SYMBOL(nf_hooks_needed);
 #endif
 
 static DEFINE_MUTEX(nf_hook_mutex);
-
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_nat.h>
 int nf_register_hook(struct nf_hook_ops *reg)
 {
        struct nf_hook_ops *elem;
@@ -73,6 +74,16 @@ int nf_register_hook(struct nf_hook_ops *reg)
        list_for_each_entry(elem, &nf_hooks[reg->pf][reg->hooknum], list) {
                if (reg->priority < elem->priority)
                        break;
+               else if ((reg->priority == elem->priority) && reg->is_nft_ops) {
+                       const struct nft_chain *c = reg->priv;
+                       struct net *net = read_pnet(&nft_base_chain(c)->pnet);
+
+                       /* fail if netns already have enabled nft/ipt nat */
+                       if (netns_nat_check(elem, reg->pf, net)) {
+                               mutex_unlock(&nf_hook_mutex);
+                               return -EBUSY;
+                       }
+               }
        }
        list_add_rcu(&reg->list, elem->list.prev);
        mutex_unlock(&nf_hook_mutex);
-- 
1.8.3.1

_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to