The commit is pushed to "branch-rh7-3.10.0-123.1.2-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git after rh7-3.10.0-123.1.2.vz7.5.12 ------> commit c158fe3fdf4a6961ba5a010fb03c4c365ca3e9bd Author: Kirill Tkhai <ktk...@odin.com> Date: Wed Jun 10 19:15:01 2015 +0400
ve/net: Track netfilter modules per net-namespace-v3 Porting patch diff-ve-net-track-netfilter-modules-per-net-namespace from 2.6.32: cpt related hunks are separated in diff-cpt-track-netfilter-modules-per-net-namespace-v3 Previously, in rhel6, we did not allow creating nested net namespaces, so all netfilter module tracking was done via the VE structure struct ve_struct { ... __u64 ipt_mask; __u64 _iptables_modules; ... } Here @ipt_mask stands for the feature bits which are set up from the VE configuration and represent the netfilter modules which are allowed to be used inside the VE (there might be a situation where modules are loaded on the node but forbidden inside the VE). When some particular module is allowed inside the VE and gets loaded, we set the appropriate bit in @_iptables_modules and clear it again upon module unload: the net_ipt_module_set() helper does the setting, usually in the module's net-init() call. At the same time this bit signals that the module-specific resources allocated for VE use should be freed once the module is unloaded, i.e. when net-exit() is called. 
For example | static int __net_init iptable_mangle_net_init(struct net *net) | { | if (!net_ipt_permitted(net, VE_IP_MANGLE)) | return 0; | | /* Register table */ | net->ipv4.iptable_mangle = | ipt_register_table(net, &packet_mangler, &initial_table.repl); | if (IS_ERR(net->ipv4.iptable_mangle)) | return PTR_ERR(net->ipv4.iptable_mangle); | | net_ipt_module_set(net, VE_IP_MANGLE); | return 0; | } | | static void __net_exit iptable_mangle_net_exit(struct net *net) | { | if (!net_is_ipt_module_set(net, VE_IP_MANGLE)) | return; | } | | static void __net_exit iptable_mangle_net_exit(struct net *net) | { | if (!net_is_ipt_module_set(net, VE_IP_MANGLE)) | return; | | ipt_unregister_table(net->ipv4.iptable_mangle); | | net_ipt_module_clear(net, VE_IP_MANGLE); | } | | static struct pernet_operations iptable_mangle_net_ops = { | .init = iptable_mangle_net_init, | .exit = iptable_mangle_net_exit, | }; Here we allocate @net->ipv4.iptable_mangle, which should be cleaned up on exit; moreover, the net-namespace engine is designed that way: if something failed in the init() call then exit() is called and must clean up any data that has been allocated. This worked well until we allowed nested net-namespaces. Every nested net-namespace shares the same @owner_ve (i.e. the VE which is the keeper of it). Thus, once a nested namespace calls net_ipt_module_clear(), the shared @owner_ve gets its @_iptables_modules bit cleared, and finally the primary net namespace no longer sees this bit set in @_iptables_modules. Let's move @_iptables_modules to struct net instead. The toplevel VE's net is referred to via the @ve_netns member, which exists for the whole lifetime of the container. Also, we have to add it to @init_net so that the node's net is tracked the same way automatically. 
https://jira.sw.ru/browse/PSBM-31451 Signed-off-by: Cyrill Gorcunov <gorcu...@parallels.com> Acked-by: Vladimir Davydov <vdavy...@parallels.com> CC: Andrey Vagin <ava...@parallels.com> CC: Kirill Tkhai <ktk...@parallels.com> Signed-off-by: Kirill Tkhai <ktk...@odin.com> --- include/linux/netfilter.h | 6 +++--- include/linux/ve.h | 1 - include/net/net_namespace.h | 3 +++ kernel/ve/ve.c | 1 - net/core/net_namespace.c | 3 +++ 5 files changed, 9 insertions(+), 5 deletions(-) diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 03e6588..fba14ed 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -353,16 +353,16 @@ static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {} #define net_ipt_module_set(netns, ipt) \ ({ \ - (netns)->owner_ve->_iptables_modules |= ipt##_MOD; \ + (netns)->_iptables_modules |= ipt##_MOD; \ }) #define net_ipt_module_clear(netns, ipt) \ ({ \ - (netns)->owner_ve->_iptables_modules &= ~ipt##_MOD; \ + (netns)->_iptables_modules &= ~ipt##_MOD; \ }) #define net_is_ipt_module_set(netns, ipt) \ - ((netns)->owner_ve->_iptables_modules & (ipt##_MOD)) + ((netns)->_iptables_modules & (ipt##_MOD)) #else /* CONFIG_VE_IPTABLES */ diff --git a/include/linux/ve.h b/include/linux/ve.h index e48a1a3..e3fc636 100644 --- a/include/linux/ve.h +++ b/include/linux/ve.h @@ -102,7 +102,6 @@ struct ve_struct { #ifdef CONFIG_VE_IPTABLES /* core/netfilter.c virtualization */ __u64 ipt_mask; - __u64 _iptables_modules; #endif /* CONFIG_VE_IPTABLES */ #endif diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index bcc35c3..e66a928 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -78,6 +78,9 @@ struct net { #ifdef CONFIG_VE struct ve_struct *owner_ve; +#ifdef CONFIG_VE_IPTABLES + __u64 _iptables_modules; +#endif #endif /* core fib_rules */ diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c index e699784..0ffd8d8 100644 --- a/kernel/ve/ve.c +++ b/kernel/ve/ve.c @@ -71,7 +71,6 
@@ struct ve_struct ve0 = { .is_running = 1, #ifdef CONFIG_VE_IPTABLES .ipt_mask = VE_IP_ALL, /* everything is allowed */ - ._iptables_modules = VE_IP_NONE, /* but nothing yet loaded */ #endif .features = -1, .fsync_enable = FSYNC_FILTERED, diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 0ad9aa1..3f7da6a 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -35,6 +35,9 @@ struct net init_net = { .dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head), #ifdef CONFIG_VE .owner_ve = &ve0, +#ifdef CONFIG_VE_IPTABLES + ._iptables_modules = VE_IP_NONE, +#endif #endif }; EXPORT_SYMBOL(init_net); _______________________________________________ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel