The commit is pushed to "branch-rh7-3.10.0-1127.18.2.vz7.163.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git after rh7-3.10.0-1127.18.2.vz7.163.34 ------> commit dae6dda1c5a172501a53c05d8c3a002f8cfb6e5b Author: Kirill Tkhai <ktk...@virtuozzo.com> Date: Wed Oct 7 08:19:36 2020 +0300
vznetstat: Skip local skb going from !IFF_LOOPBACK interface A local packet may be sent not only from 127.0.0.1. Say, if we have eth0 with 10.94.86.184, and both server and client use this address to communicate, the @out interface will be eth0, while in reality the packets will be transmitted through loopback inside a single net ns. We don't want vznetstat to mark such packets, because these marks conflict with ordinary iptables rules. Since venet_acct_in_ops is executed at the NF_INET_LOCAL_OUT stage, dst may be NULL (I assume this based on ip_queue_xmit(), where skb_rtable() may be NULL before routing). We keep both checks (out->flags and this new one). It looks like we should think about making venet_acct_in_ops an NF_INET_POST_ROUTING hook and killing the out->flags check; dst should be non-NULL there. Lastly, I attach one of the paths by which we reach the hook (for reviewers and history): [76498.851548] [<ffffffffc0dfa0ef>] venet_acct_out_hook+0xef/0x150 [ip_vznetstat] [76498.856342] [<ffffffff922cac08>] nf_iterate+0x98/0xe0 [76498.860179] [<ffffffff922cacf8>] nf_hook_slow+0xa8/0x110 [76498.864098] [<ffffffff922d9532>] __ip_local_out_sk+0x102/0x110 [76498.868028] [<ffffffff922d8b40>] ? ip_forward_options+0x1c0/0x1c0 [76498.872302] [<ffffffff922d955b>] ip_local_out_sk+0x1b/0x40 [76498.876054] [<ffffffff922d9914>] ip_queue_xmit+0x144/0x3c0 [76498.880126] [<ffffffff922f4304>] tcp_transmit_skb+0x4e4/0x9e0 [76498.883983] [<ffffffff922f498a>] tcp_write_xmit+0x18a/0xd40 [76498.888200] [<ffffffff922f57ce>] __tcp_push_pending_frames+0x2e/0xc0 [76498.892368] [<ffffffff922e367c>] tcp_push+0xec/0x120 [76498.896262] [<ffffffff922e71e2>] tcp_sendmsg+0xd2/0xc60 [76498.900257] [<ffffffff923be1c2>] ? __schedule+0x402/0x990 [76498.904251] [<ffffffff92313a99>] inet_sendmsg+0x69/0xb0 [76498.907751] [<ffffffff9226864d>] sock_aio_write+0x15d/0x180 [76498.911435] [<ffffffff91ce0525>] ? 
try_to_wake_up+0x255/0x470 [76498.915473] [<ffffffff91e70086>] do_sync_write+0x96/0xe0 [76498.919402] [<ffffffff91e70c75>] vfs_write+0x1c5/0x1f0 [76498.922945] [<ffffffff91e7193f>] SyS_write+0x7f/0xf0 [76498.926721] [<ffffffff91c2cd88>] ? sys_rt_sigreturn+0xe8/0x100 [76498.930878] [<ffffffff923cbf92>] system_call_fastpath+0x25/0x2a https://jira.sw.ru/browse/PSBM-120713 Signed-off-by: Kirill Tkhai <ktk...@virtuozzo.com> --- kernel/ve/vznetstat/ip6_vznetstat.c | 4 ++++ kernel/ve/vznetstat/ip_vznetstat.c | 8 ++++++++ 2 files changed, 12 insertions(+) diff --git a/kernel/ve/vznetstat/ip6_vznetstat.c b/kernel/ve/vznetstat/ip6_vznetstat.c index af095ee..1617de3 100644 --- a/kernel/ve/vznetstat/ip6_vznetstat.c +++ b/kernel/ve/vznetstat/ip6_vznetstat.c @@ -21,6 +21,7 @@ #include <linux/if.h> #include <linux/netdevice.h> #include <linux/vznetstat.h> +#include <net/dst.h> static unsigned int venet_acct_in_hook_v6(const struct nf_hook_ops *hook, @@ -46,10 +47,13 @@ venet_acct_out_hook_v6(const struct nf_hook_ops *hook, const struct net_device *out, const struct nf_hook_state *state) { + struct dst_entry *dst = skb_dst(skb); int res = NF_ACCEPT; if (out->flags & IFF_LOOPBACK) goto out; + if (dst && (dst->dev->flags & IFF_LOOPBACK)) + goto out; skb->protocol = __constant_htons(ETH_P_IPV6); venet_acct_classify_add_outgoing(out->nd_net->owner_ve->stat, skb); diff --git a/kernel/ve/vznetstat/ip_vznetstat.c b/kernel/ve/vznetstat/ip_vznetstat.c index d960657..5ea978d 100644 --- a/kernel/ve/vznetstat/ip_vznetstat.c +++ b/kernel/ve/vznetstat/ip_vznetstat.c @@ -77,6 +77,7 @@ static unsigned int venet_acct_out_hook(const struct nf_hook_ops *hook, const struct net_device *out, const struct nf_hook_state *state) { + struct dst_entry *dst = skb_dst(skb); int res; res = NF_ACCEPT; @@ -84,6 +85,13 @@ static unsigned int venet_acct_out_hook(const struct nf_hook_ops *hook, /* Skip loopback dev */ if (out->flags & IFF_LOOPBACK) goto out; + /* + * @skb is routed to loopback. 
Say, your eth0 has address 10.94.86.184 + * and ip_hdr(skb)->saddr == ip_hdr(skb)->daddr == 10.94.86.184. + * Then, @out is eth0 and we skip @skb in the above check. + */ + if (dst && (dst->dev->flags & IFF_LOOPBACK)) + goto out; /* Paranoia */ if (unlikely(!pskb_may_pull(skb, sizeof(struct iphdr)))) _______________________________________________ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel