[Devel] [PATCH RH7 3/3] vznetstat: Skip local skb going from !IFF_LOOPBACK interface
A local packet may be sent not only via 127.0.0.1. Say, if we have eth0 with 10.94.86.184, and both server and client use this address to communicate, the @out interface will be eth0, while in reality the packets will be transmitted through loopback inside a single net ns. We don't want vznetstat to mark such packets, because these marks conflict with ordinary iptables rules. Since venet_acct_in_ops is executed at the NF_INET_LOCAL_OUT stage, dst may be NULL (I assume this based on ip_queue_xmit(), where skb_rtable() may be NULL before routing). We keep both checks (out->flags and this new one). It looks like we should think about making venet_acct_in_ops an NF_INET_POST_ROUTING hook and killing the out->flags check; dst should be non-NULL there. Lastly, I attach one of the paths by which we reach the hook (for reviewers and history): [76498.851548] [] venet_acct_out_hook+0xef/0x150 [ip_vznetstat] [76498.856342] [] nf_iterate+0x98/0xe0 [76498.860179] [] nf_hook_slow+0xa8/0x110 [76498.864098] [] __ip_local_out_sk+0x102/0x110 [76498.868028] [] ? ip_forward_options+0x1c0/0x1c0 [76498.872302] [] ip_local_out_sk+0x1b/0x40 [76498.876054] [] ip_queue_xmit+0x144/0x3c0 [76498.880126] [] tcp_transmit_skb+0x4e4/0x9e0 [76498.883983] [] tcp_write_xmit+0x18a/0xd40 [76498.888200] [] __tcp_push_pending_frames+0x2e/0xc0 [76498.892368] [] tcp_push+0xec/0x120 [76498.896262] [] tcp_sendmsg+0xd2/0xc60 [76498.900257] [] ? __schedule+0x402/0x990 [76498.904251] [] inet_sendmsg+0x69/0xb0 [76498.907751] [] sock_aio_write+0x15d/0x180 [76498.911435] [] ? try_to_wake_up+0x255/0x470 [76498.915473] [] do_sync_write+0x96/0xe0 [76498.919402] [] vfs_write+0x1c5/0x1f0 [76498.922945] [] SyS_write+0x7f/0xf0 [76498.926721] [] ? 
sys_rt_sigreturn+0xe8/0x100 [76498.930878] [] system_call_fastpath+0x25/0x2a https://jira.sw.ru/browse/PSBM-120713 Signed-off-by: Kirill Tkhai --- kernel/ve/vznetstat/ip6_vznetstat.c |4 kernel/ve/vznetstat/ip_vznetstat.c |8 2 files changed, 12 insertions(+) diff --git a/kernel/ve/vznetstat/ip6_vznetstat.c b/kernel/ve/vznetstat/ip6_vznetstat.c index af095ee53045..1617de3cf0ad 100644 --- a/kernel/ve/vznetstat/ip6_vznetstat.c +++ b/kernel/ve/vznetstat/ip6_vznetstat.c @@ -21,6 +21,7 @@ #include #include #include +#include static unsigned int venet_acct_in_hook_v6(const struct nf_hook_ops *hook, @@ -46,10 +47,13 @@ venet_acct_out_hook_v6(const struct nf_hook_ops *hook, const struct net_device *out, const struct nf_hook_state *state) { + struct dst_entry *dst = skb_dst(skb); int res = NF_ACCEPT; if (out->flags & IFF_LOOPBACK) goto out; + if (dst && (dst->dev->flags & IFF_LOOPBACK)) + goto out; skb->protocol = __constant_htons(ETH_P_IPV6); venet_acct_classify_add_outgoing(out->nd_net->owner_ve->stat, skb); diff --git a/kernel/ve/vznetstat/ip_vznetstat.c b/kernel/ve/vznetstat/ip_vznetstat.c index d96065768ab3..5ea978d6dd88 100644 --- a/kernel/ve/vznetstat/ip_vznetstat.c +++ b/kernel/ve/vznetstat/ip_vznetstat.c @@ -77,6 +77,7 @@ static unsigned int venet_acct_out_hook(const struct nf_hook_ops *hook, const struct net_device *out, const struct nf_hook_state *state) { + struct dst_entry *dst = skb_dst(skb); int res; res = NF_ACCEPT; @@ -84,6 +85,13 @@ static unsigned int venet_acct_out_hook(const struct nf_hook_ops *hook, /* Skip loopback dev */ if (out->flags & IFF_LOOPBACK) goto out; + /* +* @skb is routed to loopback. Say, your eth0 has address 10.94.86.184 +* and ip_hdr(skb)->saddr == ip_hdr(skb)->daddr == 10.94.86.184. +* Then, @out is eth0 and we skip @skb in the above check. 
+*/ + if (dst && (dst->dev->flags & IFF_LOOPBACK)) + goto out; /* Paranoia */ if (unlikely(!pskb_may_pull(skb, sizeof(struct iphdr ___ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel
[Devel] [PATCH RH7 2/3] vznetstat: Simplify venet_acct_out_hook()
Signed-off-by: Kirill Tkhai --- kernel/ve/vznetstat/ip_vznetstat.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/ve/vznetstat/ip_vznetstat.c b/kernel/ve/vznetstat/ip_vznetstat.c index 999a93b84cab..d96065768ab3 100644 --- a/kernel/ve/vznetstat/ip_vznetstat.c +++ b/kernel/ve/vznetstat/ip_vznetstat.c @@ -82,7 +82,7 @@ static unsigned int venet_acct_out_hook(const struct nf_hook_ops *hook, res = NF_ACCEPT; /* Skip loopback dev */ - if (out == dev_net(out)->loopback_dev) + if (out->flags & IFF_LOOPBACK) goto out; /* Paranoia */ ___ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel
[Devel] [PATCH RH7 1/3] vznetstat: Kill unused venet_acct_classify_sub_outgoing()
Signed-off-by: Kirill Tkhai --- include/linux/vznetstat.h |3 --- kernel/ve/vznetstat/vznetstat.c | 10 -- 2 files changed, 13 deletions(-) diff --git a/include/linux/vznetstat.h b/include/linux/vznetstat.h index ca5da0b18ed6..1b8bbce7fd24 100644 --- a/include/linux/vznetstat.h +++ b/include/linux/vznetstat.h @@ -59,7 +59,6 @@ void venet_acct_put_stat(struct venet_stat *); void venet_acct_classify_add_incoming(struct venet_stat *, struct sk_buff *skb); void venet_acct_classify_add_outgoing(struct venet_stat *, struct sk_buff *skb); -void venet_acct_classify_sub_outgoing(struct venet_stat *, struct sk_buff *skb); void venet_acct_classify_add_incoming_plain(struct venet_stat *stat, struct ve_addr_struct *src_addr, int data_size); @@ -74,8 +73,6 @@ static inline void venet_acct_classify_add_incoming(struct venet_stat *stat, struct sk_buff *skb) {} static inline void venet_acct_classify_add_outgoing(struct venet_stat *stat, struct sk_buff *skb) {} -static inline void venet_acct_classify_sub_outgoing(struct venet_stat *stat, - struct sk_buff *skb) {} static inline void venet_acct_classify_add_incoming_plain(struct venet_stat *stat, struct ve_addr_struct *src_addr, int data_size) {} diff --git a/kernel/ve/vznetstat/vznetstat.c b/kernel/ve/vznetstat/vznetstat.c index f366325bd91b..aa8d007adbbe 100644 --- a/kernel/ve/vznetstat/vznetstat.c +++ b/kernel/ve/vznetstat/vznetstat.c @@ -753,15 +753,6 @@ void venet_acct_classify_add_outgoing(struct venet_stat *stat, struct sk_buff *s venet_acct_mark(stat, skb, class); } -void venet_acct_classify_sub_outgoing(struct venet_stat *stat, struct sk_buff *skb) -{ - int class; - - class = acct_one_skb(stat, skb, ACCT_OUT, -venet_acct_skb_size(skb)); - /* Do not forget to mark skb for traffic shaper */ - venet_acct_mark(stat, skb, class); -} - void venet_acct_classify_add_incoming_plain(struct venet_stat *stat, struct ve_addr_struct *src_addr, int data_size) { @@ -1190,7 +1181,6 @@ EXPORT_SYMBOL(venet_acct_find_stat); 
EXPORT_SYMBOL(venet_acct_put_stat); EXPORT_SYMBOL(venet_acct_classify); EXPORT_SYMBOL(venet_acct_classify_add_outgoing); -EXPORT_SYMBOL(venet_acct_classify_sub_outgoing); EXPORT_SYMBOL(venet_acct_classify_add_incoming); EXPORT_SYMBOL(venet_acct_classify_add_incoming_plain); EXPORT_SYMBOL(venet_acct_classify_add_outgoing_plain); ___ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel
Re: [Devel] [PATCH rh7] tun: Silence allocation failer if user asked for too big header
On 10/6/20 11:17 AM, Konstantin Khorenko wrote: > On 10/05/2020 04:42 PM, Andrey Ryabinin wrote: >> Userspace may ask tun device to send packet with ridiculously >> big header and trigger this: >> >> [ cut here ] >> WARNING: CPU: 1 PID: 15366 at mm/page_alloc.c:3548 >> __alloc_pages_nodemask+0x537/0x1200 >> order 19 >= 11, gfp 0x2044d0 >> Call Trace: >> dump_stack+0x19/0x1b >> __warn+0x17f/0x1c0 >> warn_slowpath_fmt+0xad/0xe0 >> __alloc_pages_nodemask+0x537/0x1200 >> kmalloc_large_node+0x5f/0xd0 >> __kmalloc_node_track_caller+0x425/0x630 >> __kmalloc_reserve.isra.33+0x47/0xd0 >> __alloc_skb+0xdd/0x5f0 >> alloc_skb_with_frags+0x8f/0x540 >> sock_alloc_send_pskb+0x5e5/0x940 >> tun_get_user+0x38b/0x24a0 [tun] >> tun_chr_aio_write+0x13a/0x250 [tun] >> do_sync_readv_writev+0xdf/0x1c0 >> do_readv_writev+0x1a5/0x850 >> vfs_writev+0xba/0x190 >> SyS_writev+0x17c/0x340 >> system_call_fastpath+0x25/0x2a >> >> Just add __GFP_NOWARN and silently return -ENOMEM to fix this. >> >> https://jira.sw.ru/browse/PSBM-103639 >> Signed-off-by: Andrey Ryabinin >> --- >> drivers/net/tun.c | 4 ++-- >> include/net/sock.h | 7 +++ >> net/core/sock.c | 9 + >> 3 files changed, 18 insertions(+), 2 deletions(-) >> >> diff --git a/drivers/net/tun.c b/drivers/net/tun.c >> index e95a89ba48b7..c0879c6a9703 100644 >> --- a/drivers/net/tun.c >> +++ b/drivers/net/tun.c >> @@ -1142,8 +1142,8 @@ static struct sk_buff *tun_alloc_skb(struct tun_file >> *tfile, >> if (prepad + len < PAGE_SIZE || !linear) >> linear = len; >> >> - skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock, >> - , 0); >> + skb = sock_alloc_send_pskb_flags(sk, prepad + linear, len - linear, >> noblock, >> + , 0, __GFP_NOWARN); > > May be __GFP_ORDER_NOWARN ? > __GFP_ORDER_NOWARN doesn't silence the WARN triggered here: if (order >= MAX_ORDER) { WARN_ON_ONCE(!(gfp_mask & __GFP_NOWARN)); return NULL; } ___ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel
[Devel] [PATCH RHEL7 COMMIT] vmscan: don't report reclaim progress if there was no progress.
The commit is pushed to "branch-rh7-3.10.0-1127.18.2.vz7.163.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git after rh7-3.10.0-1127.18.2.vz7.163.33 --> commit 7a23b037273068bf27c72d8cbdfcb6416c001872 Author: Andrey Ryabinin Date: Tue Oct 6 11:19:19 2020 +0300 vmscan: don't report reclaim progress if there was no progress. __alloc_pages_slowpath relies on the direct reclaim and did_some_progress as an indicator that it makes sense to retry allocation rather than declaring OOM. shrink_zones checks if all zones are reclaimable and if shrink_zone didn't make any progress it prevents a premature OOM killer invocation by reporting the progress. This might happen if the LRU is full of dirty or writeback pages and direct reclaim cannot clean those up. zone_reclaimable allows rescanning the reclaimable lists several times and restarting if a page is freed. This is really subtle behavior and it might lead to a livelock when a single freed page keeps the allocator looping but the current task will not be able to allocate that single page. OOM killer would be more appropriate than looping without any progress for an unbounded amount of time. Report no progress even if zones are reclaimable as OOM is more appropriate in that case. https://jira.sw.ru/browse/PSBM-104900 Signed-off-by: Andrey Ryabinin --- mm/vmscan.c | 24 1 file changed, 24 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index 13ae9bd..85622f2 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2952,26 +2952,6 @@ static void snapshot_refaults(struct mem_cgroup *root_memcg, struct zone *zone) } while ((memcg = mem_cgroup_iter(root_memcg, memcg, NULL))); } -/* All zones in zonelist are unreclaimable? 
*/ -static bool all_unreclaimable(struct zonelist *zonelist, - struct scan_control *sc) -{ - struct zoneref *z; - struct zone *zone; - - for_each_zone_zonelist_nodemask(zone, z, zonelist, - gfp_zone(sc->gfp_mask), sc->nodemask) { - if (!populated_zone(zone)) - continue; - if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) - continue; - if (zone_reclaimable(zone)) - return false; - } - - return true; -} - static void shrink_tcrutches(struct scan_control *scan_ctrl) { int nid; @@ -3097,10 +3077,6 @@ out: goto retry; } - /* top priority shrink_zones still had more to do? don't OOM, then */ - if (global_reclaim(sc) && !all_unreclaimable(zonelist, sc)) - return 1; - return 0; } ___ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel
[Devel] [PATCH RHEL7 COMMIT] tun: Silence allocation failer if user asked for too big header
The commit is pushed to "branch-rh7-3.10.0-1127.18.2.vz7.163.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git after rh7-3.10.0-1127.18.2.vz7.163.33 --> commit 1e0ad3477bddaf5621b7cc620e6ed64e405ec8cd Author: Andrey Ryabinin Date: Tue Oct 6 11:19:10 2020 +0300 tun: Silence allocation failer if user asked for too big header Userspace may ask tun device to send packet with ridiculously big header and trigger this: [ cut here ] WARNING: CPU: 1 PID: 15366 at mm/page_alloc.c:3548 __alloc_pages_nodemask+0x537/0x1200 order 19 >= 11, gfp 0x2044d0 Call Trace: dump_stack+0x19/0x1b __warn+0x17f/0x1c0 warn_slowpath_fmt+0xad/0xe0 __alloc_pages_nodemask+0x537/0x1200 kmalloc_large_node+0x5f/0xd0 __kmalloc_node_track_caller+0x425/0x630 __kmalloc_reserve.isra.33+0x47/0xd0 __alloc_skb+0xdd/0x5f0 alloc_skb_with_frags+0x8f/0x540 sock_alloc_send_pskb+0x5e5/0x940 tun_get_user+0x38b/0x24a0 [tun] tun_chr_aio_write+0x13a/0x250 [tun] do_sync_readv_writev+0xdf/0x1c0 do_readv_writev+0x1a5/0x850 vfs_writev+0xba/0x190 SyS_writev+0x17c/0x340 system_call_fastpath+0x25/0x2a Just add __GFP_NOWARN and silently return -ENOMEM to fix this. 
https://jira.sw.ru/browse/PSBM-103639 Signed-off-by: Andrey Ryabinin --- drivers/net/tun.c | 4 ++-- include/net/sock.h | 7 +++ net/core/sock.c| 9 + 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index e95a89b..c0879c6 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1142,8 +1142,8 @@ static struct sk_buff *tun_alloc_skb(struct tun_file *tfile, if (prepad + len < PAGE_SIZE || !linear) linear = len; - skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock, - , 0); + skb = sock_alloc_send_pskb_flags(sk, prepad + linear, len - linear, noblock, + , 0, __GFP_NOWARN); if (!skb) return ERR_PTR(err); diff --git a/include/net/sock.h b/include/net/sock.h index 4136d2c..1912d85 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1626,6 +1626,13 @@ extern struct sk_buff *sock_alloc_send_pskb(struct sock *sk, int noblock, int *errcode, int max_page_order); +extern struct sk_buff *sock_alloc_send_pskb_flags(struct sock *sk, + unsigned long header_len, + unsigned long data_len, + int noblock, + int *errcode, + int max_page_order, + gfp_t extra_flags); extern void *sock_kmalloc(struct sock *sk, int size, gfp_t priority); extern void sock_kfree_s(struct sock *sk, void *mem, int size); diff --git a/net/core/sock.c b/net/core/sock.c index 508fc60..07ea42f 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1964,6 +1964,15 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, } EXPORT_SYMBOL(sock_alloc_send_pskb); +struct sk_buff *sock_alloc_send_pskb_flags(struct sock *sk, unsigned long header_len, +unsigned long data_len, int noblock, +int *errcode, int max_page_order, gfp_t extra_flags) +{ + return __sock_alloc_send_pskb(sk, header_len, data_len, noblock, + errcode, max_page_order, extra_flags); +} +EXPORT_SYMBOL(sock_alloc_send_pskb_flags); + struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, int noblock, int *errcode) { ___ Devel mailing 
list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel
Re: [Devel] [PATCH rh7] tun: Silence allocation failer if user asked for too big header
On 10/05/2020 04:42 PM, Andrey Ryabinin wrote: Userspace may ask tun device to send packet with ridiculously big header and trigger this: [ cut here ] WARNING: CPU: 1 PID: 15366 at mm/page_alloc.c:3548 __alloc_pages_nodemask+0x537/0x1200 order 19 >= 11, gfp 0x2044d0 Call Trace: dump_stack+0x19/0x1b __warn+0x17f/0x1c0 warn_slowpath_fmt+0xad/0xe0 __alloc_pages_nodemask+0x537/0x1200 kmalloc_large_node+0x5f/0xd0 __kmalloc_node_track_caller+0x425/0x630 __kmalloc_reserve.isra.33+0x47/0xd0 __alloc_skb+0xdd/0x5f0 alloc_skb_with_frags+0x8f/0x540 sock_alloc_send_pskb+0x5e5/0x940 tun_get_user+0x38b/0x24a0 [tun] tun_chr_aio_write+0x13a/0x250 [tun] do_sync_readv_writev+0xdf/0x1c0 do_readv_writev+0x1a5/0x850 vfs_writev+0xba/0x190 SyS_writev+0x17c/0x340 system_call_fastpath+0x25/0x2a Just add __GFP_NOWARN and silently return -ENOMEM to fix this. https://jira.sw.ru/browse/PSBM-103639 Signed-off-by: Andrey Ryabinin --- drivers/net/tun.c | 4 ++-- include/net/sock.h | 7 +++ net/core/sock.c| 9 + 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index e95a89ba48b7..c0879c6a9703 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1142,8 +1142,8 @@ static struct sk_buff *tun_alloc_skb(struct tun_file *tfile, if (prepad + len < PAGE_SIZE || !linear) linear = len; - skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock, - , 0); + skb = sock_alloc_send_pskb_flags(sk, prepad + linear, len - linear, noblock, + , 0, __GFP_NOWARN); May be __GFP_ORDER_NOWARN ? 
if (!skb) return ERR_PTR(err); diff --git a/include/net/sock.h b/include/net/sock.h index 4136d2c3080c..1912d85ecc4d 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1626,6 +1626,13 @@ extern struct sk_buff *sock_alloc_send_pskb(struct sock *sk, int noblock, int *errcode, int max_page_order); +extern struct sk_buff *sock_alloc_send_pskb_flags(struct sock *sk, + unsigned long header_len, + unsigned long data_len, + int noblock, + int *errcode, + int max_page_order, + gfp_t extra_flags); extern void *sock_kmalloc(struct sock *sk, int size, gfp_t priority); extern void sock_kfree_s(struct sock *sk, void *mem, int size); diff --git a/net/core/sock.c b/net/core/sock.c index 508fc6093a26..07ea42f976cf 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1964,6 +1964,15 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, } EXPORT_SYMBOL(sock_alloc_send_pskb); +struct sk_buff *sock_alloc_send_pskb_flags(struct sock *sk, unsigned long header_len, +unsigned long data_len, int noblock, +int *errcode, int max_page_order, gfp_t extra_flags) +{ + return __sock_alloc_send_pskb(sk, header_len, data_len, noblock, + errcode, max_page_order, extra_flags); +} +EXPORT_SYMBOL(sock_alloc_send_pskb_flags); + struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, int noblock, int *errcode) { ___ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel