[Devel] [PATCH RH7 3/3] vznetstat: Skip local skb going from !IFF_LOOPBACK interface

2020-10-06 Thread Kirill Tkhai
Local packets may be sent not only via 127.0.0.1. Say, if we have eth0 with
10.94.86.184, and both server and client use this address to communicate,
the @out interface will be eth0, while in reality the packets will be
transmitted through loopback inside a single net ns.
We don't want vznetstat to mark such packets, because these marks conflict
with ordinary iptables rules.

Since venet_acct_in_ops is executed at the NF_INET_LOCAL_OUT stage, dst may be
NULL (I assume this based on ip_queue_xmit(), where skb_rtable() may be NULL
before routing).
We keep both checks (out->flags and this new one). It looks like we should
think about making venet_acct_in_ops an NF_INET_POST_ROUTING hook and killing
the out->flags check; dst should never be NULL there.

Lastly, I attach one of the paths by which we reach the hook (for reviewers and history):

[76498.851548]  [] venet_acct_out_hook+0xef/0x150 
[ip_vznetstat]
[76498.856342]  [] nf_iterate+0x98/0xe0
[76498.860179]  [] nf_hook_slow+0xa8/0x110
[76498.864098]  [] __ip_local_out_sk+0x102/0x110
[76498.868028]  [] ? ip_forward_options+0x1c0/0x1c0
[76498.872302]  [] ip_local_out_sk+0x1b/0x40
[76498.876054]  [] ip_queue_xmit+0x144/0x3c0
[76498.880126]  [] tcp_transmit_skb+0x4e4/0x9e0
[76498.883983]  [] tcp_write_xmit+0x18a/0xd40
[76498.888200]  [] __tcp_push_pending_frames+0x2e/0xc0
[76498.892368]  [] tcp_push+0xec/0x120
[76498.896262]  [] tcp_sendmsg+0xd2/0xc60
[76498.900257]  [] ? __schedule+0x402/0x990
[76498.904251]  [] inet_sendmsg+0x69/0xb0
[76498.907751]  [] sock_aio_write+0x15d/0x180
[76498.911435]  [] ? try_to_wake_up+0x255/0x470
[76498.915473]  [] do_sync_write+0x96/0xe0
[76498.919402]  [] vfs_write+0x1c5/0x1f0
[76498.922945]  [] SyS_write+0x7f/0xf0
[76498.926721]  [] ? sys_rt_sigreturn+0xe8/0x100
[76498.930878]  [] system_call_fastpath+0x25/0x2a

https://jira.sw.ru/browse/PSBM-120713

Signed-off-by: Kirill Tkhai 
---
 kernel/ve/vznetstat/ip6_vznetstat.c |4 
 kernel/ve/vznetstat/ip_vznetstat.c  |8 
 2 files changed, 12 insertions(+)

diff --git a/kernel/ve/vznetstat/ip6_vznetstat.c 
b/kernel/ve/vznetstat/ip6_vznetstat.c
index af095ee53045..1617de3cf0ad 100644
--- a/kernel/ve/vznetstat/ip6_vznetstat.c
+++ b/kernel/ve/vznetstat/ip6_vznetstat.c
@@ -21,6 +21,7 @@
 #include 
 #include 
 #include 
+#include 
 
 static unsigned int
 venet_acct_in_hook_v6(const struct nf_hook_ops *hook,
@@ -46,10 +47,13 @@ venet_acct_out_hook_v6(const struct nf_hook_ops *hook,
const struct net_device *out,
const struct nf_hook_state *state)
 {
+   struct dst_entry *dst = skb_dst(skb);
int res = NF_ACCEPT;
 
if (out->flags & IFF_LOOPBACK)
goto out;
+   if (dst && (dst->dev->flags & IFF_LOOPBACK))
+   goto out;
 
skb->protocol = __constant_htons(ETH_P_IPV6);
venet_acct_classify_add_outgoing(out->nd_net->owner_ve->stat, skb);
diff --git a/kernel/ve/vznetstat/ip_vznetstat.c 
b/kernel/ve/vznetstat/ip_vznetstat.c
index d96065768ab3..5ea978d6dd88 100644
--- a/kernel/ve/vznetstat/ip_vznetstat.c
+++ b/kernel/ve/vznetstat/ip_vznetstat.c
@@ -77,6 +77,7 @@ static unsigned int venet_acct_out_hook(const struct 
nf_hook_ops *hook,
const struct net_device *out,
const struct nf_hook_state *state)
 {
+   struct dst_entry *dst = skb_dst(skb);
int res;
 
res = NF_ACCEPT;
@@ -84,6 +85,13 @@ static unsigned int venet_acct_out_hook(const struct 
nf_hook_ops *hook,
/* Skip loopback dev */
if (out->flags & IFF_LOOPBACK)
goto out;
+   /*
+* @skb is routed to loopback. Say, your eth0 has address 10.94.86.184
+* and ip_hdr(skb)->saddr == ip_hdr(skb)->daddr == 10.94.86.184.
+* Then, @out is eth0, so the above check does not catch @skb.
+*/
+   if (dst && (dst->dev->flags & IFF_LOOPBACK))
+   goto out;
 
/* Paranoia */
if (unlikely(!pskb_may_pull(skb, sizeof(struct iphdr))))


___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


[Devel] [PATCH RH7 2/3] vznetstat: Simplify venet_acct_out_hook()

2020-10-06 Thread Kirill Tkhai
Signed-off-by: Kirill Tkhai 
---
 kernel/ve/vznetstat/ip_vznetstat.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/ve/vznetstat/ip_vznetstat.c 
b/kernel/ve/vznetstat/ip_vznetstat.c
index 999a93b84cab..d96065768ab3 100644
--- a/kernel/ve/vznetstat/ip_vznetstat.c
+++ b/kernel/ve/vznetstat/ip_vznetstat.c
@@ -82,7 +82,7 @@ static unsigned int venet_acct_out_hook(const struct 
nf_hook_ops *hook,
res = NF_ACCEPT;
 
/* Skip loopback dev */
-   if (out == dev_net(out)->loopback_dev)
+   if (out->flags & IFF_LOOPBACK)
goto out;
 
/* Paranoia */


___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


[Devel] [PATCH RH7 1/3] vznetstat: Kill unused venet_acct_classify_sub_outgoing()

2020-10-06 Thread Kirill Tkhai
Signed-off-by: Kirill Tkhai 
---
 include/linux/vznetstat.h   |3 ---
 kernel/ve/vznetstat/vznetstat.c |   10 --
 2 files changed, 13 deletions(-)

diff --git a/include/linux/vznetstat.h b/include/linux/vznetstat.h
index ca5da0b18ed6..1b8bbce7fd24 100644
--- a/include/linux/vznetstat.h
+++ b/include/linux/vznetstat.h
@@ -59,7 +59,6 @@ void   venet_acct_put_stat(struct venet_stat *);
 
 void venet_acct_classify_add_incoming(struct venet_stat *, struct sk_buff 
*skb);
 void venet_acct_classify_add_outgoing(struct venet_stat *, struct sk_buff 
*skb);
-void venet_acct_classify_sub_outgoing(struct venet_stat *, struct sk_buff 
*skb);
 
 void venet_acct_classify_add_incoming_plain(struct venet_stat *stat,
struct ve_addr_struct *src_addr, int data_size);
@@ -74,8 +73,6 @@ static inline void venet_acct_classify_add_incoming(struct 
venet_stat *stat,
struct sk_buff *skb) {}
 static inline void venet_acct_classify_add_outgoing(struct venet_stat *stat,
struct sk_buff *skb) {}
-static inline void venet_acct_classify_sub_outgoing(struct venet_stat *stat,
-   struct sk_buff *skb) {}
 
 static inline void venet_acct_classify_add_incoming_plain(struct venet_stat 
*stat,
struct ve_addr_struct *src_addr, int data_size) {}
diff --git a/kernel/ve/vznetstat/vznetstat.c b/kernel/ve/vznetstat/vznetstat.c
index f366325bd91b..aa8d007adbbe 100644
--- a/kernel/ve/vznetstat/vznetstat.c
+++ b/kernel/ve/vznetstat/vznetstat.c
@@ -753,15 +753,6 @@ void venet_acct_classify_add_outgoing(struct venet_stat 
*stat, struct sk_buff *s
venet_acct_mark(stat, skb, class);
 }
 
-void venet_acct_classify_sub_outgoing(struct venet_stat *stat, struct sk_buff 
*skb)
-{
-   int class;
-
-   class = acct_one_skb(stat, skb, ACCT_OUT, -venet_acct_skb_size(skb));
-   /* Do not forget to mark skb for traffic shaper */
-   venet_acct_mark(stat, skb, class);
-}
-
 void venet_acct_classify_add_incoming_plain(struct venet_stat *stat,
struct ve_addr_struct *src_addr, int data_size)
 {
@@ -1190,7 +1181,6 @@ EXPORT_SYMBOL(venet_acct_find_stat);
 EXPORT_SYMBOL(venet_acct_put_stat);
 EXPORT_SYMBOL(venet_acct_classify);
 EXPORT_SYMBOL(venet_acct_classify_add_outgoing);
-EXPORT_SYMBOL(venet_acct_classify_sub_outgoing);
 EXPORT_SYMBOL(venet_acct_classify_add_incoming);
 EXPORT_SYMBOL(venet_acct_classify_add_incoming_plain);
 EXPORT_SYMBOL(venet_acct_classify_add_outgoing_plain);


___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


Re: [Devel] [PATCH rh7] tun: Silence allocation failer if user asked for too big header

2020-10-06 Thread Andrey Ryabinin


On 10/6/20 11:17 AM, Konstantin Khorenko wrote:
> On 10/05/2020 04:42 PM, Andrey Ryabinin wrote:
>> Userspace may ask the tun device to send a packet with a ridiculously
>> big header and trigger this:
>>
>>  [ cut here ]
>>  WARNING: CPU: 1 PID: 15366 at mm/page_alloc.c:3548 
>> __alloc_pages_nodemask+0x537/0x1200
>>  order 19 >= 11, gfp 0x2044d0
>>  Call Trace:
>>    dump_stack+0x19/0x1b
>>    __warn+0x17f/0x1c0
>>    warn_slowpath_fmt+0xad/0xe0
>>    __alloc_pages_nodemask+0x537/0x1200
>>    kmalloc_large_node+0x5f/0xd0
>>    __kmalloc_node_track_caller+0x425/0x630
>>    __kmalloc_reserve.isra.33+0x47/0xd0
>>    __alloc_skb+0xdd/0x5f0
>>    alloc_skb_with_frags+0x8f/0x540
>>    sock_alloc_send_pskb+0x5e5/0x940
>>    tun_get_user+0x38b/0x24a0 [tun]
>>    tun_chr_aio_write+0x13a/0x250 [tun]
>>    do_sync_readv_writev+0xdf/0x1c0
>>    do_readv_writev+0x1a5/0x850
>>    vfs_writev+0xba/0x190
>>    SyS_writev+0x17c/0x340
>>    system_call_fastpath+0x25/0x2a
>>
>> Just add __GFP_NOWARN and silently return -ENOMEM to fix this.
>>
>> https://jira.sw.ru/browse/PSBM-103639
>> Signed-off-by: Andrey Ryabinin 
>> ---
>>  drivers/net/tun.c  | 4 ++--
>>  include/net/sock.h | 7 +++
>>  net/core/sock.c    | 9 +
>>  3 files changed, 18 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
>> index e95a89ba48b7..c0879c6a9703 100644
>> --- a/drivers/net/tun.c
>> +++ b/drivers/net/tun.c
>> @@ -1142,8 +1142,8 @@ static struct sk_buff *tun_alloc_skb(struct tun_file 
>> *tfile,
>>  if (prepad + len < PAGE_SIZE || !linear)
>>  linear = len;
>>
>> -    skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
>> -   &err, 0);
>> +    skb = sock_alloc_send_pskb_flags(sk, prepad + linear, len - linear, 
>> noblock,
>> +    &err, 0, __GFP_NOWARN);
> 
> Maybe __GFP_ORDER_NOWARN?
> 

__GFP_ORDER_NOWARN doesn't silence the WARN triggered here:
if (order >= MAX_ORDER) {
WARN_ON_ONCE(!(gfp_mask & __GFP_NOWARN));
return NULL;
}




___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


[Devel] [PATCH RHEL7 COMMIT] vmscan: don't report reclaim progress if there was no progress.

2020-10-06 Thread Vasily Averin
The commit is pushed to "branch-rh7-3.10.0-1127.18.2.vz7.163.x-ovz" and will 
appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-1127.18.2.vz7.163.33
-->
commit 7a23b037273068bf27c72d8cbdfcb6416c001872
Author: Andrey Ryabinin 
Date:   Tue Oct 6 11:19:19 2020 +0300

vmscan: don't report reclaim progress if there was no progress.

__alloc_pages_slowpath relies on direct reclaim and did_some_progress
as an indicator that it makes sense to retry the allocation rather than
declaring OOM. shrink_zones checks whether all zones are reclaimable, and if
shrink_zone didn't make any progress, it prevents a premature OOM
killer invocation by reporting progress anyway.
This might happen if the LRU is full of dirty or writeback pages
and direct reclaim cannot clean those up.

zone_reclaimable allows rescanning the reclaimable lists several times
and restarting if a page is freed.  This is really subtle behavior, and it
might lead to a livelock when a single freed page keeps the allocator
looping but the current task is not able to allocate that single
page.  The OOM killer would be more appropriate than looping without any
progress for an unbounded amount of time.

Report no progress even if zones are reclaimable, as OOM is more appropriate
in that case.

https://jira.sw.ru/browse/PSBM-104900
Signed-off-by: Andrey Ryabinin 
---
 mm/vmscan.c | 24 
 1 file changed, 24 deletions(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 13ae9bd..85622f2 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2952,26 +2952,6 @@ static void snapshot_refaults(struct mem_cgroup 
*root_memcg, struct zone *zone)
} while ((memcg = mem_cgroup_iter(root_memcg, memcg, NULL)));
 }
 
-/* All zones in zonelist are unreclaimable? */
-static bool all_unreclaimable(struct zonelist *zonelist,
-   struct scan_control *sc)
-{
-   struct zoneref *z;
-   struct zone *zone;
-
-   for_each_zone_zonelist_nodemask(zone, z, zonelist,
-   gfp_zone(sc->gfp_mask), sc->nodemask) {
-   if (!populated_zone(zone))
-   continue;
-   if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
-   continue;
-   if (zone_reclaimable(zone))
-   return false;
-   }
-
-   return true;
-}
-
 static void shrink_tcrutches(struct scan_control *scan_ctrl)
 {
int nid;
@@ -3097,10 +3077,6 @@ out:
goto retry;
}
 
-   /* top priority shrink_zones still had more to do? don't OOM, then */
-   if (global_reclaim(sc) && !all_unreclaimable(zonelist, sc))
-   return 1;
-
return 0;
 }
 
___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


[Devel] [PATCH RHEL7 COMMIT] tun: Silence allocation failer if user asked for too big header

2020-10-06 Thread Vasily Averin
The commit is pushed to "branch-rh7-3.10.0-1127.18.2.vz7.163.x-ovz" and will 
appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-1127.18.2.vz7.163.33
-->
commit 1e0ad3477bddaf5621b7cc620e6ed64e405ec8cd
Author: Andrey Ryabinin 
Date:   Tue Oct 6 11:19:10 2020 +0300

tun: Silence allocation failer if user asked for too big header

Userspace may ask the tun device to send a packet with a ridiculously
big header and trigger this:

 [ cut here ]
 WARNING: CPU: 1 PID: 15366 at mm/page_alloc.c:3548 
__alloc_pages_nodemask+0x537/0x1200
 order 19 >= 11, gfp 0x2044d0
 Call Trace:
   dump_stack+0x19/0x1b
   __warn+0x17f/0x1c0
   warn_slowpath_fmt+0xad/0xe0
   __alloc_pages_nodemask+0x537/0x1200
   kmalloc_large_node+0x5f/0xd0
   __kmalloc_node_track_caller+0x425/0x630
   __kmalloc_reserve.isra.33+0x47/0xd0
   __alloc_skb+0xdd/0x5f0
   alloc_skb_with_frags+0x8f/0x540
   sock_alloc_send_pskb+0x5e5/0x940
   tun_get_user+0x38b/0x24a0 [tun]
   tun_chr_aio_write+0x13a/0x250 [tun]
   do_sync_readv_writev+0xdf/0x1c0
   do_readv_writev+0x1a5/0x850
   vfs_writev+0xba/0x190
   SyS_writev+0x17c/0x340
   system_call_fastpath+0x25/0x2a

Just add __GFP_NOWARN and silently return -ENOMEM to fix this.

https://jira.sw.ru/browse/PSBM-103639
Signed-off-by: Andrey Ryabinin 
---
 drivers/net/tun.c  | 4 ++--
 include/net/sock.h | 7 +++
 net/core/sock.c| 9 +
 3 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index e95a89b..c0879c6 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1142,8 +1142,8 @@ static struct sk_buff *tun_alloc_skb(struct tun_file 
*tfile,
if (prepad + len < PAGE_SIZE || !linear)
linear = len;
 
-   skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
-  &err, 0);
+   skb = sock_alloc_send_pskb_flags(sk, prepad + linear, len - linear, 
noblock,
+   &err, 0, __GFP_NOWARN);
if (!skb)
return ERR_PTR(err);
 
diff --git a/include/net/sock.h b/include/net/sock.h
index 4136d2c..1912d85 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1626,6 +1626,13 @@ extern struct sk_buff
*sock_alloc_send_pskb(struct sock *sk,
  int noblock,
  int *errcode,
  int max_page_order);
+extern struct sk_buff  *sock_alloc_send_pskb_flags(struct sock *sk,
+ unsigned long header_len,
+ unsigned long data_len,
+ int noblock,
+ int *errcode,
+ int max_page_order,
+ gfp_t extra_flags);
 extern void *sock_kmalloc(struct sock *sk, int size,
  gfp_t priority);
 extern void sock_kfree_s(struct sock *sk, void *mem, int size);
diff --git a/net/core/sock.c b/net/core/sock.c
index 508fc60..07ea42f 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1964,6 +1964,15 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, 
unsigned long header_len,
 }
 EXPORT_SYMBOL(sock_alloc_send_pskb);
 
+struct sk_buff *sock_alloc_send_pskb_flags(struct sock *sk, unsigned long 
header_len,
+unsigned long data_len, int noblock,
+int *errcode, int max_page_order, gfp_t 
extra_flags)
+{
+   return __sock_alloc_send_pskb(sk, header_len, data_len, noblock,
+   errcode, max_page_order, extra_flags);
+}
+EXPORT_SYMBOL(sock_alloc_send_pskb_flags);
+
 struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
int noblock, int *errcode)
 {
___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


Re: [Devel] [PATCH rh7] tun: Silence allocation failer if user asked for too big header

2020-10-06 Thread Konstantin Khorenko

On 10/05/2020 04:42 PM, Andrey Ryabinin wrote:

Userspace may ask the tun device to send a packet with a ridiculously
big header and trigger this:

 [ cut here ]
 WARNING: CPU: 1 PID: 15366 at mm/page_alloc.c:3548 
__alloc_pages_nodemask+0x537/0x1200
 order 19 >= 11, gfp 0x2044d0
 Call Trace:
   dump_stack+0x19/0x1b
   __warn+0x17f/0x1c0
   warn_slowpath_fmt+0xad/0xe0
   __alloc_pages_nodemask+0x537/0x1200
   kmalloc_large_node+0x5f/0xd0
   __kmalloc_node_track_caller+0x425/0x630
   __kmalloc_reserve.isra.33+0x47/0xd0
   __alloc_skb+0xdd/0x5f0
   alloc_skb_with_frags+0x8f/0x540
   sock_alloc_send_pskb+0x5e5/0x940
   tun_get_user+0x38b/0x24a0 [tun]
   tun_chr_aio_write+0x13a/0x250 [tun]
   do_sync_readv_writev+0xdf/0x1c0
   do_readv_writev+0x1a5/0x850
   vfs_writev+0xba/0x190
   SyS_writev+0x17c/0x340
   system_call_fastpath+0x25/0x2a

Just add __GFP_NOWARN and silently return -ENOMEM to fix this.

https://jira.sw.ru/browse/PSBM-103639
Signed-off-by: Andrey Ryabinin 
---
 drivers/net/tun.c  | 4 ++--
 include/net/sock.h | 7 +++
 net/core/sock.c| 9 +
 3 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index e95a89ba48b7..c0879c6a9703 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1142,8 +1142,8 @@ static struct sk_buff *tun_alloc_skb(struct tun_file 
*tfile,
if (prepad + len < PAGE_SIZE || !linear)
linear = len;

-   skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
-  &err, 0);
+   skb = sock_alloc_send_pskb_flags(sk, prepad + linear, len - linear, 
noblock,
+   &err, 0, __GFP_NOWARN);


Maybe __GFP_ORDER_NOWARN?


if (!skb)
return ERR_PTR(err);

diff --git a/include/net/sock.h b/include/net/sock.h
index 4136d2c3080c..1912d85ecc4d 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1626,6 +1626,13 @@ extern struct sk_buff
*sock_alloc_send_pskb(struct sock *sk,
  int noblock,
  int *errcode,
  int max_page_order);
+extern struct sk_buff  *sock_alloc_send_pskb_flags(struct sock *sk,
+ unsigned long header_len,
+ unsigned long data_len,
+ int noblock,
+ int *errcode,
+ int max_page_order,
+ gfp_t extra_flags);
 extern void *sock_kmalloc(struct sock *sk, int size,
  gfp_t priority);
 extern void sock_kfree_s(struct sock *sk, void *mem, int size);
diff --git a/net/core/sock.c b/net/core/sock.c
index 508fc6093a26..07ea42f976cf 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1964,6 +1964,15 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, 
unsigned long header_len,
 }
 EXPORT_SYMBOL(sock_alloc_send_pskb);

+struct sk_buff *sock_alloc_send_pskb_flags(struct sock *sk, unsigned long 
header_len,
+unsigned long data_len, int noblock,
+int *errcode, int max_page_order, gfp_t 
extra_flags)
+{
+   return __sock_alloc_send_pskb(sk, header_len, data_len, noblock,
+   errcode, max_page_order, extra_flags);
+}
+EXPORT_SYMBOL(sock_alloc_send_pskb_flags);
+
 struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
int noblock, int *errcode)
 {


___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel