[Devel] [PATCH rh7] net: Kill hold_net release_net

2015-11-24 Thread Stanislav Kinsburskiy
hold_net and release_net were an idea that turned out to be useless.
The code has been disabled since 2008. Kill the code; it is long past due.

Upstream commit efd7ef1c1929d7a0329d4349252863c04d6f1729

Signed-off-by: "Eric W. Biederman" 
Acked-by: Eric Dumazet 
Signed-off-by: David S. Miller 
Signed-off-by: Stanislav Kinsbursky 
---
 include/linux/netdevice.h |3 +--
 include/net/fib_rules.h   |9 +
 include/net/net_namespace.h   |   30 --
 include/net/sock.h|2 +-
 net/core/dev.c|2 --
 net/core/fib_rules.c  |   17 +++--
 net/core/neighbour.c  |9 ++---
 net/core/net_namespace.c  |   11 ---
 net/core/sock.c   |1 -
 net/ipv4/fib_semantics.c  |3 +--
 net/ipv4/inet_hashtables.c|3 +--
 net/ipv4/inet_timewait_sock.c |3 +--
 net/ipv6/addrlabel.c  |5 +
 net/ipv6/ip6_flowlabel.c  |3 +--
 net/openvswitch/datapath.c|4 +---
 15 files changed, 14 insertions(+), 91 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 1ed3795..a796a50 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1574,8 +1574,7 @@ static inline
 void dev_net_set(struct net_device *dev, struct net *net)
 {
 #ifdef CONFIG_NET_NS
-   release_net(dev->nd_net);
-   dev->nd_net = hold_net(net);
+   dev->nd_net = net;
 #endif
 }
 
diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index e361f48..5b0bbc7 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -87,17 +87,10 @@ static inline void fib_rule_get(struct fib_rule *rule)
atomic_inc(&rule->refcnt);
 }
 
-static inline void fib_rule_put_rcu(struct rcu_head *head)
-{
-   struct fib_rule *rule = container_of(head, struct fib_rule, rcu);
-   release_net(rule->fr_net);
-   kfree(rule);
-}
-
 static inline void fib_rule_put(struct fib_rule *rule)
 {
if (atomic_dec_and_test(&rule->refcnt))
-   call_rcu(&rule->rcu, fib_rule_put_rcu);
+   kfree_rcu(rule, rcu);
 }
 
 static inline u32 frh_get_table(struct fib_rule_hdr *frh, struct nlattr **nla)
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index ea17176..9bc1813 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -47,11 +47,6 @@ struct net {
	atomic_t		count;		/* To decided when the network
						 *  namespace should be shut down.
						 */
-#ifdef NETNS_REFCNT_DEBUG
-	atomic_t		use_count;	/* To track references we
-						 * destroy on demand
-						 */
-#endif
spinlock_t  rules_mod_lock;
 
struct list_headlist;   /* list of network namespaces */
@@ -244,31 +239,6 @@ static inline int net_access_allowed(const struct net *net, const struct net *cu
 #endif
 
 
-#ifdef NETNS_REFCNT_DEBUG
-static inline struct net *hold_net(struct net *net)
-{
-   if (net)
-   atomic_inc(&net->use_count);
-   return net;
-}
-
-static inline void release_net(struct net *net)
-{
-   if (net)
-   atomic_dec(&net->use_count);
-}
-
-#else
-static inline struct net *hold_net(struct net *net)
-{
-   return net;
-}
-
-static inline void release_net(struct net *net)
-{
-}
-#endif
-
 #ifdef CONFIG_NET_NS
 
 static inline void write_pnet(struct net **pnet, struct net *net)
diff --git a/include/net/sock.h b/include/net/sock.h
index 315d866..3c5b816 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2269,7 +2269,7 @@ void sock_net_set(struct sock *sk, struct net *net)
 static inline void sk_change_net(struct sock *sk, struct net *net)
 {
put_net(sock_net(sk));
-   sock_net_set(sk, hold_net(net));
+   sock_net_set(sk, net);
 }
 
 static inline void sk_change_net_get(struct sock *sk, struct net *net)
diff --git a/net/core/dev.c b/net/core/dev.c
index a8dd551..cae648d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6019,8 +6019,6 @@ void free_netdev(struct net_device *dev)
return;
}
 
-   release_net(dev_net(dev));
-
kfree(dev->_tx);
 #ifdef CONFIG_RPS
kfree(dev->_rx);
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index bc69cef..66be38b 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -31,7 +31,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops,
r->pref = pref;
r->table = table;
r->flags = flags;
-   r->fr_net = hold_net(ops->fro_net);
+   r->fr_net = ops->fro_net;
 
/* The lock is not required here, the list in unreacheable
 * at the moment this function is called */
@@ -117,7 +117,6 @@ static int __fib_rules_register(struct fib_rules_ops *ops)
  

[Devel] [PATCH rh7 v2] tun: expose "owner", "group" and "tun_flags" attributes in container

2015-11-25 Thread Stanislav Kinsburskiy
https://jira.sw.ru/browse/PSBM-39726

Signed-off-by: Stanislav Kinsburskiy 
---
 drivers/net/tun.c |5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index dd14140..e6735c8 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1742,10 +1742,9 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
if (err < 0)
goto err_detach;
 
-   if ((dev_net(tun->dev) == &init_net) &&
-   (device_create_file(&tun->dev->dev, &dev_attr_tun_flags) ||
+   if (device_create_file(&tun->dev->dev, &dev_attr_tun_flags) ||
device_create_file(&tun->dev->dev, &dev_attr_owner) ||
-   device_create_file(&tun->dev->dev, &dev_attr_group)))
+   device_create_file(&tun->dev->dev, &dev_attr_group))
pr_err("Failed to create tun sysfs files\n");
}
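
A quick way to see the effect inside a container (illustrative; the device
name is an example): once a tun/tap device, say tun0, exists in the CT, the
attributes should be visible under sysfs, e.g.

  # cat /sys/class/net/tun0/owner
  # cat /sys/class/net/tun0/group
  # cat /sys/class/net/tun0/tun_flags

Before this patch the files were created only for devices in the initial
network namespace (the dev_net(tun->dev) == &init_net check removed above).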
 

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


Re: [Devel] [PATCH rh7] vtty: Register devices in sysfs

2015-12-01 Thread Stanislav Kinsburskiy



On 01.12.2015 11:17, Cyrill Gorcunov wrote:

On Mon, Nov 30, 2015 at 05:58:15PM +0300, Cyrill Gorcunov wrote:

On Mon, Nov 30, 2015 at 05:47:19PM +0300, Cyrill Gorcunov wrote:

and dev_match_devt helper.

Why not do it from userspace?

You mean via udev?

Drop this question. I got what you mean. Letme check.

After a few conversations it seems the better option is to
merge this fix for Beta2, but for Beta2.5 simply

  - revert all our tty device-namespace code, leaving the
native code instead
  - set up proper rules (such as /sys/devices/virtual/tty[0-12] rx
in default permissions)
  - drop legacy pty support

which would make the code easier to support in the future. Actually I
am already testing these changes.


How will the per-VE switch on file open be managed?
By using get_exec_env()?


___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


[Devel] [PATCH rh7] autofs: show pipe inode in mount options

2015-12-03 Thread Stanislav Kinsburskiy
This is required for CRIU to migrate a mount point when the write end in user
space is closed.

https://jira.sw.ru/browse/PSBM-41217

Signed-off-by: Stanislav Kinsburskiy 
---
 fs/autofs4/inode.c |4 
 1 file changed, 4 insertions(+)

diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index e9f2597..80cc5d7 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -81,6 +81,10 @@ static int autofs4_show_options(struct seq_file *m, struct dentry *root)
return 0;
 
seq_printf(m, ",fd=%d", sbi->pipefd);
+   if (sbi->pipe)
+   seq_printf(m, ",pipe_ino=%ld", sbi->pipe->f_inode->i_ino);
+   else
+   seq_printf(m, ",pipe_ino=-1");
if (!uid_eq(root_inode->i_uid, GLOBAL_ROOT_UID))
seq_printf(m, ",uid=%u",
from_kuid_munged(&init_user_ns, root_inode->i_uid));
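
For illustration only (the field values are made up): with this patch an
autofs entry in /proc/mounts inside the container looks roughly like

  automount /mnt/direct autofs rw,relatime,fd=6,pipe_ino=54321,pgrp=1234,timeout=300,minproto=5,maxproto=5,direct 0 0

so CRIU can locate the kernel side of the autofs pipe by its inode number
even when the user-space write end has already been closed.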

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


Re: [Devel] [PATCH rh7] autofs: show pipe inode in mount options

2015-12-03 Thread Stanislav Kinsburskiy

https://jira.sw.ru/browse/PSBM-41961

On 03.12.2015 15:34, Stanislav Kinsburskiy wrote:

This is required for CRIU to migrate a mount point, when write end in user
space is closed.

https://jira.sw.ru/browse/PSBM-41217

Signed-off-by: Stanislav Kinsburskiy 
---
  fs/autofs4/inode.c |4 
  1 file changed, 4 insertions(+)

diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index e9f2597..80cc5d7 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -81,6 +81,10 @@ static int autofs4_show_options(struct seq_file *m, struct 
dentry *root)
return 0;
  
  	seq_printf(m, ",fd=%d", sbi->pipefd);

+   if (sbi->pipe)
+   seq_printf(m, ",pipe_ino=%ld", sbi->pipe->f_inode->i_ino);
+   else
+   seq_printf(m, ",pipe_ino=-1");
if (!uid_eq(root_inode->i_uid, GLOBAL_ROOT_UID))
seq_printf(m, ",uid=%u",
from_kuid_munged(&init_user_ns, root_inode->i_uid));

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


[Devel] [PATCH rh7] fcntl: allow pipes with O_DIRECT regardless of "odirect_enable" status

2015-12-15 Thread Stanislav Kinsburskiy
This flag represents packetized mode for pipes, which only changes how the pipe
holds its data, and shouldn't be affected by the "odirect_enable" toggle.
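
For context, a minimal user-space sketch of packetized pipe mode; this is
illustrative only (not part of the patch) and just shows the fcntl(F_SETFL)
path that setfl() handles:

/* Illustrative sketch: O_DIRECT on a pipe selects packet mode (one packet
 * per write); it never means real direct I/O, which is why the per-CT
 * "odirect_enable" toggle should not strip it. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int p[2];
	char buf[64];

	if (pipe(p))
		return 1;

	/* Switch the write end to packet mode via F_SETFL. */
	if (fcntl(p[1], F_SETFL, O_DIRECT) < 0)
		perror("fcntl(F_SETFL, O_DIRECT)");

	write(p[1], "one", 3);
	write(p[1], "two", 3);
	/* In packet mode each read returns at most one packet ("one"). */
	printf("first read: %zd bytes\n", read(p[0], buf, sizeof(buf)));
	return 0;
}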

Signed-off-by: Stanislav Kinsburskiy 
---
 fs/fcntl.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/fcntl.c b/fs/fcntl.c
index cfa349c..1df1ffa 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -70,7 +70,7 @@ static int setfl(int fd, struct file * filp, unsigned long arg)
struct inode * inode = file_inode(filp);
int error = 0;
 
-   if (!may_use_odirect())
+   if (!S_ISFIFO(inode->i_mode) && !may_use_odirect())
arg &= ~O_DIRECT;
if (ve_fsync_behavior() == FSYNC_NEVER)
arg &= ~O_SYNC;

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


Re: [Devel] [RFC rhel7] Disabling mounting cgroups from inside of container

2016-01-18 Thread Stanislav Kinsburskiy



On 18.01.2016 16:16, Cyrill Gorcunov wrote:

On Mon, Jan 18, 2016 at 01:33:39PM +0300, Konstantin Khorenko wrote:

JFYI: i'm not going to drop rh7-revert-ve-mark because otherwise cgroups 
mounted in 1 CT will be visible from other Containers as well.
(those cgroups which are mounted during CT start or restore).

If we won't revert it we still continue failing with -EBUSY.
As I said it's CRIU specific that we're mounting toplevel cgroups
on the restore (but libvzctl already mounted them as well).

That said without revert we won't able to restore.


It's not clear yet what the _exact_ reason for EBUSY is.
More investigation has to be performed.
___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


Re: [Devel] [RFC rhel7] Disabling mounting cgroups from inside of container

2016-01-18 Thread Stanislav Kinsburskiy



On 18.01.2016 16:16, Cyrill Gorcunov wrote:

On Mon, Jan 18, 2016 at 01:33:39PM +0300, Konstantin Khorenko wrote:

JFYI: i'm not going to drop rh7-revert-ve-mark because otherwise cgroups 
mounted in 1 CT will be visible from other Containers as well.
(those cgroups which are mounted during CT start or restore).

If we won't revert it we still continue failing with -EBUSY.
As I said it's CRIU specific that we're mounting toplevel cgroups
on the restore (but libvzctl already mounted them as well).

That said without revert we won't able to restore.


Ok, now it's clear.
Kostya, please, drop my patch.
It's the only simple way to forbid cgroups mounts completely.
___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


Re: [Devel] [Q] Connecting a physical HDD/partition to a container

2016-01-27 Thread Stanislav Kinsburskiy



On 27.01.2016 12:58, Evgenii Shatokhin wrote:

On 27.01.2016 13:29, Stanislav Kinsburskiy wrote:


On Jan 27, 2016, 11:12 AM, Evgenii Shatokhin wrote:


Hi,

Is there a way to make a partition of a physical HDD, say, sdb1, directly
available to a container, bypassing ploop?



Hi,

There is a way of exposing a device to a container.
First of all, you'll need a device node in /dev.
You can create it manually.


I created it with
vzctl set 101 --devnodes sdb1:rw --save

The node is now present in the CT. However, if I try to mount it in 
the CT (mount /dev/sdb1 /mnt), no error is reported but mount seems to 
have no effect. No files are seen in /mnt that are present on that 
partition.


So, I guess, additional steps are needed?



You might need the sysfs layout.
Please strace your mount call and send the log as a reply (attached).


___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


Re: [Devel] [Q] Connecting a physical HDD/partition to a container

2016-01-27 Thread Stanislav Kinsburskiy



On 27.01.2016 16:02, Evgenii Shatokhin wrote:

On 27.01.2016 16:38, Igor Sukhih wrote:

On 01/27/2016 04:29 PM, Stanislav Kinsburskiy wrote:

On Jan 27, 2016, 14:24, Evgenii Shatokhin wrote:

I created it with
vzctl set 101 --devnodes sdb1:rw --save

The node is now present in the CT. However, if I try to mount it in
the CT (mount /dev/sdb1 /mnt), no error is reported but mount seems to
have no effect. No files are seen in /mnt that are present on that
partition.

So, I guess, additional steps are needed?


You might need sysfs layout.
Please, strace your mount call and send it as a reply (attached).


Here it is.

As far as I can see in the strace log, mount tries to open
/sys/dev/block/8:17, indeed. This file is present on the host but not in
the CT.


Expected.
Then you have to expose sysfs for this device to the container.
Igor, could you advise, please?



Let's try to use the secondary disk feature first

vzctl set VEID --device-add hdd --device /dev/sda --save


No luck. The CT fails to start with "Failed to configure disk" after 
that.




You can do it manually.
There is a file:

/sys/fs/cgroup/ve/c/ve.sysfs_permissions

which controls sysfs layout.

Please, reply with results of:

# ls -l /sys/dev/block/8:17

I'll send you what you have to write to this file.


Details:
---
[root@vz-test2]# vzctl set 101 --device-add hdd --device /dev/sdb --save
The device /dev/sdb uuid={d6f09c24-99f3-4ead-8504-4a8ff810490b} has 
been successfully added.


[root@vz-test2]# # vzctl start 101
Starting Container ...
Mount image: /vz/private/6bd8d99b-e20c-4204-931a-f12dea70ea95/root.hdd
Opening delta 
/vz/private/6bd8d99b-e20c-4204-931a-f12dea70ea95/root.hdd/root.hds
Adding delta dev=/dev/ploop41020 
img=/vz/private/6bd8d99b-e20c-4204-931a-f12dea70ea95/root.hdd/root.hds 
(rw)
Mounting /dev/ploop41020p1 at 
/vz/root/6bd8d99b-e20c-4204-931a-f12dea70ea95 fstype=ext4 
data='balloon_ino=12,pfcache_csum,,'

Container is mounted
Os release: 3.10.0-327.3.1.vz7.10.4
Create /etc/tmpfiles.d/device-ploop41020p1.conf
Setting permissions for 
image=/vz/private/6bd8d99b-e20c-4204-931a-f12dea70ea95/root.hdd

Setting permissions 104 dev 0xa030b6c1
Setting mount options for image=/dev/sdb opts=8:17;1 
balloon_ino=12,pfcache_csum,,

Setting permissions for image=/dev/sdb
Setting permissions 104 dev 0x811
Create /etc/tmpfiles.d/device-ploop1p1.conf
Can't open /sys/class/block/ploop1p1/uevent for writing: No such file 
or directory

Failed to configure disk
Cancel init execution
Unmount image: /vz/private/6bd8d99b-e20c-4204-931a-f12dea70ea95/root.hdd
Unmounting file system at /vz/root/6bd8d99b-e20c-4204-931a-f12dea70ea95
Unmounting device /dev/ploop41020
Container is unmounted
Failed to start the Container
---

Regards,
Eugene


___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


Re: [Devel] call_usermodehelper in containers

2016-02-13 Thread Stanislav Kinsburskiy



On 13.02.2016 00:39, Ian Kent wrote:

On Fri, 2013-11-15 at 15:54 +0400, Stanislav Kinsbursky wrote:

On 15.11.2013 15:03, Eric W. Biederman wrote:

Stanislav Kinsbursky  writes:


On 12.11.2013 17:30, Jeff Layton wrote:

On Tue, 12 Nov 2013 17:02:36 +0400
Stanislav Kinsbursky  wrote:


On 12.11.2013 15:12, Jeff Layton wrote:

On Mon, 11 Nov 2013 16:47:03 -0800
Greg KH  wrote:


On Mon, Nov 11, 2013 at 07:18:25AM -0500, Jeff Layton
wrote:

We have a bit of a problem wrt to upcalls that use
call_usermodehelper
with containers and I'd like to bring this to some sort
of resolution...

A particularly problematic case (though there are
others) is the
nfsdcltrack upcall. It basically uses
call_usermodehelper to run a
program in userland to track some information on stable
storage for
nfsd.

I thought the discussion at the kernel summit about this
issue was:
- don't do this.
- don't do it.
- if you really need to do this, fix nfsd


Sorry, I couldn't make the kernel summit so I missed that
discussion. I
guess LWN didn't cover it?

In any case, I guess then that we'll either have to come up
with some
way to fix nfsd here, or simply ensure that nfsd can never
be started
unless root in the container has a full set of a full set of
capabilities.

One sort of Rube Goldberg possibility to fix nfsd is:

- when we start nfsd in a container, fork off an extra
kernel thread
  that just sits idle. That thread would need to be a
descendant of the
  userland process that started nfsd, so we'd need to
create it with
  kernel_thread().

- Have the kernel just start up the UMH program in the
init_ns mount
  namespace as it currently does, but also pass the pid
of the idle
  kernel thread to the UMH upcall.

- The program will then use /proc//root and
/proc//ns/* to set
  itself up for doing things properly.

Note that with this mechanism we can't actually run a
different binary
per container, but that's probably fine for most purposes.


Hmmm... Why we can't? We can go a bit further with userspace
idea.

We use UMH some very limited number of user programs. For 2,
actually:
1) /sbin/nfs_cache_getent
2) /sbin/nfsdcltrack


No, the kernel uses them for a lot more than that. Pretty much
all of
the keys API upcalls use it. See all of the callers of
call_usermodehelper. All of them are running user binaries out
of the
kernel, and almost all of them are certainly broken wrt
containers.


If we convert them into proxies, which use /proc//root
and /proc//ns/*, this will allow us to lookup the right
binary.
The only limitation here is presence of this "proxy" binaries
on "host".


Suppose I spawn my own container as a user, using all of this
spiffy
new user namespace stuff. Then I make the kernel use
call_usermodehelper to call the upcall in the init_ns, and then
trick
it into running my new "escape_from_namespace" program with
"real" root
privileges.

I don't think we can reasonably assume that having the kernel
exec an
arbitrary binary inside of a container is safe. Doing so inside
of the
init_ns is marginally more safe, but only marginally so...


And we don't need any significant changes in kernel.

BTW, Jeff, could you remind me, please, why exactly we need to
use UMH to run the binary?
What are this capabilities, which force us to do so?


Nothing _forces_ us to do so, but upcalls are very difficult to
handle,
and UMH has a lot of advantages over a long-running daemon
launched by
userland.

Originally, I created the nfsdcltrack upcall as a running daemon
called
nfsdcld, and the kernel used rpc_pipefs to communicate with it.

Everyone hated it because no one likes to have to run daemons
for
infrequently used upcalls. It's a pain for users to ensure that
it's
running and it's a pain to handle when it isn't. So, I was
encouraged
to turn that instead into a UMH upcall.

But leaving that aside, this problem is a lot larger than just
nfsd. We
have a *lot* of UMH upcalls in the kernel, so this problem is
more
general than just "fixing" nfsd's.


Ok. So we are talking about generic approach to UMH support in a
container (and/or namespace).

Actually, as far as I can see, there is more than one aspect
which is not supported.
One of them is executing the right binary. Another one is
capabilities (and maybe there are more, like user namespaces), but I
don't really care about them for now.
Executing the right binary, actually, is not about namespaces at
all. This is about the lookup implementation in VFS
(do_execve_common).




It would be great to unshare FS for the forked UMH kthread and swap it to
the desired root. This would solve the problem with proper lookup. However,
as far as I understand, this approach is not welcomed by the
community.

I don't understand that one.  Having a preforked thread with the
proper
environment that can act like kthreadd in terms of spawning user
mode
helpers works and is simple.  The only downside I can see is that
there
is extra overhead.


What do you mean by "simple" here? Simp

Re: [Devel] CRIU-2.1.0.1 is out

2016-04-15 Thread Stanislav Kinsburskiy

Hi,

Nothing what I need to develop NFS is present in repo.
NFS development is still based on criu-1.8. And the reason for this is
that the Makefile is broken starting from 2.0.


On 14.04.2016 18:36, Cyrill Gorcunov wrote:

Guys, I'm pleased to announce criu 2.1.0.1, it's based on top
of vanilla criu-dev branch which in turn based on official 2.1
release. I've had to rebase on dev branch due to active development
on AIO and NFS area (Kirill and Stanislav, btw, please check that
everything you need is present in repo).

Give it a shot.


___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


[Devel] [TRD] Autofs migration

2016-04-19 Thread Stanislav Kinsburskiy

1. Feature

Autofs mount points migration via CRIU
https://jira.sw.ru/browse/PSBM-41217

2. Description

CRIU now supports autofs file system migration, including direct, 
indirect and offset mount types.


3. Products

Virtuozzo 7

Packages:
criu-2.1.0.4.vz7
libvzctl-7.0.199

4. Testing

4.1 Basics
** Install criu and libvzctl rpm packages
** Create a container, and check:
** Check that autofs is listed in /proc/filesystems in the container
** Check that /dev/autofs is accessible
** Install the autofs package inside the container
** Follow the autofs guide to create an autofs _direct_ mount point
with some file system mounted on top (tmpfs, for example). The "man
autofs" command might help; sample maps are sketched after this list.
** Follow the autofs guide to create an autofs _indirect_ mount point
with some file system mounted on top (tmpfs, for example).
** Follow the autofs guide to create an autofs _offset_ mount point with
some file system mounted on top (tmpfs, for example).

** Suspend and restore the container
** Check that the autofs mounts and the nested mounts were migrated
successfully (via /proc, for example).
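
For reference, one possible set of maps covering the three mount types above
(an untested sketch; the paths, timeouts and the tmpfs example are arbitrary
and only meant as a starting point):

# /etc/auto.master (illustrative)
/-         /etc/auto.direct    --timeout=60
/mnt/auto  /etc/auto.indirect  --timeout=60

# /etc/auto.direct -- direct map: the key is an absolute path
/mnt/direct  -fstype=tmpfs  :tmpfs

# /etc/auto.indirect -- indirect map: keys appear under /mnt/auto;
# the second entry is a multi-mount with an offset under ./sub
plain   -fstype=tmpfs  :tmpfs
nested  -fstype=tmpfs  /      :tmpfs \
                       /sub   :tmpfs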


4.2 Systemd autofs services
** Start any systemd autofs service (for example,
proc-sys-fs-binfmt_misc.automount) in the container

** Check that the service started successfully
** Suspend and restore the container
** Check that the autofs and nested mount points were migrated
successfully.

** Check that the systemd service has active status
** Unmount the nested file system manually
** Access the systemd autofs mount point and check that the nested file
system is re-mounted again


4.3 Automount expiration
** Set up an autofs mount with a short timeout (10 seconds, for example)
in a container via any master: automount, systemd, or other
** Activate the autofs mount point (the nested mount point should be
mounted by the autofs master)

** Migrate (or suspend/resume) the container.
** Check that the nested mount point is unmounted within the timeout
after restore.


5. Known issues

Autofs migration has an issue related to systemd-controlled autofs
mount points. Systemd saves the autofs mount point device number in its
internals and compares this number to the actual one, taken from the mount
point, on each autofs request from the kernel (mount, umount, expire, etc).
The problem is that after migration all mount points are created
manually and have _another_ device id, which leads to systemd ignoring
the kernel requests.
This problem can't be solved without some kind of "device namespaces"
abstraction. However, some of the systemd services like
proc-sys-fs-binfmt_misc.automount can be painlessly restarted after
restore, thus eliminating this issue.
Restart of the proc-sys-fs-binfmt_misc.automount service is done by CRIU via
an action script, provided by vzctl.


6. What was checked by developer

Both 4.1 and 4.2 test sequences

7. Feature owners

skinsbur...@virtuozzo.com

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


[Devel] [PATCH libvzctl 1/4] env setup: allow fuse device on start

2016-05-13 Thread Stanislav Kinsburskiy
Fuse will be used for network file systems migration

Signed-off-by: Stanislav Kinsburskiy 
---
 lib/env_nsops.c |1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/env_nsops.c b/lib/env_nsops.c
index dc8b062..40eba60 100644
--- a/lib/env_nsops.c
+++ b/lib/env_nsops.c
@@ -452,6 +452,7 @@ static int init_env_cgroup(struct vzctl_env_handle *h)
"c 1:11 mw",/* kmsg */
"c 10:200 rmw", /* tun */
"c 10:235 rwm", /* autofs */
+   "c 10:229 rwm", /* fuse */
};
char *cpu[] = {
"cpuset.cpus",

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


[Devel] [PATCH libvzctl 2/4] vz-cpt: NFS suppress added

2016-05-13 Thread Stanislav Kinsburskiy
This patch adds usage of a new special toggle, which suppresses NFS requests to
the server when enabled.
It is required to allow criu to dump NFS mounts.

Signed-off-by: Stanislav Kinsburskiy 
---
 scripts/vz-cpt.in |   12 
 1 file changed, 12 insertions(+)

diff --git a/scripts/vz-cpt.in b/scripts/vz-cpt.in
index f79f665..3d7198a 100755
--- a/scripts/vz-cpt.in
+++ b/scripts/vz-cpt.in
@@ -56,8 +56,19 @@ function cg_dump_props {
return 0
 }
 
+function suppress_nfs {
+   value=0
+   path=/proc/$VE_PID/net/nfsfs/suppress_revalidate
+
+   [ "$1" == "true" ] && value=1
+   [ -w  $path ] || return 0
+
+   echo $value > /proc/$VE_PID/net/nfsfs/suppress_revalidate
+}
+
 mkdir -p $dumpdir &&
 cg_dump_props $dumpdir &&
+suppress_nfs true &&
 criu dump -v$CRIU_LOGLEVEL -o dump.log \
--file-locks\
--tcp-established   \
@@ -74,6 +85,7 @@ criu dump -v$CRIU_LOGLEVEL -o dump.log \
$CRIU_EXTRA_ARGS
 
 if [ $? -ne 0 ]; then
+   suppress_nfs false
 [ -d $VE_DUMP_DIR.fail ] && rm -rf $VE_DUMP_DIR.fail
 mv -f $dumpdir $VE_DUMP_DIR.fail
 echo Failed to checkpoint the Container

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


[Devel] [PATCH libvzctl 3/4] vz-rst: SPFS to stub mode script added

2016-05-13 Thread Stanislav Kinsburskiy
This script is used to switch all the spfs mounts to Stub mode during restore.
It is required to make sure that all requests to NFS files will get stuck once
the processes are released.

Signed-off-by: Stanislav Kinsburskiy 
---
 scripts/vz-rst.in |7 +++
 1 file changed, 7 insertions(+)

diff --git a/scripts/vz-rst.in b/scripts/vz-rst.in
index da0126d..0e9b884 100755
--- a/scripts/vz-rst.in
+++ b/scripts/vz-rst.in
@@ -74,6 +74,12 @@ if [ -f "$autofs_actions_path" ]; then
autofs_actions="--action-script $autofs_actions_path"
 fi
 
+# NFS actions
+nfs_actions_path=/root/fuse-stub-fs/scripts/spfs-change-mode.sh
+if [ -f "$nfs_actions_path" ]; then
+   nfs_actions="--action-script $nfs_actions_path"
+fi
+
 criu restore -v$CRIU_LOGLEVEL  \
--file-locks\
--tcp-established   \
@@ -90,6 +96,7 @@ criu restore -v$CRIU_LOGLEVEL \
--cgroup-root /$VEID\
$ext_mount_map  \
$autofs_actions \
+   $nfs_actions\
$veth_args
 
 if [ $? -eq 0 ]; then

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


[Devel] [PATCH libvzctl 0/4] NFS migration precursor patches

2016-05-13 Thread Stanislav Kinsburskiy
This patch set is required to allow criu to migrate NFS mounts and the
processes using them.
___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


[Devel] [PATCH libvzctl 4/4] vz-cpt: treat /var/run/rpcbind.sock as external

2016-05-13 Thread Stanislav Kinsburskiy
This socket appears when the portmapper is running.
Mark it as external, so criu can skip it during dump.

Signed-off-by: Stanislav Kinsburskiy 
---
 scripts/vz-cpt.in |1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/vz-cpt.in b/scripts/vz-cpt.in
index 3d7198a..b8f84e7 100755
--- a/scripts/vz-cpt.in
+++ b/scripts/vz-cpt.in
@@ -79,6 +79,7 @@ criu dump -v$CRIU_LOGLEVEL -o dump.log \
--freeze-cgroup $VE_FREEZE_CG   \
--ghost-limit 50M   \
--action-script $action_script  \
+   --ext-unix-sk /var/run/rpcbind.sock \
-t $VE_PID  \
-D $dumpdir \
$ext_mount_map  \

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


Re: [Devel] [PATCH libvzctl 0/4] NFS migration precursor patches

2016-05-18 Thread Stanislav Kinsburskiy

Please ignore.
Another version will be sent.

On 13.05.2016 19:31, Stanislav Kinsburskiy wrote:

This patch set is required to allow criu to migrated NFS mounts and processes,
using it.


___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


[Devel] [PATCH libvzctl v2 1/3] vz-rst: SPFS to stub mode script added

2016-05-18 Thread Stanislav Kinsburskiy
This script is used to switch all the spfs mounts to Stub mode during restore.
It is required to make sure that all requests to NFS files will get stuck once
the processes are released.

Signed-off-by: Stanislav Kinsburskiy 
---
 scripts/vz-rst.in |7 +++
 1 file changed, 7 insertions(+)

diff --git a/scripts/vz-rst.in b/scripts/vz-rst.in
index da0126d..e6851c7 100755
--- a/scripts/vz-rst.in
+++ b/scripts/vz-rst.in
@@ -74,6 +74,12 @@ if [ -f "$autofs_actions_path" ]; then
autofs_actions="--action-script $autofs_actions_path"
 fi
 
+# SPFS actions
+spfs_actions_path=/usr/libexec/criu/scripts/spfs-change-mode.sh
+if [ -f "$spfs_actions_path" ]; then
+   spfs_actions="--action-script $spfs_actions_path"
+fi
+
 criu restore -v$CRIU_LOGLEVEL  \
--file-locks\
--tcp-established   \
@@ -90,6 +96,7 @@ criu restore -v$CRIU_LOGLEVEL \
--cgroup-root /$VEID\
$ext_mount_map  \
$autofs_actions \
+   $spfs_actions   \
$veth_args
 
 if [ $? -eq 0 ]; then

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


[Devel] [PATCH libvzctl v2 3/3] env setup: allow fuse device on start

2016-05-18 Thread Stanislav Kinsburskiy
Fuse is used for network file system restore

Signed-off-by: Stanislav Kinsburskiy 
---
 lib/env_nsops.c |1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/env_nsops.c b/lib/env_nsops.c
index dc8b062..40eba60 100644
--- a/lib/env_nsops.c
+++ b/lib/env_nsops.c
@@ -452,6 +452,7 @@ static int init_env_cgroup(struct vzctl_env_handle *h)
"c 1:11 mw",/* kmsg */
"c 10:200 rmw", /* tun */
"c 10:235 rwm", /* autofs */
+   "c 10:229 rwm", /* fuse */
};
char *cpu[] = {
"cpuset.cpus",

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


[Devel] [PATCH libvzctl v2 2/3] vz-cpt: NFS port allow script added

2016-05-18 Thread Stanislav Kinsburskiy
This script is used to re-allow NFS-related ports at the dump stage.
Without it, CRIU won't be able to access NFS files.

Signed-off-by: Stanislav Kinsburskiy 
---
 scripts/vz-cpt.in |7 +++
 1 file changed, 7 insertions(+)

diff --git a/scripts/vz-cpt.in b/scripts/vz-cpt.in
index f79f665..21a225e 100755
--- a/scripts/vz-cpt.in
+++ b/scripts/vz-cpt.in
@@ -56,6 +56,12 @@ function cg_dump_props {
return 0
 }
 
+# NFS actions
+nfs_actions_path=/usr/libexec/criu/scripts/nfs-ports-allow.sh
+if [ -f "$nfs_actions_path" ]; then
+   nfs_actions="--action-script $nfs_actions_path"
+fi
+
 mkdir -p $dumpdir &&
 cg_dump_props $dumpdir &&
 criu dump -v$CRIU_LOGLEVEL -o dump.log \
@@ -70,6 +76,7 @@ criu dump -v$CRIU_LOGLEVEL -o dump.log \
--action-script $action_script  \
-t $VE_PID  \
-D $dumpdir \
+   $nfs_actions\
$ext_mount_map  \
$CRIU_EXTRA_ARGS
 

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


[Devel] [PATCH libvzctl v2 0/3] NFS migration precursor patches

2016-05-18 Thread Stanislav Kinsburskiy
This patch set is required to allow criu to migrate NFS mounts and the
processes using them.
___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


[Devel] [PATCH] scripts: add /usr/libexec/criu/scripts/spfs-release-replace.sh to vz-rst

2016-05-25 Thread Stanislav Kinsburskiy
Add one more spfs-related restore script (this time for "post-restore" stage),
which is required to release spfs manager replace processes (they are waiting
for it to proceed with actual remount and replacement).

Signed-off-by: Stanislav Kinsburskiy 
---
 scripts/vz-rst.in |   11 ---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/scripts/vz-rst.in b/scripts/vz-rst.in
index 654cdd3..bfd804a 100755
--- a/scripts/vz-rst.in
+++ b/scripts/vz-rst.in
@@ -78,9 +78,14 @@ if [ -f "$autofs_actions_path" ]; then
 fi
 
 # SPFS actions
-spfs_actions_path=/usr/libexec/criu/scripts/spfs-change-mode.sh
-if [ -f "$spfs_actions_path" ]; then
-   spfs_actions="--action-script $spfs_actions_path"
+spfs_actions=""
+spfs_change_mode=/usr/libexec/criu/scripts/spfs-change-mode.sh
+if [ -f "$spfs_change_mode" ]; then
+   spfs_actions="--action-script $spfs_change_mode"
+fi
+spfs_release_replace=/usr/libexec/criu/scripts/spfs-release-replace.sh
+if [ -f "$spfs_release_replace" ]; then
+   spfs_actions="$spfs_actions --action-script $spfs_release_replace"
 fi
 
 criu restore -v$CRIU_LOGLEVEL  \

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


[Devel] [PATCH v2] scripts: add /usr/libexec/criu/scripts/spfs-release-replace.sh to vz-rst

2016-05-26 Thread Stanislav Kinsburskiy
Add one more spfs-related restore script (this time for the "post-restore" stage),
which is required to release the spfs manager's replace processes (they are
waiting for it before proceeding with the actual remount and replacement).

v2:
Use for loop over scripts

Signed-off-by: Stanislav Kinsburskiy 
---
 scripts/vz-rst.in |9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/scripts/vz-rst.in b/scripts/vz-rst.in
index 654cdd3..bfff71a 100755
--- a/scripts/vz-rst.in
+++ b/scripts/vz-rst.in
@@ -78,10 +78,11 @@ if [ -f "$autofs_actions_path" ]; then
 fi
 
 # SPFS actions
-spfs_actions_path=/usr/libexec/criu/scripts/spfs-change-mode.sh
-if [ -f "$spfs_actions_path" ]; then
-   spfs_actions="--action-script $spfs_actions_path"
-fi
+spfs_scripts="/usr/libexec/criu/scripts/spfs-change-mode.sh\
+   /usr/libexec/criu/scripts/spfs-release-replace.sh"
+for script in $spfs_scripts; do
+   [ -f "$script" ] && spfs_actions=$spfs_actions"--action-script $script "
+done
 
 criu restore -v$CRIU_LOGLEVEL  \
--file-locks\

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


[Devel] [PATCH -E] scripts: add dump files save script

2016-05-26 Thread Stanislav Kinsburskiy
This script can be added to vz-rst as an action script and will save dump.log and
restore.log in the format:

   dump-<date>-<time>.tgz

in the /vz/private/<VEID>/dump directory.

Signed-off-by: Stanislav Kinsburskiy 
---
 scripts/vz-save-dump.sh |   10 ++
 1 file changed, 10 insertions(+)
 create mode 100755 scripts/vz-save-dump.sh

diff --git a/scripts/vz-save-dump.sh b/scripts/vz-save-dump.sh
new file mode 100755
index 000..830629c
--- /dev/null
+++ b/scripts/vz-save-dump.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+[[ "post-resume" == "$CRTOOLS_SCRIPT_ACTION" ]] || exit 0
+
+set -o pipefail
+
+dir="/vz/private/$VEID/dump"
+suffix=$(date +%F-%T)
+
+tar -C ${dir}/Dump/ -czf ${dir}/dump-$suffix.tgz dump.log restore.log > /dev/null

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


[Devel] [TRD] NFS migration

2016-05-27 Thread Stanislav Kinsburskiy

1. Feature

NFS mount point migration via CRIU
https://jira.sw.ru/browse/PSBM-26967

2. Description

CRIU now supports NFS file system migration (all NFS versions).
CRIU itself mounts an SPFS file system (FUSE-based) instead of NFS
and starts the SPFS manager program.
The SPFS manager then remounts NFS and replaces SPFS with NFS on the running
container.


3. Products

Virtuozzo 7

Packages versions (or later):
vzkernel-3.10.0-327.18.2.vz7.14.5
criu-2.1.0.31-1.vz7
spfs-0.03.2-1.vz7
libvzctl-7.0.217

4. Testing

4.1 Basics
** Install criu, libvzctl and spfs packages
** Create a container
** Enable NFS support in the container (see man vzctl)
** Install the NFS package inside the container (nfs-utils in case of an
rpm-based distribution)

** Mount an NFS share within the container (an illustrative command
sequence is sketched after this list)
** Suspend and resume the container.
** Wait till all SPFS-related processes have exited ("spfs" and
"spfs-manager").

** Check that the NFS mount point is restored.
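
An illustrative command sequence for the steps above (the container ID, NFS
share and paths are placeholders, and the exact vzctl options may differ;
see man vzctl):

# on the host
vzctl set $CTID --features nfs:on --save      # enable NFS support in the CT
vzctl start $CTID
vzctl exec $CTID mount -t nfs server:/export /mnt/nfs

vzctl suspend $CTID
vzctl resume $CTID

# after spfs and spfs-manager have exited, the mount should be back
vzctl exec $CTID grep nfs /proc/mounts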

4.2 Process, using NFS
   ** Mount NFS share
   ** Start some program, using NFS files (criu has zdtm test suite for 
such purpose)

   ** Suspend and restore container
   ** Wait till all SPFS-related processes exited ("spfs" and 
"spfs-manager").
   ** Check, that processes, using NFS, are running correctly (in case 
of zdtm test suite, stop the test and check result)


4.3. Multiple suspend/resume
   ** Mount NFS share
   ** Start some program, using NFS files (criu has zdtm test suite for 
such purpose)

   ** Do suspend and restore of a container multiple times
   ** Wait till all SPFS-related processes exited ("spfs" and 
"spfs-manager").
   ** Check, that processes, using NFS, are running correctly (in case 
of zdtm test suite, stop the test and check result)


4.4. Multiple different NFS shares
   ** Mount NFS share A
   ** Mount NFS share B
   ** Start some program, using NFS A files (criu has zdtm test suite 
for such purpose)
   ** Start some program, using NFS B files (criu has zdtm test suite 
for such purpose)

   ** Do suspend and restore of a container
   ** Wait till all SPFS-related processes exited ("spfs" and 
"spfs-manager").
   ** Check, that processes, using NFS, are running correctly (in case 
of zdtm test suite, stop the test and check result)


4.5. Multiple equal NFS shares
   ** Mount NFS share A
   ** Bind-mount NFS A to some other directory (NFS B) inside the container
   ** Start some program, using NFS A files (criu has zdtm test suite 
for such purpose)
   ** Start some program, using NFS B files (criu has zdtm test suite 
for such purpose)

   ** Do suspend and restore of a container
   ** Wait till all SPFS-related processes exited ("spfs" and 
"spfs-manager").
   ** Check, that processes, using NFS, are running correctly (in case 
of zdtm test suite, stop the test and check result)

   ** Check that NFS A and NFS B have the same super block (device id); see the example right below
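
   (For example, "stat -c %d /mnt/nfsA/file /mnt/nfsB/file", with placeholder
   paths, should print the same device number for both mounts.)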

4.6. Processes, using NFS
   ** Mount NFS share
   ** Start all the programs supported by SPFS (see below), using NFS 
files (criu has zdtm test suite for such purpose)

   ** Suspend and restore container
   ** Wait till all SPFS-related processes exited ("spfs" and 
"spfs-manager").
   ** Check, that processes, using NFS, are running correctly (in case 
of zdtm test suite, stop the test and check result)


4.7. Migrate different NFS versions (v3, v4, v4.1) without locks support

   ** Mount NFS share
   ** Start all the programs supported by SPFS (see below), using NFS 
files (criu has zdtm test suite for such purpose)

   ** Suspend and restore container
   ** Wait till all SPFS-related processes exited ("spfs" and 
"spfs-manager").
   ** Check, that processes, using NFS, are running correctly (in case 
of zdtm test suite, stop the test and check result)

   ** Repeat with different version

4.8. Migrate different NFS versions (v3, v4, v4.1) with locks support
   ** Mount NFS share
   ** Start all the programs supported by SPFS (see below), using NFS 
files (criu has zdtm test suite for such purpose)

   ** Suspend and restore container
   ** Wait till all SPFS-related processes exited ("spfs" and 
"spfs-manager").
   ** Check, that processes, using NFS, are running correctly (in case 
of zdtm test suite, stop the test and check result)

   ** Repeat with different version

5. Known issues

Most of NFS restore is done by SPFS (and not by CRIU).
Currently, SPFS doesn't support:

1) Fifo migration (will be restored without content)
2) Unix socket migration (will be restored without content)
3) Unlinked files (will be restored, but files won't be unlinked)

6. What was checked by developer

From 4.1 to 4.5.

7. Feature owners

skinsbur...@virtuozzo.com

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


Re: [Devel] [TRD] NFS migration

2016-05-31 Thread Stanislav Kinsburskiy

FIFOs are supported starting from version spfs-0.06.0-1.vz7

On 27.05.2016 12:09, Stanislav Kinsburskiy wrote:

1. Feature

NFS mount point migration via CRIU
https://jira.sw.ru/browse/PSBM-26967

2. Description

CRIU now supports NFS file system migration (all versions).
CRIU itself does mount of SPFS file system (fuse based) instead of NFS 
and starts SPFS manager program.
SPFS-manager does NFS remount and replaces SPFS by NFS on a running 
container.


3. Products

Virtuozzo 7

Packages versions (or later):
vzkernel-3.10.0-327.18.2.vz7.14.5
criu-2.1.0.31-1.vz7
spfs-0.03.2-1.vz7
libvzctl-7.0.217

4. Testing

4.1 Basics
** Install criu, libvzctl and spfs packages
** Create a container
** Enable NFS support in container (see man vzctl)
** Install NFS package inside the container (nfs-utils in case of 
rpm-based distribution)

** Mount NFS share within container
** Suspend and resume container.
** Wait till all SPFS-related processes exited ("spfs" and 
"spfs-manager").

** Check, that NFS mount point is restored.

4.2 Process, using NFS
   ** Mount NFS share
   ** Start some program, using NFS files (criu has zdtm test suite 
for such purpose)

   ** Suspend and restore container
   ** Wait till all SPFS-related processes exited ("spfs" and 
"spfs-manager").
   ** Check, that processes, using NFS, are running correctly (in case 
of zdtm test suite, stop the test and check result)


4.3. Multiple suspend/resume
   ** Mount NFS share
   ** Start some program, using NFS files (criu has zdtm test suite 
for such purpose)

   ** Do suspend and restore of a container multiple times
   ** Wait till all SPFS-related processes exited ("spfs" and 
"spfs-manager").
   ** Check, that processes, using NFS, are running correctly (in case 
of zdtm test suite, stop the test and check result)


4.4. Multiple different NFS shares
   ** Mount NFS share A
   ** Mount NFS share B
   ** Start some program, using NFS A files (criu has zdtm test suite 
for such purpose)
   ** Start some program, using NFS B files (criu has zdtm test suite 
for such purpose)

   ** Do suspend and restore of a container
   ** Wait till all SPFS-related processes exited ("spfs" and 
"spfs-manager").
   ** Check, that processes, using NFS, are running correctly (in case 
of zdtm test suite, stop the test and check result)


4.5. Multiple equal NFS shares
   ** Mount NFS share A
   ** Bind-mount NFS A to some other directory (NFS B) inside the 
container
   ** Start some program, using NFS A files (criu has zdtm test suite 
for such purpose)
   ** Start some program, using NFS B files (criu has zdtm test suite 
for such purpose)

   ** Do suspend and restore of a container
   ** Wait till all SPFS-related processes exited ("spfs" and 
"spfs-manager").
   ** Check, that processes, using NFS, are running correctly (in case 
of zdtm test suite, stop the test and check result)

   ** Check, that NFS A and NFS B have equal super block (device id)

4.6. Processes, using NFS
   ** Mount NFS share
   ** Start all the programs supported by SPFS (see below), using NFS 
files (criu has zdtm test suite for such purpose)

   ** Suspend and restore container
   ** Wait till all SPFS-related processes exited ("spfs" and 
"spfs-manager").
   ** Check, that processes, using NFS, are running correctly (in case 
of zdtm test suite, stop the test and check result)


4.7. MIgrate different NFS versions of NFS (v3, v4, v4.1) without 
locks support

   ** Mount NFS share
   ** Start all the programs supported by SPFS (see below), using NFS 
files (criu has zdtm test suite for such purpose)

   ** Suspend and restore container
   ** Wait till all SPFS-related processes exited ("spfs" and 
"spfs-manager").
   ** Check, that processes, using NFS, are running correctly (in case 
of zdtm test suite, stop the test and check result)

   ** Repeat with different version

4.8. Migrate different NFS versions of NFS (v3, v4, v4.1) with locks 
support

   ** Mount NFS share
   ** Start all the programs supported by SPFS (see below), using NFS 
files (criu has zdtm test suite for such purpose)

   ** Suspend and restore container
   ** Wait till all SPFS-related processes exited ("spfs" and 
"spfs-manager").
   ** Check, that processes, using NFS, are running correctly (in case 
of zdtm test suite, stop the test and check result)

   ** Repeat with different version

5. Known issues

Most of NFS restore is done by SPFS (and not by CRIU).
Currently, SPFS doesn't support:

1) Fifo migration (will be restored without content)
2) Unix socket migration (will be restored without content)
3) Unlinked files (will be restored, but files won't be unlinked)

6. What was checked by developer

From 4.1 to 4.5.

7. Feature owners

skinsbur...@virtuozzo.com

__

[Devel] [PATCH rh7] ve: drop xattr policy toggle

2016-06-01 Thread Stanislav Kinsburskiy
This toggle was introduced to solve this bug (rhel5-based kernels):

https://bugs.openvz.org/browse/OVZ-6409

The root of the bug, briefly: a customer could add the immutable attribute to a file in
a container, thus preventing container removal, because this attribute was
passed through (via simfs) to the actual file system.
The toggle introduced a 3-fold policy:
1) "Allow": setting extended attributes is granted (the default in RHEL7 !!!)
2) "Ignore": setting extended attributes is skipped (do nothing and return 0).
3) "Forbid": setting extended attributes is not allowed.

Maybe this approach was applicable to the rhel5 kernel, but it's absolutely
useless and harmful in rhel7 because:
1) The current VE xattr policy is "Allow", thus it doesn't prevent setting the
immutable attribute.
2) The immutable attribute is set via ioctl and doesn't go through the
vfs_setxattr callback (see the sketch after this list).
3) Setting the immutable attribute is protected by CAP_LINUX_IMMUTABLE, which is
dropped in containers.
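
To illustrate point 2, a minimal user-space sketch of how the immutable flag
is actually set (this is the FS_IOC_SETFLAGS ioctl that chattr +i uses; it is
an illustration, not part of the patch):

#include <sys/ioctl.h>
#include <linux/fs.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	int fd, flags;

	if (argc < 2)
		return 1;
	fd = open(argv[1], O_RDONLY);
	if (fd < 0)
		return 1;

	/* FS_IOC_GETFLAGS/FS_IOC_SETFLAGS go straight to the filesystem's
	 * ioctl handler and never touch vfs_setxattr(), so the old
	 * ve-xattr-policy sysctl could not intercept this path anyway. */
	if (ioctl(fd, FS_IOC_GETFLAGS, &flags) == 0) {
		flags |= FS_IMMUTABLE_FL;
		/* Inside a container this fails with EPERM, because
		 * CAP_LINUX_IMMUTABLE is dropped there (point 3). */
		if (ioctl(fd, FS_IOC_SETFLAGS, &flags) < 0)
			perror("FS_IOC_SETFLAGS");
	}
	close(fd);
	return 0;
}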

Signed-off-by: Stanislav Kinsburskiy 
---
 fs/xattr.c |9 -
 include/uapi/linux/xattr.h |7 ---
 kernel/ve/veowner.c|8 
 3 files changed, 24 deletions(-)

diff --git a/fs/xattr.c b/fs/xattr.c
index 2319cf8..a5be48c 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -126,15 +126,6 @@ vfs_setxattr(struct dentry *dentry, const char *name, 
const void *value,
struct inode *inode = dentry->d_inode;
int error;
 
-#if defined(CONFIG_VE) && defined(CONFIG_SYSCTL)
-   if (!get_exec_env()->is_pseudosuper) {
-   if (ve_xattr_policy == VE_XATTR_POLICY_IGNORE)
-   return 0;
-   else if (ve_xattr_policy == VE_XATTR_POLICY_REJECT)
-   return -EPERM;
-   }
-#endif
-
error = xattr_permission(inode, name, MAY_WRITE);
if (error)
return error;
diff --git a/include/uapi/linux/xattr.h b/include/uapi/linux/xattr.h
index bb0c657..40bbc04 100644
--- a/include/uapi/linux/xattr.h
+++ b/include/uapi/linux/xattr.h
@@ -10,13 +10,6 @@
 #ifndef _UAPI_LINUX_XATTR_H
 #define _UAPI_LINUX_XATTR_H
 
-#ifdef CONFIG_VE
-extern int ve_xattr_policy;
-#define VE_XATTR_POLICY_ACCEPT 0
-#define VE_XATTR_POLICY_IGNORE 1
-#define VE_XATTR_POLICY_REJECT 2
-#endif
-
 #define XATTR_CREATE   0x1 /* set value, fail if attr already exists */
 #define XATTR_REPLACE  0x2 /* set value, fail if attr does not exist */
 
diff --git a/kernel/ve/veowner.c b/kernel/ve/veowner.c
index 1a7e735..005cdaf 100644
--- a/kernel/ve/veowner.c
+++ b/kernel/ve/veowner.c
@@ -52,7 +52,6 @@ static void prepare_proc(void)
  * OpenVZ sysctl
  * 
  */
-int ve_xattr_policy = VE_XATTR_POLICY_ACCEPT;
 static int ve_area_access_check;
 
 /*
@@ -72,13 +71,6 @@ static struct ctl_table vz_fs_table[] = {
.proc_handler   = proc_dointvec,
},
{
-   .procname   = "ve-xattr-policy",
-   .data   = &ve_xattr_policy,
-   .maxlen = sizeof(int),
-   .mode   = 0644,
-   .proc_handler   = &proc_dointvec,
-   },
-   {
.procname   = "fsync-enable",
.data   = &ve0.fsync_enable,
.maxlen = sizeof(int),

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


<    1   2   3   4   5