[Devel] [PATCH] vz7: arch, x86 -- Add support for vz7 specific cpuid masking

2016-06-09 Thread Cyrill Gorcunov
In pcs7 we support so-called cpuid masking via the
procfs "/proc/vz/cpuid_override" interface. Thus
any attempt to execute the cpuid() instruction inside a container
causes a kernel fault, which provides the results requested
from the entry above.

This works well inside container but we do checkpoint
via criu on ve0 where masking is not happening. Thus
on restore we may hit a situation where cpu features
saved in the CRIU image are not supported inside the container.

There was a conversation how to make it more convenient:

 - modify kernel to cause cpuid fault even on node
 - modify criu code so it could take cpu feature options
   from the command line
 - run criu inside container when checkpointing solely for
   cpu feature testing, then jump out and proceed a natural
   suspend

All the above looks too complicated from my POV: what we
need is simply to teach criu to read the aforementioned
"/proc/vz/cpuid_override" procfs entry and use this information
when we fetch cpu features.

https://jira.sw.ru/browse/PSBM-47748

Signed-off-by: Cyrill Gorcunov 
CC: Dmitry Mishin 
CC: Andrey Vagin 
CC: Pavel Emelianov 
CC: Vladimir Davydov 
CC: Konstantin Khorenko 
CC: Igor Sukhih 
---
 criu/arch/x86/cpu.c | 172 
 1 file changed, 161 insertions(+), 11 deletions(-)

diff --git a/criu/arch/x86/cpu.c b/criu/arch/x86/cpu.c
index 0b90c0c..44f3e18 100644
--- a/criu/arch/x86/cpu.c
+++ b/criu/arch/x86/cpu.c
@@ -52,6 +52,153 @@ bool cpu_has_feature(unsigned int feature)
return test_cpu_cap(_cpu_info, feature);
 }
 
+/*
+ * VZ specific cpuid VE masking: the kernel provides
+ * the following entry /proc/vz/cpuid_override which
+ * carries text representation of cpuid masking,
+ * which works via cpuid faulting inside kernel, in the
+ * next format:
+ *
+ * op     count   eax    ebx    ecx    edx
+ * 0x%08x 0x%08x: 0x%08x 0x%08x 0x%08x 0x%08x
+ *
+ * the @count is optional.
+ */
+
+typedef struct {
+   unsigned intop;
+   unsigned intcount;
+   boolhas_count;
+   unsigned inteax;
+   unsigned intebx;
+   unsigned intecx;
+   unsigned intedx;
+} vz_cpuid_override_entry_t;
+
+static vz_cpuid_override_entry_t *vz_cpuid_override_entries;
+static unsigned int nr_vz_cpuid_override_entries;
+
+static int vz_cpu_parse_cpuid_override(void)
+{
+   static const char path[] = "/proc/vz/cpuid_override";
+   int ret = -1;
+   char s[256];
+   FILE *f;
+
+   pr_debug("Parsing %s\n", path);
+
+   f = fopen(path, "r");
+   if (!f) {
+   pr_info("Can't access %s, ignoring\n", path);
+   return 0;
+   }
+
+   while (fgets(s, sizeof(s), f)) {
+   vz_cpuid_override_entry_t e;
+
+   if (sscanf(s, "%x %x: %x %x %x %x",
+  , , ,
+  , , ) == 6)
+   e.has_count = true;
+   else if (sscanf(s, "%x: %x %x %x %x",
+   , , ,
+   , ) == 5)
+   e.has_count = false;
+   else {
+   pr_warn("Unexpected format in %s (%s)\n", path, s);
+   break;
+   }
+
+   if (xrealloc_safe(_cpuid_override_entries,
+ (nr_vz_cpuid_override_entries + 1) * 
sizeof(e))) {
+   goto out;
+   }
+
+   pr_debug("Got cpuid override: %x %x: %x %x %x %x\n",
+  e.op, e.count, e.eax, e.ebx, e.ecx, e.edx);
+
+   vz_cpuid_override_entries[nr_vz_cpuid_override_entries++] = e;
+   }
+
+   ret = 0;
+out:
+   fclose(f);
+   return ret;
+}
+
+static vz_cpuid_override_entry_t *vz_cpuid_override_lookup(unsigned int op,
+  bool has_count,
+  unsigned int count)
+{
+   size_t i;
+
+   for (i = 0; i < nr_vz_cpuid_override_entries; i++) {
+   if (vz_cpuid_override_entries[i].op != op ||
+   vz_cpuid_override_entries[i].has_count != has_count ||
+   count != vz_cpuid_override_entries[i].count)
+   continue;
+   return _cpuid_override_entries[i];
+   }
+
+   return NULL;
+}
+
+static inline void vz_cpuid(unsigned int op,
+   unsigned int *eax, unsigned int *ebx,
+   unsigned int *ecx, unsigned int *edx)
+{
+   vz_cpuid_override_entry_t *e;
+
+   e = vz_cpuid_override_lookup(op, false, 0);
+   if (e) {
+   *eax = e->eax;
+   *ebx = e->ebx;
+   *ecx = e->ecx;
+   *edx = e->edx;
+   } else
+   

Re: [Devel] [PATCH rh7] vtty: Allow to wait until container's console appear

2016-06-09 Thread Cyrill Gorcunov
On Thu, Jun 09, 2016 at 01:34:31PM +0300, Vladimir Davydov wrote:
> On Mon, Jun 06, 2016 at 07:26:57PM +0300, Cyrill Gorcunov wrote:
> > After tty code redesign we've been requiring container to start
> > first before be able to connect into it via vzctl console command.
> > Here we rather allow userspace tool to wait until container brought
> > to life and proceed connecting into console.
> > 
> > https://jira.sw.ru/browse/PSBM-39463
> > 
> > Signed-off-by: Cyrill Gorcunov 
> > CC: Vladimir Davydov 
> > CC: Konstantin Khorenko 
> > CC: Igor Sukhih 
> > CC: Pavel Emelyanov 
> > ---
> >  include/linux/ve.h  |2 ++
> >  kernel/ve/ve.c  |   48 
> >  kernel/ve/vecalls.c |   23 +--
> >  3 files changed, 71 insertions(+), 2 deletions(-)
> > 
> > Index: linux-pcs7.git/include/linux/ve.h
> > ===
> > --- linux-pcs7.git.orig/include/linux/ve.h
> > +++ linux-pcs7.git/include/linux/ve.h
> > @@ -215,6 +215,8 @@ void ve_stop_ns(struct pid_namespace *ns
> >  void ve_exit_ns(struct pid_namespace *ns);
> >  int ve_start_container(struct ve_struct *ve);
> >  
> > +int ve_console_wait(envid_t veid);
> > +
> >  extern bool current_user_ns_initial(void);
> >  struct user_namespace *ve_init_user_ns(void);
> >  
> > Index: linux-pcs7.git/kernel/ve/ve.c
> > ===
> > --- linux-pcs7.git.orig/kernel/ve/ve.c
> > +++ linux-pcs7.git/kernel/ve/ve.c
> > @@ -260,6 +260,49 @@ struct user_namespace *ve_init_user_ns(v
> >  }
> >  EXPORT_SYMBOL(ve_init_user_ns);
> >  
> > +static DEFINE_IDR(ve_idr_console);
> > +static DECLARE_RWSEM(ve_console_sem);
> > +
> > +int ve_console_wait(envid_t veid)
> > +{
> > +   DECLARE_COMPLETION_ONSTACK(console_work);
> > +   int ret;
> > +
> > +   down_write(_console_sem);
> > +   if (idr_find(_idr_console, veid)) {
> > +   up_write(_console_sem);
> > +   return -EEXIST;
> > +   }
> > +
> > +   ret = idr_alloc(_idr_console, _work, veid, veid + 1, 
> > GFP_KERNEL);
> > +   if (ret < 0) {
> > +   if (ret == -ENOSPC)
> > +   ret = -EEXIST;
> > +   } else
> > +   ret = 0;
> > +   downgrade_write(_console_sem);
> > +
> > +   if (!ret) {
> > +   ret = wait_for_completion_interruptible(_work);
> > +   idr_remove(_idr_console, veid);
> > +   }
> > +
> > +   up_read(_console_sem);
> > +   return ret;
> > +}
> > +EXPORT_SYMBOL(ve_console_wait);
> > +
> > +static void ve_console_notify(struct ve_struct *ve)
> > +{
> > +   struct completion *console_work;
> > +
> > +   down_read(_console_sem);
> > +   console_work = idr_find(_idr_console, ve->veid);
> > +   if (console_work)
> > +   complete(console_work);
> > +   up_read(_console_sem);
> > +}
> > +
> >  int nr_threads_ve(struct ve_struct *ve)
> >  {
> > return cgroup_task_count(ve->css.cgroup);
> > @@ -494,6 +537,11 @@ int ve_start_container(struct ve_struct
> >  
> > get_ve(ve); /* for ve_exit_ns() */
> >  
> > +   /*
> > +* Console waiter are to be notified at the very
> > +* end when everything else is ready.
> > +*/
> > +   ve_console_notify(ve);
> > return 0;
> >  
> >  err_iterate:
> > Index: linux-pcs7.git/kernel/ve/vecalls.c
> > ===
> > --- linux-pcs7.git.orig/kernel/ve/vecalls.c
> > +++ linux-pcs7.git/kernel/ve/vecalls.c
> > @@ -991,8 +991,27 @@ static int ve_configure(envid_t veid, un
> > int err = -ENOKEY;
> >  
> > ve = get_ve_by_id(veid);
> > -   if (!ve)
> > -   return -EINVAL;
> > +   if (!ve) {
> > +
> > +   if (key != VE_CONFIGURE_OPEN_TTY)
> > +   return -EINVAL;
> > +   /*
> > +* Offline console management:
> > +* wait until ve is up and proceed.
> > +*/
> 
> What if a VE is created right here, before we call ve_console_wait()?
> Looks like the caller will hang forever...

Yeah, could be a race. Thanks!

> > +   err = ve_console_wait(veid);
> > +   if (err)
> > +   return err;
> > +
> > +   /*
> > +* A container should not exit immediately once
> > +* started but if it does, for any reason, simply
> > +* exit out gracefully.
> > +*/
> > +   ve = get_ve_by_id(veid);
> > +   if (!ve)
> > +   return -ENOENT;
> > +   }
> 
> Can't we fold this into vtty_open_master()? The latter doesn't need ve
> object, it only needs veid, which is known here.

Will do.
___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


Re: [Devel] [PATCH rh7] vtty: Allow to wait until container's console appear

2016-06-09 Thread Vladimir Davydov
On Mon, Jun 06, 2016 at 07:26:57PM +0300, Cyrill Gorcunov wrote:
> After tty code redesign we've been requiring container to start
> first before be able to connect into it via vzctl console command.
> Here we rather allow userspace tool to wait until container brought
> to life and proceed connecting into console.
> 
> https://jira.sw.ru/browse/PSBM-39463
> 
> Signed-off-by: Cyrill Gorcunov 
> CC: Vladimir Davydov 
> CC: Konstantin Khorenko 
> CC: Igor Sukhih 
> CC: Pavel Emelyanov 
> ---
>  include/linux/ve.h  |2 ++
>  kernel/ve/ve.c  |   48 
>  kernel/ve/vecalls.c |   23 +--
>  3 files changed, 71 insertions(+), 2 deletions(-)
> 
> Index: linux-pcs7.git/include/linux/ve.h
> ===
> --- linux-pcs7.git.orig/include/linux/ve.h
> +++ linux-pcs7.git/include/linux/ve.h
> @@ -215,6 +215,8 @@ void ve_stop_ns(struct pid_namespace *ns
>  void ve_exit_ns(struct pid_namespace *ns);
>  int ve_start_container(struct ve_struct *ve);
>  
> +int ve_console_wait(envid_t veid);
> +
>  extern bool current_user_ns_initial(void);
>  struct user_namespace *ve_init_user_ns(void);
>  
> Index: linux-pcs7.git/kernel/ve/ve.c
> ===
> --- linux-pcs7.git.orig/kernel/ve/ve.c
> +++ linux-pcs7.git/kernel/ve/ve.c
> @@ -260,6 +260,49 @@ struct user_namespace *ve_init_user_ns(v
>  }
>  EXPORT_SYMBOL(ve_init_user_ns);
>  
> +static DEFINE_IDR(ve_idr_console);
> +static DECLARE_RWSEM(ve_console_sem);
> +
> +int ve_console_wait(envid_t veid)
> +{
> + DECLARE_COMPLETION_ONSTACK(console_work);
> + int ret;
> +
> + down_write(_console_sem);
> + if (idr_find(_idr_console, veid)) {
> + up_write(_console_sem);
> + return -EEXIST;
> + }
> +
> + ret = idr_alloc(_idr_console, _work, veid, veid + 1, 
> GFP_KERNEL);
> + if (ret < 0) {
> + if (ret == -ENOSPC)
> + ret = -EEXIST;
> + } else
> + ret = 0;
> + downgrade_write(_console_sem);
> +
> + if (!ret) {
> + ret = wait_for_completion_interruptible(_work);
> + idr_remove(_idr_console, veid);
> + }
> +
> + up_read(_console_sem);
> + return ret;
> +}
> +EXPORT_SYMBOL(ve_console_wait);
> +
> +static void ve_console_notify(struct ve_struct *ve)
> +{
> + struct completion *console_work;
> +
> + down_read(_console_sem);
> + console_work = idr_find(_idr_console, ve->veid);
> + if (console_work)
> + complete(console_work);
> + up_read(_console_sem);
> +}
> +
>  int nr_threads_ve(struct ve_struct *ve)
>  {
>   return cgroup_task_count(ve->css.cgroup);
> @@ -494,6 +537,11 @@ int ve_start_container(struct ve_struct
>  
>   get_ve(ve); /* for ve_exit_ns() */
>  
> + /*
> +  * Console waiter are to be notified at the very
> +  * end when everything else is ready.
> +  */
> + ve_console_notify(ve);
>   return 0;
>  
>  err_iterate:
> Index: linux-pcs7.git/kernel/ve/vecalls.c
> ===
> --- linux-pcs7.git.orig/kernel/ve/vecalls.c
> +++ linux-pcs7.git/kernel/ve/vecalls.c
> @@ -991,8 +991,27 @@ static int ve_configure(envid_t veid, un
>   int err = -ENOKEY;
>  
>   ve = get_ve_by_id(veid);
> - if (!ve)
> - return -EINVAL;
> + if (!ve) {
> +
> + if (key != VE_CONFIGURE_OPEN_TTY)
> + return -EINVAL;
> + /*
> +  * Offline console management:
> +  * wait until ve is up and proceed.
> +  */

What if a VE is created right here, before we call ve_console_wait()?
Looks like the caller will hang forever...

> + err = ve_console_wait(veid);
> + if (err)
> + return err;
> +
> + /*
> +  * A container should not exit immediately once
> +  * started but if it does, for any reason, simply
> +  * exit out gracefully.
> +  */
> + ve = get_ve_by_id(veid);
> + if (!ve)
> + return -ENOENT;
> + }

Can't we fold this into vtty_open_master()? The latter doesn't need ve
object, it only needs veid, which is known here.

>  
>   switch(key) {
>   case VE_CONFIGURE_OS_RELEASE:
___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


[Devel] [PATCH RHEL7 COMMIT] cbt: blk_cbt_update_size() should not copy uninitialized data

2016-06-09 Thread Konstantin Khorenko
The commit is pushed to "branch-rh7-3.10.0-327.18.2.vz7.14.x-ovz" and will 
appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-327.18.2.vz7.14.13
-->
commit dd56c75a4d6149fb1247064ca581777220e9983f
Author: Maxim Patlasov 
Date:   Thu Jun 9 12:42:55 2016 +0400

cbt: blk_cbt_update_size() should not copy uninitialized data

to_cpy is the number of page pointers to copy from current cbt to new.
The following check:

>   if ((new_sz + bsz) >> cbt->block_bits <= cbt->block_max)
>   goto err_mtx;

ensures that the copy will be done only for new cbt bigger than current. So,
we have to calculate to_cpy based on the current (smaller) cbt. The rest of
new cbt is OK because it was nullified by do_cbt_alloc().

The bug existed since the very first version of CBT (commit ad7ba3dfe).

https://jira.sw.ru/browse/PSBM-48120

Signed-off-by: Maxim Patlasov 
Acked-by: Dmitry Monakhov 
---
 block/blk-cbt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/blk-cbt.c b/block/blk-cbt.c
index 001dbfd..3a2b197 100644
--- a/block/blk-cbt.c
+++ b/block/blk-cbt.c
@@ -448,7 +448,7 @@ void blk_cbt_update_size(struct block_device *bdev)
set_bit(CBT_ERROR, >flags);
goto err_mtx;
}
-   to_cpy = NR_PAGES(new->block_max);
+   to_cpy = NR_PAGES(cbt->block_max);
set_bit(CBT_NOCACHE, >flags);
cbt_flush_cache(cbt);
spin_lock_irq(>lock);
___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


[Devel] [PATCH RHEL7 COMMIT] ms/kmod: remove false positive warning "waiting module removal not supported: please upgrade"

2016-06-09 Thread Konstantin Khorenko
The commit is pushed to "branch-rh7-3.10.0-327.18.2.vz7.14.x-ovz" and will 
appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-327.18.2.vz7.14.13
-->
commit 78612e86dbbf2114115bb6475c6734bd9f1f60b6
Author: Rusty Russell 
Date:   Thu Jun 9 13:17:18 2016 +0400

ms/kmod: remove false positive warning "waiting module removal not 
supported: please upgrade"

commit 79465d2fd48e68940c2bdecddbdecd45bbba06fe
Date:   Mon Apr 28 11:05:43 2014 +0930

module: remove warning about waiting module removal.

We remove the waiting module removal in commit 3f2b9c9cdf38 (September
2013), but it turns out that modprobe in kmod (< version 16) was
asking for waiting module removal.  No one noticed since modprobe would
check for 0 usage immediately before trying to remove the module, and
the race is unlikely.

However, it means that anyone running old (but not ancient) kmod
versions is hitting the printk designed to see if anyone was running
"rmmod -w".  All reports so far have been false positives, so remove
the warning.

Fixes: 3f2b9c9cdf389e303b2273679af08aab5f153517
Reported-by: Valerio Vanni 
Cc: Elliott, Robert (Server Storage) 
Cc: sta...@kernel.org
Acked-by: Lucas De Marchi 
Signed-off-by: Rusty Russell 

Backported to vz7 kernel,
fixes mainline patch 3f2b9c9cd ("module: remove rmmod --wait option.")
backported by Kirill Tkhai for PSBM-27580

https://bugs.openvz.org/browse/OVZ-6748
https://jira.sw.ru/browse/PSBM-47877

Signed-off-by:  Vasily Averin 
---
 kernel/module.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/kernel/module.c b/kernel/module.c
index 10e1384..762716f 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -892,11 +892,6 @@ SYSCALL_DEFINE2(delete_module, const char __user *, 
name_user,
return -EFAULT;
name[MODULE_NAME_LEN-1] = '\0';
 
-   if (!(flags & O_NONBLOCK)) {
-   printk(KERN_WARNING
-  "waiting module removal not supported: please upgrade");
-   }
-
if (mutex_lock_interruptible(_mutex) != 0)
return -EINTR;
 
___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


[Devel] [PATCH RHEL7 COMMIT] ploop: push_backup: PLOOP_PEEK mode of ioctl(PLOOP_IOC_PUSH_BACKUP_IO)

2016-06-09 Thread Konstantin Khorenko
The commit is pushed to "branch-rh7-3.10.0-327.18.2.vz7.14.x-ovz" and will 
appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-327.18.2.vz7.14.13
-->
commit 042ec3eb9733a65e6a6eff45e23a78aca48149a9
Author: Maxim Patlasov 
Date:   Thu Jun 9 13:09:45 2016 +0400

ploop: push_backup: PLOOP_PEEK mode of ioctl(PLOOP_IOC_PUSH_BACKUP_IO)

Now, before stopping push_backup, userspace backup tool can ask ploop
about blocks needed to backup, but not reported as backed up:

ctl->direction = PLOOP_PEEK;
ctl->n_extents = n; /* where n >= 1*/
ret = ioctl(pfd, PLOOP_IOC_PUSH_BACKUP_IO, ctl);

If push_backup was really done completely (i.e. all blocks in main bitmask
were reported as backed up), ret will be zero, and ctl->n_extents too.

Otherwise, if some blocks were missed to backup, the ioctl will fill
ctl->extents[] with info about such "missed" blocks.

https://jira.sw.ru/browse/PSBM-47764

Signed-off-by: Maxim Patlasov 
Acked-by: Dmitry Monakhov 
---
 drivers/block/ploop/dev.c | 47 ++
 drivers/block/ploop/push_backup.c | 84 ++-
 drivers/block/ploop/push_backup.h |  2 +
 include/linux/ploop/ploop_if.h|  1 +
 4 files changed, 125 insertions(+), 9 deletions(-)

diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
index 27827a8..db55be3 100644
--- a/drivers/block/ploop/dev.c
+++ b/drivers/block/ploop/dev.c
@@ -4643,24 +4643,27 @@ pb_init_done:
return rc;
 }
 
-static int ploop_push_backup_io_read(struct ploop_device *plo, unsigned long 
arg,
-struct ploop_push_backup_io_ctl *ctl)
+static int ploop_push_backup_io_get(struct ploop_device *plo,
+   unsigned long arg, struct ploop_push_backup_io_ctl *ctl,
+   int (*get)(struct ploop_pushbackup_desc *, cluster_t *,
+  cluster_t *, unsigned))
 {
struct ploop_push_backup_ctl_extent *e;
unsigned n_extents = 0;
int rc = 0;
+   cluster_t clu = 0;
+   cluster_t len = 0;
 
e = kmalloc(sizeof(*e) * ctl->n_extents, GFP_KERNEL);
if (!e)
return -ENOMEM;
 
while (n_extents < ctl->n_extents) {
-   cluster_t clu, len;
-   rc = ploop_pb_get_pending(plo->pbd, , , n_extents);
+   rc = get(plo->pbd, , , n_extents);
if (rc == -ENOENT && n_extents)
break;
else if (rc)
-   goto io_read_done;
+   goto io_get_done;
 
e[n_extents].clu = clu;
e[n_extents].len = len;
@@ -4670,18 +4673,44 @@ static int ploop_push_backup_io_read(struct 
ploop_device *plo, unsigned long arg
rc = -EFAULT;
ctl->n_extents = n_extents;
if (copy_to_user((void*)arg, ctl, sizeof(*ctl)))
-   goto io_read_done;
+   goto io_get_done;
if (n_extents &&
copy_to_user((void*)(arg + sizeof(*ctl)), e,
 n_extents * sizeof(*e)))
-   goto io_read_done;
+   goto io_get_done;
rc = 0;
 
-io_read_done:
+io_get_done:
kfree(e);
return rc;
 }
 
+static int ploop_push_backup_io_read(struct ploop_device *plo,
+   unsigned long arg, struct ploop_push_backup_io_ctl *ctl)
+{
+   return ploop_push_backup_io_get(plo, arg, ctl, ploop_pb_get_pending);
+}
+
+static int ploop_push_backup_io_peek(struct ploop_device *plo,
+   unsigned long arg, struct ploop_push_backup_io_ctl *ctl)
+{
+   int rc;
+
+   ploop_quiesce(plo);
+   rc = ploop_push_backup_io_get(plo, arg, ctl, ploop_pb_peek);
+   ploop_relax(plo);
+
+   if (rc == -ENOENT) {
+   ctl->n_extents = 0;
+   if (copy_to_user((void*)arg, ctl, sizeof(*ctl)))
+   rc = -EFAULT;
+   else
+   rc = 0;
+   }
+
+   return rc;
+}
+
 static int ploop_push_backup_io_write(struct ploop_device *plo, unsigned long 
arg,
  struct ploop_push_backup_io_ctl *ctl)
 {
@@ -4737,6 +4766,8 @@ static int ploop_push_backup_io(struct ploop_device *plo, 
unsigned long arg)
return ploop_push_backup_io_read(plo, arg, );
case PLOOP_WRITE:
return ploop_push_backup_io_write(plo, arg, );
+   case PLOOP_PEEK:
+   return ploop_push_backup_io_peek(plo, arg, );
}
 
return -EINVAL;
diff --git a/drivers/block/ploop/push_backup.c 
b/drivers/block/ploop/push_backup.c
index 376052d..e8fa88d 100644
--- a/drivers/block/ploop/push_backup.c
+++ b/drivers/block/ploop/push_backup.c
@@ -303,9 +303,17 @@ int ploop_pb_init(struct ploop_pushbackup_desc *pbd, __u8 
*uuid, bool full)

Re: [Devel] [vzlin-dev] [PATCH rh7] ploop: push_backup: PLOOP_PEEK mode of ioctl(PLOOP_IOC_PUSH_BACKUP_IO)

2016-06-09 Thread Dmitry Monakhov

> Now, before stopping push_backup, userspace backup tool can ask ploop
> about blocks needed to backup, but not reported as backed up:
>
>   ctl->direction = PLOOP_PEEK;
>   ctl->n_extents = n; /* where n >= 1*/
>   ret = ioctl(pfd, PLOOP_IOC_PUSH_BACKUP_IO, ctl);
>
> If push_backup was really done completely (i.e. all blocks in main bitmask
> were reported as backed up), ret will be zero, and ctl->n_extents too.
>
> Otherwise, if some blocks were missed to backup, the ioctl will fill
> ctl->extents[] with info about such "missed" blocks.
>
> https://jira.sw.ru/browse/PSBM-47764
ACK
>
> Signed-off-by: Maxim Patlasov 
> ---
>  drivers/block/ploop/dev.c |   47 +
>  drivers/block/ploop/push_backup.c |   84 
> +
>  drivers/block/ploop/push_backup.h |2 +
>  include/linux/ploop/ploop_if.h|1 
>  4 files changed, 125 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
> index 27827a8..db55be3 100644
> --- a/drivers/block/ploop/dev.c
> +++ b/drivers/block/ploop/dev.c
> @@ -4643,24 +4643,27 @@ pb_init_done:
>   return rc;
>  }
>  
> -static int ploop_push_backup_io_read(struct ploop_device *plo, unsigned long 
> arg,
> -  struct ploop_push_backup_io_ctl *ctl)
> +static int ploop_push_backup_io_get(struct ploop_device *plo,
> + unsigned long arg, struct ploop_push_backup_io_ctl *ctl,
> + int (*get)(struct ploop_pushbackup_desc *, cluster_t *,
> +cluster_t *, unsigned))
>  {
>   struct ploop_push_backup_ctl_extent *e;
>   unsigned n_extents = 0;
>   int rc = 0;
> + cluster_t clu = 0;
> + cluster_t len = 0;
>  
>   e = kmalloc(sizeof(*e) * ctl->n_extents, GFP_KERNEL);
>   if (!e)
>   return -ENOMEM;
>  
>   while (n_extents < ctl->n_extents) {
> - cluster_t clu, len;
> - rc = ploop_pb_get_pending(plo->pbd, , , n_extents);
> + rc = get(plo->pbd, , , n_extents);
>   if (rc == -ENOENT && n_extents)
>   break;
>   else if (rc)
> - goto io_read_done;
> + goto io_get_done;
>  
>   e[n_extents].clu = clu;
>   e[n_extents].len = len;
> @@ -4670,18 +4673,44 @@ static int ploop_push_backup_io_read(struct 
> ploop_device *plo, unsigned long arg
>   rc = -EFAULT;
>   ctl->n_extents = n_extents;
>   if (copy_to_user((void*)arg, ctl, sizeof(*ctl)))
> - goto io_read_done;
> + goto io_get_done;
>   if (n_extents &&
>   copy_to_user((void*)(arg + sizeof(*ctl)), e,
>n_extents * sizeof(*e)))
> - goto io_read_done;
> + goto io_get_done;
>   rc = 0;
>  
> -io_read_done:
> +io_get_done:
>   kfree(e);
>   return rc;
>  }
>  
> +static int ploop_push_backup_io_read(struct ploop_device *plo,
> + unsigned long arg, struct ploop_push_backup_io_ctl *ctl)
> +{
> + return ploop_push_backup_io_get(plo, arg, ctl, ploop_pb_get_pending);
> +}
> +
> +static int ploop_push_backup_io_peek(struct ploop_device *plo,
> + unsigned long arg, struct ploop_push_backup_io_ctl *ctl)
> +{
> + int rc;
> +
> + ploop_quiesce(plo);
> + rc = ploop_push_backup_io_get(plo, arg, ctl, ploop_pb_peek);
> + ploop_relax(plo);
> +
> + if (rc == -ENOENT) {
> + ctl->n_extents = 0;
> + if (copy_to_user((void*)arg, ctl, sizeof(*ctl)))
> + rc = -EFAULT;
> + else
> + rc = 0;
> + }
> +
> + return rc;
> +}
> +
>  static int ploop_push_backup_io_write(struct ploop_device *plo, unsigned 
> long arg,
> struct ploop_push_backup_io_ctl *ctl)
>  {
> @@ -4737,6 +4766,8 @@ static int ploop_push_backup_io(struct ploop_device 
> *plo, unsigned long arg)
>   return ploop_push_backup_io_read(plo, arg, );
>   case PLOOP_WRITE:
>   return ploop_push_backup_io_write(plo, arg, );
> + case PLOOP_PEEK:
> + return ploop_push_backup_io_peek(plo, arg, );
>   }
>  
>   return -EINVAL;
> diff --git a/drivers/block/ploop/push_backup.c 
> b/drivers/block/ploop/push_backup.c
> index 376052d..e8fa88d 100644
> --- a/drivers/block/ploop/push_backup.c
> +++ b/drivers/block/ploop/push_backup.c
> @@ -303,9 +303,17 @@ int ploop_pb_init(struct ploop_pushbackup_desc *pbd, 
> __u8 *uuid, bool full)
>   memcpy(pbd->cbt_uuid, uuid, sizeof(pbd->cbt_uuid));
>  
>   if (full) {
> - int i;
> + int i, off;
>   for (i = 0; i < NR_PAGES(pbd->ppb_block_max); i++)
>   memset(page_address(pbd->ppb_map[i]), 0xff, PAGE_SIZE);
> +
> + /* nullify bits beyond [0, pbd->ppb_block_max) range */
> + 

Re: [Devel] [PATCH rh7] cbt: blk_cbt_update_size() must return if cbt->block_max not changed

2016-06-09 Thread Dmitry Monakhov
Maxim Patlasov  writes:

> It's useless to recreate cbt every time we are called for the same
> block-device size. Actually, it's worthwhile only if cbt->block_max
> increases.
>
> Since commit b8e560a299 (fix cbt->block_max calculation), we calculate
> cbt->block_max precisely:
>
>>   cbt->block_max  = (size + blocksize - 1) >> cbt->block_bits;
>
> Hence, the following check:
>
>   if ((new_sz + bsz) >> cbt->block_bits <= cbt->block_max)
>   goto err_mtx;
>
> must be corrected accordingly.
>
ACK
> Signed-off-by: Maxim Patlasov 
> ---
>  block/blk-cbt.c |2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/block/blk-cbt.c b/block/blk-cbt.c
> index 3a2b197..4f2ce26 100644
> --- a/block/blk-cbt.c
> +++ b/block/blk-cbt.c
> @@ -440,7 +440,7 @@ void blk_cbt_update_size(struct block_device *bdev)
>   return;
>   }
>   bsz = 1 << cbt->block_bits;
> - if ((new_sz + bsz) >> cbt->block_bits <= cbt->block_max)
> + if ((new_sz + bsz - 1) >> cbt->block_bits <= cbt->block_max)
>   goto err_mtx;
>  
>   new = do_cbt_alloc(q, cbt->uuid, new_sz, bsz);


signature.asc
Description: PGP signature
___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


Re: [Devel] [PATCH rh7] cbt: blk_cbt_update_size() should not copy uninitialized data

2016-06-09 Thread Dmitry Monakhov
Maxim Patlasov  writes:

> to_cpy is the number of page pointers to copy from current cbt to new.
> The following check:
>
>>  if ((new_sz + bsz) >> cbt->block_bits <= cbt->block_max)
>>  goto err_mtx;
>
> ensures that the copy will be done only for new cbt bigger than current. So,
> we have to calculate to_cpy based on the current (smaller) cbt. The rest of
> new cbt is OK because it was nullified by do_cbt_alloc().
>
> The bug existed since the very first version of CBT (commit ad7ba3dfe).
>
> https://jira.sw.ru/browse/PSBM-48120
>
ACK
> Signed-off-by: Maxim Patlasov 
> ---
>  block/blk-cbt.c |2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/block/blk-cbt.c b/block/blk-cbt.c
> index 001dbfd..3a2b197 100644
> --- a/block/blk-cbt.c
> +++ b/block/blk-cbt.c
> @@ -448,7 +448,7 @@ void blk_cbt_update_size(struct block_device *bdev)
>   set_bit(CBT_ERROR, >flags);
>   goto err_mtx;
>   }
> - to_cpy = NR_PAGES(new->block_max);
> + to_cpy = NR_PAGES(cbt->block_max);
>   set_bit(CBT_NOCACHE, >flags);
>   cbt_flush_cache(cbt);
>   spin_lock_irq(>lock);


signature.asc
Description: PGP signature
___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


[Devel] [PATCH RHEL7 COMMIT] cbt: blk_cbt_update_size() must return if cbt->block_max not changed

2016-06-09 Thread Konstantin Khorenko
The commit is pushed to "branch-rh7-3.10.0-327.18.2.vz7.14.x-ovz" and will 
appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-327.18.2.vz7.14.13
-->
commit e60b95961323b14d74d2effeeb43b490bd5ecd1a
Author: Maxim Patlasov 
Date:   Thu Jun 9 12:42:56 2016 +0400

cbt: blk_cbt_update_size() must return if cbt->block_max not changed

It's useless to recreate cbt every time we are called for the same
block-device size. Actually, it's worthwhile only if cbt->block_max
increases.

Since commit b8e560a299 (fix cbt->block_max calculation), we calculate
cbt->block_max precisely:

>   cbt->block_max  = (size + blocksize - 1) >> cbt->block_bits;

Hence, the following check:

if ((new_sz + bsz) >> cbt->block_bits <= cbt->block_max)
goto err_mtx;

must be corrected accordingly.

Signed-off-by: Maxim Patlasov 
Acked-by: Dmitry Monakhov 
---
 block/blk-cbt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/blk-cbt.c b/block/blk-cbt.c
index 3a2b197..4f2ce26 100644
--- a/block/blk-cbt.c
+++ b/block/blk-cbt.c
@@ -440,7 +440,7 @@ void blk_cbt_update_size(struct block_device *bdev)
return;
}
bsz = 1 << cbt->block_bits;
-   if ((new_sz + bsz) >> cbt->block_bits <= cbt->block_max)
+   if ((new_sz + bsz - 1) >> cbt->block_bits <= cbt->block_max)
goto err_mtx;
 
new = do_cbt_alloc(q, cbt->uuid, new_sz, bsz);
___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


[Devel] [patch vz7 v2] do not allow rootfs umount

2016-06-09 Thread Vasily Averin
In mainline, rootfs is always marked as MNT_LOCKED;
sys_umount checks this flag and fails the operation.
Our kernels lack the MNT_LOCKED flag, so we use another kind of check
to prevent incorrect operation.

v2: use mnt_has_parent()

https://jira.sw.ru/browse/PSBM-46437

Signed-off-by: Vasily Averin 
diff --git a/fs/namespace.c b/fs/namespace.c
index 988320b..4fb935a 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1355,6 +1355,8 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
goto dput_and_out;
if (!check_mnt(mnt))
goto dput_and_out;
+   if (!mnt_has_parent(mnt))
+   goto dput_and_out;
 
retval = do_umount(mnt, flags);
 dput_and_out:
___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel