[Devel] [PATCH vz8 2/2] ve/fs/devmnt: process mount options

2020-10-26 Thread Andrey Ryabinin
From: Kirill Tkhai 

Port patch diff-ve-fs-process-mount-options-check-and-insert by Maxim Patlasov:

The patch implements two kinds of processing mount options: check and insert.
Check is OK if and only if each option supplied by CT-user is present
among options listed in allowed_options.

Insert transforms mount options supplied by CT-user like this:

 mount_options = hidden_options + "," + mount_options

Check is performed both for mount and remount. Insert - only for mount. All
this happens only for mount/remount inside CT and if proper ve_devmnt struct
is found in ve->devmnt_list (searched by 'dev').

https://jira.sw.ru/browse/PSBM-32273

Signed-off-by: Kirill Tkhai 
Acked-by: Maxim Patlasov 

+++
ve/fs/devmnt: allow more than one mount option inside a CT

strsep() changes provided string: puts '\0' instead of separators,
thus after successful call to ve_devmnt_check() we insert
only first provided mount options, ignoring others.

mFixes: bc4143b ("ve/fs/devmnt: process mount options")

Found during implementation of
https://jira.sw.ru/browse/PSBM-40075

Signed-off-by: Konstantin Khorenko 
Reviewed-by: Kirill Tkhai 

https://jira.sw.ru/browse/PSBM-108196
Signed-off-by: Andrey Ryabinin 
---
 fs/namespace.c | 146 -
 fs/super.c |  16 +
 include/linux/fs.h |   2 +
 3 files changed, 163 insertions(+), 1 deletion(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index d355b5921d1e..c24ab7597a39 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -28,6 +28,8 @@
 #include 
 #include 
 
+#include 
+
 #include "pnode.h"
 #include "internal.h"
 
@@ -2344,6 +2346,148 @@ static int change_mount_flags(struct vfsmount *mnt, int 
ms_flags)
return error;
 }
 
+#ifdef CONFIG_VE
+/*
+ * Returns first occurrence of needle in haystack separated by sep,
+ * or NULL if not found
+ */
+static char *strstr_separated(char *haystack, char *needle, char sep)
+{
+   int needle_len = strlen(needle);
+
+   while (haystack) {
+   if (!strncmp(haystack, needle, needle_len) &&
+   (haystack[needle_len] == 0 || /* end-of-line or */
+haystack[needle_len] == sep)) /* separator */
+   return haystack;
+
+   haystack = strchr(haystack, sep);
+   if (haystack)
+   haystack++;
+   }
+
+   return NULL;
+}
+
+static int ve_devmnt_check(char *options, char *allowed)
+{
+   char *p;
+   char *tmp_options;
+
+   if (!options || !*options)
+   return 0;
+
+   if (!allowed)
+   return -EPERM;
+
+   /* strsep() changes provided string: puts '\0' instead of separators */
+   tmp_options = kstrdup(options, GFP_KERNEL);
+   if (!tmp_options)
+   return -ENOMEM;
+
+   while ((p = strsep(_options, ",")) != NULL) {
+   if (!*p)
+   continue;
+
+   if (!strstr_separated(allowed, p, ',')) {
+   kfree(tmp_options);
+   return -EPERM;
+   }
+   }
+
+   kfree(tmp_options);
+   return 0;
+}
+
+static int ve_devmnt_insert(char *options, char *hidden)
+{
+   int options_len;
+   int hidden_len;
+
+   if (!hidden)
+   return 0;
+
+   if (!options)
+   return -EAGAIN;
+
+   options_len = strlen(options);
+   hidden_len = strlen(hidden);
+
+   if (hidden_len + options_len + 2 > PAGE_SIZE)
+   return -EPERM;
+
+   memmove(options + hidden_len + 1, options, options_len);
+   memcpy(options, hidden, hidden_len);
+
+   options[hidden_len] = ',';
+   options[hidden_len + options_len + 1] = 0;
+
+   return 0;
+}
+
+int ve_devmnt_process(struct ve_struct *ve, dev_t dev, void **data_pp, int 
remount)
+{
+   void *data = *data_pp;
+   struct ve_devmnt *devmnt;
+   int err;
+again:
+   err = 1;
+   mutex_lock(>devmnt_mutex);
+   list_for_each_entry(devmnt, >devmnt_list, link) {
+   if (devmnt->dev == dev) {
+   err = ve_devmnt_check(data, devmnt->allowed_options);
+
+   if (!err && !remount)
+   err = ve_devmnt_insert(data, 
devmnt->hidden_options);
+
+   break;
+   }
+   }
+   mutex_unlock(>devmnt_mutex);
+
+   switch (err) {
+   case -EAGAIN:
+   if (!(data = (void *)__get_free_page(GFP_KERNEL)))
+   return -ENOMEM;
+   *(char *)data = 0; /* the string must be zero-terminated */
+   goto again;
+   case 1:
+   if (*data_pp) {
+   ve_printk(VE_LOG_BOTH, KERN_WARNING "VE%s: no allowed "
+ "mount options found for device %u:%u\n",
+ ve->ve_name, MAJOR(dev), MINOR(dev));
+   err = -EPERM;
+   } else
+   err = 0;
+   

[Devel] [PATCH vz8 1/2] ve/devmnt: Introduce ve::devmnt list #PSBM-108196

2020-10-26 Thread Andrey Ryabinin
From: Kirill Tkhai 

1)Porting patch "ve: mount option list" by Maxim Patlasov:

The patch adds new fields to ve_struct: devmnt_list and devmnt_mutex.
devmnt_list is the head of list of ve_devmnt structs. Each host block device
visible from CT can have no more than one struct ve_devmnt linked in
ve->devmnt_list. If ve_devmnt is present, it can be found by 'dev' field.

Each ve_devmnt struct may bear two strings: hidden and allowed options.
hidden_options will be automatically added to CT-user-supplied mount options
after checking allowed_options. Only options listed in allowed_options are
allowed.

devmnt_mutex is to protect operations on the list of ve_devmnt structs.

2)Porting patch "vecalls: VE_CONFIGURE_MOUNT_OPTIONS" by Maxim Patlasov.

Reworking the interface using cgroups. Each CT now has a file:

[ve_cgroup_mnt_pnt]/[CTID]/ve.mount_opts

for configuring permissions for a block device. Below is an example
permissions line:

"0 major:minor;1 balloon_ino=12,pfcache_csum,pfcache=/vz/pfcache;2 barrier=1"

Here, major:minor is a device, '1' starts comma-separated list of
hidden options, and '2' is allowed ones.

https://jira.sw.ru/browse/PSBM-32273

Signed-off-by: Kirill Tkhai 
Acked-by: Maxim Patlasov 

+++
ve/cgroups: Align ve_cftypes assignments

For readability's sake. The other assignments are already aligned.

Signed-off-by: Cyrill Gorcunov 
Rebase: ktkhai@: Merged "ve: increase max length of ve.mount_opts string"

ve/devmnt: Add an ability to show ve.mount_opts

A user may want to see allowed mount options.
This patch allows that.

khorenko@:
* by default ve cgroup is not visible from inside a CT

* currently it's possible to mount ve cgroup inside a CT, but this is
  temporary; we'll disable this in the scope of
  https://jira.sw.ru/browse/PSBM-34291

* this patch allows to see mount options via ve cgroup =>
  after PSBM-34291 is fixed, mount options will be visible only from ve0 (host)

* for host it's OK to see all hidden options

Signed-off-by: Kirill Tkhai 
Rebase: ktkhai@: Merged "ve: Strip unset options in ve.mount_opts"

[aryabinin: vz8 rebase]
https://jira.sw.ru/browse/PSBM-108196
Signed-off-by: Andrey Ryabinin 
---
 include/linux/ve.h |  11 +++
 kernel/ve/ve.c | 175 +
 2 files changed, 186 insertions(+)

diff --git a/include/linux/ve.h b/include/linux/ve.h
index 5b1962ff4c66..1b6317275ca2 100644
--- a/include/linux/ve.h
+++ b/include/linux/ve.h
@@ -96,6 +96,17 @@ struct ve_struct {
 #endif
struct vdso_image   *vdso_64;
struct vdso_image   *vdso_32;
+
+   struct list_headdevmnt_list;
+   struct mutexdevmnt_mutex;
+};
+
+struct ve_devmnt {
+   struct list_headlink;
+
+   dev_t   dev;
+   char*allowed_options;
+   char*hidden_options; /* balloon_ino, etc. */
 };
 
 #define VE_MEMINFO_DEFAULT 1   /* default behaviour */
diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
index ac3dda55e9ae..935e13340051 100644
--- a/kernel/ve/ve.c
+++ b/kernel/ve/ve.c
@@ -9,6 +9,7 @@
  * 've.c' helper file performing VE sub-system initialization
  */
 
+#include 
 #include 
 #include 
 #include 
@@ -643,6 +644,8 @@ static struct cgroup_subsys_state *ve_create(struct 
cgroup_subsys_state *parent_
 #ifdef CONFIG_COREDUMP
strcpy(ve->core_pattern, "core");
 #endif
+   INIT_LIST_HEAD(>devmnt_list);
+   mutex_init(>devmnt_mutex);
 
return >css;
 
@@ -687,10 +690,33 @@ static void ve_offline(struct cgroup_subsys_state *css)
ve->ve_name = NULL;
 }
 
+static void ve_devmnt_free(struct ve_devmnt *devmnt)
+{
+   if (!devmnt)
+   return;
+
+   kfree(devmnt->allowed_options);
+   kfree(devmnt->hidden_options);
+   kfree(devmnt);
+}
+
+static void free_ve_devmnts(struct ve_struct *ve)
+{
+   while (!list_empty(>devmnt_list)) {
+   struct ve_devmnt *devmnt;
+
+   devmnt = list_first_entry(>devmnt_list, struct ve_devmnt, 
link);
+   list_del(>link);
+   ve_devmnt_free(devmnt);
+   }
+}
+
 static void ve_destroy(struct cgroup_subsys_state *css)
 {
struct ve_struct *ve = css_to_ve(css);
 
+   free_ve_devmnts(ve);
+
kmapset_unlink(>sysfs_perms_key, _ve_perms_set);
ve_log_destroy(ve);
ve_free_vdso(ve);
@@ -1085,6 +,148 @@ static u64 ve_netns_avail_nr_read(struct 
cgroup_subsys_state *css, struct cftype
return atomic_read(_to_ve(css)->netns_avail_nr);
 }
 
+static int ve_mount_opts_read(struct seq_file *sf, void *v)
+{
+   struct ve_struct *ve = css_to_ve(seq_css(sf));
+   struct ve_devmnt *devmnt;
+
+   if (ve_is_super(ve))
+   return -ENODEV;
+
+   mutex_lock(>devmnt_mutex);
+   list_for_each_entry(devmnt, >devmnt_list, link) {
+   dev_t dev = devmnt->dev;
+
+   seq_printf(sf, "0 %u:%u;", MAJOR(dev), MINOR(dev));
+   if 

[Devel] [PATCH RH7 1/2] ploop: Preallocate clusters before nullifying on grow

2020-10-26 Thread Kirill Tkhai
If the clusters that will become BAT clusters after grow are currently
data clusters and contain a hole, the nullifying code fails because there
is no extent. Fix that by preallocating the future BAT clusters.

https://jira.sw.ru/browse/PSBM-121772

Signed-off-by: Kirill Tkhai 
---
 drivers/block/ploop/fmt_ploop1.c |8 
 drivers/block/ploop/io_direct.c  |   26 ++
 include/linux/ploop/ploop.h  |2 ++
 3 files changed, 36 insertions(+)

diff --git a/drivers/block/ploop/fmt_ploop1.c b/drivers/block/ploop/fmt_ploop1.c
index 99acad2d6994..316f62621c33 100644
--- a/drivers/block/ploop/fmt_ploop1.c
+++ b/drivers/block/ploop/fmt_ploop1.c
@@ -10,6 +10,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include "ploop1_image.h"
@@ -775,6 +776,13 @@ ploop1_prepare_grow(struct ploop_delta * delta, u64 
*new_size, int *reloc)
delta->io.plo->grow_start = n_present;
delta->io.plo->grow_end = n_needed - n_alloced - 1;
 
+   if (delta->io.ops->prepare_reloc) {
+   err = delta->io.ops->prepare_reloc(>io,
+   delta->io.plo->grow_start, *reloc);
+   if (err)
+   return err;
+   }
+
/* Does not use rellocated data clusters during grow. */
if (delta->holes_bitmap) {
i = delta->io.plo->grow_start;
diff --git a/drivers/block/ploop/io_direct.c b/drivers/block/ploop/io_direct.c
index f67ce47e0562..dff12e2276bb 100644
--- a/drivers/block/ploop/io_direct.c
+++ b/drivers/block/ploop/io_direct.c
@@ -1666,6 +1666,31 @@ static int dio_start_merge(struct ploop_io * io, struct 
ploop_snapdata *sd)
return 0;
 }
 
+/*
+ * There may be a hole in a place, which will be new BAT clusters
+ * after grow (before grow these clusters are data). Nullifying code
+ * expects there is no a hole, so we preallocate them here.
+ */
+static int dio_prepare_reloc(struct ploop_io *io, unsigned int start_clu,
+unsigned int nr)
+{
+   struct file *file = io->files.file;
+   int log = io->plo->cluster_log;
+   loff_t start, len;
+   int err;
+
+   start = start_clu << (log + 9);
+   len = nr << (log + 9);
+
+   err = file->f_op->fallocate(file, 0, start, len);
+   if (err)
+   return err;
+   err = file->f_op->fallocate(file, FALLOC_FL_CONVERT_UNWRITTEN,
+   start, len);
+   return err;
+
+}
+
 static void dio_unplug(struct ploop_io * io)
 {  
/* Need more thinking how to implement unplug */
@@ -1802,6 +1827,7 @@ static struct ploop_io_ops ploop_io_ops_direct =
.complete_snapshot =dio_complete_snapshot,
.io_prepare_merge = dio_prepare_merge,
.start_merge=   dio_start_merge,
+   .prepare_reloc  =   dio_prepare_reloc,
.truncate   =   dio_truncate,
 
.queue_settings =   dio_queue_settings,
diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h
index 16a30076704a..bffd83b08764 100644
--- a/include/linux/ploop/ploop.h
+++ b/include/linux/ploop/ploop.h
@@ -198,6 +198,8 @@ struct ploop_io_ops
int (*complete_snapshot)(struct ploop_io *, struct ploop_snapdata 
*);
int (*io_prepare_merge)(struct ploop_io *, struct ploop_snapdata *);
int (*start_merge)(struct ploop_io *, struct ploop_snapdata *);
+   int (*prepare_reloc)(struct ploop_io *, unsigned int, unsigned int);
+
int (*truncate)(struct ploop_io *, struct file *, __u32 alloc_head);
void(*queue_settings)(struct ploop_io *, struct request_queue *q);
 


___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


[Devel] [PATCH RH7 2/2] ploop: Add more debug on error

2020-10-26 Thread Kirill Tkhai
Signed-off-by: Kirill Tkhai 
---
 include/linux/ploop/ploop.h |5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h
index bffd83b08764..766a0e9f3d89 100644
--- a/include/linux/ploop/ploop.h
+++ b/include/linux/ploop/ploop.h
@@ -732,10 +732,11 @@ static inline void ploop_req_set_error(struct 
ploop_request * preq, int err)
do {
\
if ((err)) {
\
printk("%s() %d ploop%d set error %d: clu=%u, sec=%lu, 
"\
-   "size=%u, rw=0x%x, iblock=%u\n",
\
+   "size=%u, rw=0x%x, iblock=%u, state=0x%lx, 
eng=0x%lx\n",\
__FUNCTION__, __LINE__, (preq)->plo->index, (int)(err), 
\
(preq)->req_cluster, (preq)->req_sector,
\
-   (preq)->req_size, (preq)->req_rw, (preq)->iblock);  
\
+   (preq)->req_size, (preq)->req_rw, (preq)->iblock,   
\
+   (preq)->state, (preq)->eng_state);  
\
PLOOP_TRACE_ERROR_DUMP_STACK(); 
\
}   
\
} while (0);


___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


Re: [Devel] [PATCH RH7] cgroup: rework reference acquisition for cgroup_find_inode

2020-10-26 Thread Pavel Tikhomirov




On 10/26/20 9:48 AM, Andrey Zhadchenko wrote:

Use more generic igrab instead of atomic inc. Move cgroup_hash_del to eviction
stage to avoid deadlock.

Signed-off-by: Andrey Zhadchenko 
---
  kernel/cgroup.c | 25 -
  1 file changed, 8 insertions(+), 17 deletions(-)

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 27d7a5e..3bcbae9 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1522,21 +1522,10 @@ static struct inode *cgroup_find_inode(unsigned long 
fh[2], char take_ref)
struct inode *ret = NULL;
  
  	spin_lock(_inode_table_lock);

-   item = cgroup_find_item_no_lock(fh);
  
-	/*

-* If we need to increase refcount, we should be aware of possible
-* deadlock. Another thread may have started deleting this inode:
-* iput->iput_final->cgroup_delete_inode->cgroup_hash_del
-* If we just call igrab, it will try to take i_lock and this will
-* result in deadlock, because deleting thread has already taken it
-* and waits on cgroup_inode_table_lock to find inode in hashtable.
-*
-* If i_count is zero, someone is deleting it -> skip.
-*/
-   if (take_ref && item)
-   if (!atomic_inc_not_zero(>inode->i_count))
-   item = NULL;
+   item = cgroup_find_item_no_lock(fh);
+   if (item && take_ref && !igrab(item->inode))
+   item = NULL;
  
  	spin_unlock(_inode_table_lock);
  
@@ -1634,15 +1623,17 @@ static const struct export_operations cgroup_export_ops = {

.fh_to_dentry   = cgroup_fh_to_dentry,
  };
  
-static int cgroup_delete_inode(struct inode *inode)

+static void cgroup_evict_inode(struct inode *inode)
  {
cgroup_hash_del(inode);
-   return generic_delete_inode(inode);
+   clear_inode(inode);
+   truncate_inode_pages_final(>i_data);


Why clear_inode/truncate_inode_pages_final order is different from the 
order in evict()? And probably also we need to call cgroup_hash_del 
after clear_inode to be consistent with mqueue_evict_inode.


I don't like different order of calls in different places, though it is 
probably ok now, this can hurt in future.



  }
  
  static const struct super_operations cgroup_ops = {

.statfs = simple_statfs,
-   .drop_inode = cgroup_delete_inode,
+   .drop_inode = generic_delete_inode,
+   .evict_inode = cgroup_evict_inode,
.show_options = cgroup_show_options,
  #ifdef CONFIG_VE
.show_path = cgroup_show_path,



--
Best regards, Tikhomirov Pavel
Software Developer, Virtuozzo.
___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


[Devel] [PATCH RH7] cgroup: rework reference acquisition for cgroup_find_inode

2020-10-26 Thread Andrey Zhadchenko
Use more generic igrab instead of atomic inc. Move cgroup_hash_del to eviction
stage to avoid deadlock.

Signed-off-by: Andrey Zhadchenko 
---
 kernel/cgroup.c | 25 -
 1 file changed, 8 insertions(+), 17 deletions(-)

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 27d7a5e..3bcbae9 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1522,21 +1522,10 @@ static struct inode *cgroup_find_inode(unsigned long 
fh[2], char take_ref)
struct inode *ret = NULL;
 
spin_lock(_inode_table_lock);
-   item = cgroup_find_item_no_lock(fh);
 
-   /*
-* If we need to increase refcount, we should be aware of possible
-* deadlock. Another thread may have started deleting this inode:
-* iput->iput_final->cgroup_delete_inode->cgroup_hash_del
-* If we just call igrab, it will try to take i_lock and this will
-* result in deadlock, because deleting thread has already taken it
-* and waits on cgroup_inode_table_lock to find inode in hashtable.
-*
-* If i_count is zero, someone is deleting it -> skip.
-*/
-   if (take_ref && item)
-   if (!atomic_inc_not_zero(>inode->i_count))
-   item = NULL;
+   item = cgroup_find_item_no_lock(fh);
+   if (item && take_ref && !igrab(item->inode))
+   item = NULL;
 
spin_unlock(_inode_table_lock);
 
@@ -1634,15 +1623,17 @@ static const struct export_operations cgroup_export_ops 
= {
.fh_to_dentry   = cgroup_fh_to_dentry,
 };
 
-static int cgroup_delete_inode(struct inode *inode)
+static void cgroup_evict_inode(struct inode *inode)
 {
cgroup_hash_del(inode);
-   return generic_delete_inode(inode);
+   clear_inode(inode);
+   truncate_inode_pages_final(>i_data);
 }
 
 static const struct super_operations cgroup_ops = {
.statfs = simple_statfs,
-   .drop_inode = cgroup_delete_inode,
+   .drop_inode = generic_delete_inode,
+   .evict_inode = cgroup_evict_inode,
.show_options = cgroup_show_options,
 #ifdef CONFIG_VE
.show_path = cgroup_show_path,
-- 
1.8.3.1

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


Re: [Devel] [PATCH RH7 v4] cgroup: add export_operations to cgroup super block

2020-10-26 Thread Andrey Zhadchenko
This patch is not needed for vz8, because cgroup will use kernfs, which
has export_operations implemented.

Best regards,
Andrey Zhadchenko

On Thu, 24 Sep 2020 10:41:31 +0300
Konstantin Khorenko  wrote:

> Please, port this patch to vz8 as well, branch
> branch-rh8-4.18.0-193.6.3.vz8.4.x-ovz
> 
> --
> Best regards,
> 
> Konstantin Khorenko,
> Virtuozzo Linux Kernel Team
> 
> On 07/30/2020 04:01 PM, Andrey Zhadchenko wrote:
> > criu uses fhandle from fdinfo to dump inotify objects. cgroup super
> > block has no export operations, but .encode_fh and .fh_to_dentry
> > are needed for inotify_fdinfo function and open_by_handle_at
> > syscall in order to correctly open files located on cgroupfs by
> > fhandle. Add hash table as a storage for inodes with exported
> > fhandle.
> >
> > v3: use inode->i_gen to protect from i_ino reusage. increase
> > fhandle size to 2 * u32.
> > Add an option to take reference of inode in cgroup_find_inode, so
> > no one can delete recently found inode.
> > v4: introduced hashtable helper functions to avoid races.
> > changed i_gen generation from get_seconds to prandom_u32.
> >
> > https://jira.sw.ru/browse/PSBM-105889
> > Signed-off-by: Andrey Zhadchenko 
> > ---
> >  kernel/cgroup.c | 168
> > +++- 1 file
> > changed, 167 insertions(+), 1 deletion(-)
> >
> > diff --git a/kernel/cgroup.c b/kernel/cgroup.c
> > index 9fdba79..956a9ac 100644
> > --- a/kernel/cgroup.c
> > +++ b/kernel/cgroup.c
> > @@ -62,6 +62,8 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> > +#include 
> >
> >  #include 
> >
> > @@ -765,6 +767,7 @@ static struct inode *cgroup_new_inode(umode_t
> > mode, struct super_block *sb)
> >
> > if (inode) {
> > inode->i_ino = get_next_ino();
> > +   inode->i_generation = prandom_u32();
> > inode->i_mode = mode;
> > inode->i_uid = current_fsuid();
> > inode->i_gid = current_fsgid();
> > @@ -1390,9 +1393,171 @@ out:
> >  }
> >  #endif
> >
> > +/*
> > + * hashtable for inodes that have exported fhandles.
> > + * When we export fhandle, we add it's inode into
> > + * hashtable so we can find it fast
> > + */
> > +
> > +#define CGROUP_INODE_HASH_BITS 10
> > +static DEFINE_HASHTABLE(cgroup_inode_table,
> > CGROUP_INODE_HASH_BITS); +static
> > DEFINE_SPINLOCK(cgroup_inode_table_lock); +
> > +struct cg_inode_hitem {
> > +   struct inode *inode;
> > +   struct hlist_node hlist;
> > +};
> > +
> > +static inline unsigned long cgroup_inode_get_hash(unsigned int
> > i_ino) +{
> > +   return hash_32(i_ino, CGROUP_INODE_HASH_BITS);
> > +}
> > +
> > +static struct cg_inode_hitem *cgroup_find_item_no_lock(unsigned
> > long fh[2]) +{
> > +   struct cg_inode_hitem *i;
> > +   struct hlist_head *head = cgroup_inode_table
> > +   + cgroup_inode_get_hash(fh[1]);
> > +   struct cg_inode_hitem *found = NULL;
> > +
> > +   hlist_for_each_entry(i, head, hlist) {
> > +   if (i->inode->i_generation == fh[0] &&
> > +   i->inode->i_ino == fh[1]) {
> > +   found = i;
> > +   break;
> > +   }
> > +   }
> > +
> > +   return found;
> > +}
> > +
> > +static struct inode *cgroup_find_inode(unsigned long fh[2], char
> > take_ref) +{
> > +   struct cg_inode_hitem *item;
> > +   struct inode *ret = NULL;
> > +
> > +   spin_lock(_inode_table_lock);
> > +   item = cgroup_find_item_no_lock(fh);
> > +
> > +   /*
> > +* If we need to increase refcount, we should be aware of
> > possible
> > +* deadlock. Another thread may have started deleting this
> > inode:
> > +* iput->iput_final->cgroup_delete_inode->cgroup_hash_del
> > +* If we just call igrab, it will try to take i_lock and
> > this will
> > +* result in deadlock, because deleting thread has already
> > taken it
> > +* and waits on cgroup_inode_table_lock to find inode in
> > hashtable.
> > +*
> > +* If i_count is zero, someone is deleting it -> skip.
> > +*/
> > +   if (take_ref && item)
> > +   if (!atomic_inc_not_zero(>inode->i_count))
> > +   item = NULL;
> > +
> > +   spin_unlock(_inode_table_lock);
> > +
> > +   if (item)
> > +   ret = item->inode;
> > +
> > +   return ret;
> > +}
> > +
> > +static int cgroup_hash_add(struct inode *inode)
> > +{
> > +   unsigned long fh[2] = {inode->i_generation, inode->i_ino};
> > +
> > +   if (!cgroup_find_inode(fh, 0)) {
> > +   struct cg_inode_hitem *item;
> > +   struct cg_inode_hitem *existing_item = 0;
> > +   struct hlist_head *head = cgroup_inode_table
> > +   + cgroup_inode_get_hash(inode->i_ino);
> > +
> > +   item = kmalloc(sizeof(struct cg_inode_hitem),
> > GFP_KERNEL);
> > +   if (!item)
> > +   return -ENOMEM;
> > +   item->inode = inode;
> > +
> > +   spin_lock(_inode_table_lock);
> > +   existing_item = cgroup_find_item_no_lock(fh);
> > +   if 

[Devel] [PATCH RHEL7 COMMIT] kvm: fix AMD IBRS/IBPB/STIBP/SSBD reporting #PSBM-120787

2020-10-26 Thread Vasily Averin
The commit is pushed to "branch-rh7-3.10.0-1127.18.2.vz7.163.x-ovz" and will 
appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-1127.18.2.vz7.163.40
-->
commit da3fe794241c1c644e2ff0b136ed42deb40c8a8e
Author: Denis V. Lunev 
Date:   Mon Oct 26 07:50:23 2020 +0300

kvm: fix AMD IBRS/IBPB/STIBP/SSBD reporting #PSBM-120787

    We should report these bits in CPUID 0x80000008 EBX on AMD only, i.e. when
    AMD specific feature bits are enabled.

Signed-off-by: Denis V. Lunev 
CC: Vasily Averin 
CC: Konstantin Khorenko 

https://jira.sw.ru/browse/PSBM-120787
Signed-off-by: "Denis V. Lunev" 
---
 arch/x86/kvm/cpuid.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 96a6bac..d876f18 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -642,13 +642,13 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 
*entry, u32 function,
 * arch/x86/kernel/cpu/bugs.c is kind enough to
 * record that in cpufeatures so use them.
 */
-   if (boot_cpu_has(X86_FEATURE_IBPB))
+   if (boot_cpu_has(X86_FEATURE_AMD_IBPB))
entry->ebx |= F(AMD_IBPB);
-   if (boot_cpu_has(X86_FEATURE_IBRS))
+   if (boot_cpu_has(X86_FEATURE_AMD_IBRS))
entry->ebx |= F(AMD_IBRS);
-   if (boot_cpu_has(X86_FEATURE_STIBP))
+   if (boot_cpu_has(X86_FEATURE_AMD_STIBP))
entry->ebx |= F(AMD_STIBP);
-   if (boot_cpu_has(X86_FEATURE_SSBD))
+   if (boot_cpu_has(X86_FEATURE_AMD_SSBD))
entry->ebx |= F(AMD_SSBD);
if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
entry->ebx |= F(AMD_SSB_NO);
___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel