On 10/14/20 2:05 AM, Andrey Zhadchenko wrote:
Overlayfs temporarily overrides credentials in the copy_up function with the
ones which were used to create the mount.

Unfortunately vfs_setxattr requires CAP_SYS_ADMIN
capability in current user namespace.

No, if that were so, there would be no error =) To be correct we should say:

Function vfs_setxattr for "trusted." attrs requires CAP_SYS_ADMIN in current ve's userns.

It is done so to mimic mainstream behaviour for containers (ves), so that a container user can't set "trusted." xattrs if that user is in a non-root container userns.

This leads to strange situations.
For example, if overlayfs mount was made inside ve it is impossible to use
copy_up from init_user_ns even with CAP_SYS_ADMIN. This is because overridden
credentials are not sufficient in init_user_ns to set xattr to file.
This is also required for criu since copy_up can be triggered on dump stage:
reading inotify fhandle from /proc may start copy_up.

I hope that overlayfs overrides credentials exactly to be able to pass those checks. In the mainstream kernel overlayfs can be used from any userns, but can only be mounted from init_user_ns, so credentials always change to "more permissive". So it should be safe to skip the override in case we are already "more permissive" than the superblock's credentials.


Add an option to avoid vfs_setxattr CAP_SYS_ADMIN check if current credentials
have CAP_SYS_ADMIN in namespace that is recorded in overlayfs mount superblock.

Sorry, but looking at the code I don't see how it works... There are only three codepaths here:

  +-< ovl_do_setxattr_ext
    +-< ovl_do_setxattr #1 sets propagate_cap to false
    +-< ovl_check_setxattr_ext
    | +-< ovl_set_origin_ext
    | | +-< ovl_set_origin #2 sets propagate_cap to false
    | +-< ovl_check_setxattr #3 sets propagate_cap to false

And on all of them we don't "propagate_cap". Probably I'm missing something though.


https://jira.sw.ru/browse/PSBM-108122
Signed-off-by: Andrey Zhadchenko <andrey.zhadche...@virtuozzo.com>
---
  fs/overlayfs/copy_up.c   | 25 +++++++++++++++++++------
  fs/overlayfs/overlayfs.h | 39 ++++++++++++++++++++++++++-------------
  fs/overlayfs/util.c      | 32 ++++++++++++++++++++++++++++----
  fs/xattr.c               |  2 +-
  4 files changed, 74 insertions(+), 24 deletions(-)

diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 1564a35..d6b285f 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -20,6 +20,7 @@
  #include <linux/fdtable.h>
  #include <linux/ratelimit.h>
  #include <linux/exportfs.h>
+#include <linux/capability.h>
  #include "overlayfs.h"
#define OVL_COPY_UP_CHUNK_SIZE (1 << 20)
@@ -321,8 +322,8 @@ out:
        return fh;
  }
-int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
-                  struct dentry *upper)
+int ovl_set_origin_ext(struct dentry *dentry, struct dentry *lower,
+                  struct dentry *upper, int propagate_cap)
  {
        const struct ovl_fh *fh = NULL;
        int err;
@@ -341,8 +342,8 @@ int ovl_set_origin(struct dentry *dentry, struct dentry 
*lower,
        /*
         * Do not fail when upper doesn't support xattrs.
         */
-       err = ovl_check_setxattr(dentry, upper, OVL_XATTR_ORIGIN, fh,
-                                fh ? fh->len : 0, 0);
+       err = ovl_check_setxattr_ext(dentry, upper, OVL_XATTR_ORIGIN, fh,
+                                fh ? fh->len : 0, 0, propagate_cap);
        kfree(fh);
return err;
@@ -433,6 +434,7 @@ struct ovl_copy_up_ctx {
        struct dentry *destdir;
        struct qstr destname;
        struct dentry *workdir;
+       int propagate_cap;
        bool tmpfile;
        bool origin;
        bool indexed;
@@ -711,7 +713,7 @@ out:
  }
static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
-                          int flags)
+                          int flags, int propagate_cap)
  {
        int err;
        struct path parentpath;
@@ -719,6 +721,7 @@ static int ovl_copy_up_one(struct dentry *parent, struct 
dentry *dentry,
                .parent = parent,
                .dentry = dentry,
                .workdir = ovl_workdir(dentry),
+               .propagate_cap = propagate_cap,
        };
if (WARN_ON(!ctx.workdir))
@@ -768,9 +771,19 @@ static int ovl_copy_up_one(struct dentry *parent, struct 
dentry *dentry,
        return err;
  }
+static int ovl_can_propagate_cap(struct dentry *dentry)
+{
+       struct super_block *sb = dentry->d_sb;
+       struct ovl_fs *ofs = sb->s_fs_info;
+       struct user_namespace *ovl_ns = ofs->creator_cred->user_ns;
+
+       return ns_capable(ovl_ns, CAP_SYS_ADMIN);
+}
+
  int ovl_copy_up_flags(struct dentry *dentry, int flags)
  {
        int err = 0;
+       int propagate_cap = ovl_can_propagate_cap(dentry);
        const struct cred *old_cred = ovl_override_creds(dentry->d_sb);
        bool disconnected = (dentry->d_flags & DCACHE_DISCONNECTED);
@@ -815,7 +828,7 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags)
                        next = parent;
                }
- err = ovl_copy_up_one(parent, next, flags);
+               err = ovl_copy_up_one(parent, next, flags, propagate_cap);
dput(parent);
                dput(next);
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 7052938..6917acd 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -149,15 +149,6 @@ static inline int ovl_do_symlink(struct inode *dir, struct 
dentry *dentry,
        return err;
  }
-static inline int ovl_do_setxattr(struct dentry *dentry, const char *name,
-                                 const void *value, size_t size, int flags)
-{
-       int err = vfs_setxattr(dentry, name, value, size, flags);
-       pr_debug("setxattr(%pd2, \"%s\", \"%*pE\", %zu, 0x%x) = %i\n",
-                dentry, name, min((int)size, 48), value, size, flags, err);
-       return err;
-}
-
  static inline int ovl_do_removexattr(struct dentry *dentry, const char *name)
  {
        int err = vfs_removexattr(dentry, name);
@@ -245,9 +236,12 @@ int ovl_copy_up_start(struct dentry *dentry);
  void ovl_copy_up_end(struct dentry *dentry);
  bool ovl_check_origin_xattr(struct dentry *dentry);
  bool ovl_check_dir_xattr(struct dentry *dentry, const char *name);
-int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
+int ovl_check_setxattr_ext(struct dentry *dentry, struct dentry *upperdentry,
                       const char *name, const void *value, size_t size,
-                      int xerr);
+                      int xerr, int propagate_cap);
+int ovl_do_setxattr_ext(struct dentry *dentry, const char *name,
+                       const void *value, size_t size, int flags,
+                       int propagate_cap);
  int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry);
  void ovl_set_flag(unsigned long flag, struct inode *inode);
  void ovl_clear_flag(unsigned long flag, struct inode *inode);
@@ -279,6 +273,19 @@ static inline unsigned int ovl_xino_bits(struct 
super_block *sb)
        return ofs->xino_bits;
  }
+static inline int ovl_check_setxattr(struct dentry *dentry,
+               struct dentry *upperdentry, const char *name,
+               const void *value, size_t size, int xerr)
+{
+       return ovl_check_setxattr_ext(dentry, upperdentry, name, value, size,
+                                     xerr, 0);
+}
+
+static inline int ovl_do_setxattr(struct dentry *dentry, const char *name,
+               const void *value, size_t size, int flags)
+{
+       return ovl_do_setxattr_ext(dentry, name, value, size, flags, 0);
+}
/* namei.c */
  int ovl_check_fh_len(struct ovl_fh *fh, int fh_len);
@@ -400,8 +407,14 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags);
  int ovl_copy_xattr(struct dentry *old, struct dentry *new);
  int ovl_set_attr(struct dentry *upper, struct kstat *stat);
  struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool is_upper);
-int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
-                  struct dentry *upper);
+int ovl_set_origin_ext(struct dentry *dentry, struct dentry *lower,
+                  struct dentry *upper, int propagate_cap);
+
+static inline int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
+                  struct dentry *upper)
+{
+       return ovl_set_origin_ext(dentry, lower, upper, 0);
+}
/* export.c */
  extern const struct export_operations ovl_export_operations;
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 52a5116..30c10f1 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -419,9 +419,32 @@ bool ovl_check_dir_xattr(struct dentry *dentry, const char 
*name)
        return false;
  }
-int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
-                      const char *name, const void *value, size_t size,
-                      int xerr)
+int ovl_do_setxattr_ext(struct dentry *dentry, const char *name,
+               const void *value, size_t size, int flags, int propagate_cap)
+{
+       int err = vfs_setxattr(dentry, name, value, size, flags);
+
+       if (propagate_cap && err == -EPERM) {
+               struct inode *ino = dentry->d_inode;
+
+               if (IS_IMMUTABLE(ino) || IS_APPEND(ino))
+                       goto out;
+
+               inode_lock(ino);
+               err = __vfs_setxattr_noperm(dentry, name, value, size, flags);
+               inode_unlock(ino);

I don't like this method. We have different permission checks in vfs_setxattr (e.g. xattr_permission and security_inode_setxattr); with this approach we just ignore all of them, while we actually need to bypass only the "trusted." part of xattr_permission.

I would feel much better if we just didn't override creds in overlay when our current cred is already more permissive than the overriding one. Because we can face ve_capable checks somewhere else later unexpectedly and will need to debug it all again. Probably it's better to allow everything for a host user?

Maybe we need to discuss this with company of @den @khorenko @vvs.

+       }
+
+out:
+       pr_debug("setxattr(%pd2, \"%s\", \"%*pE\", %zu, 0x%x) = %i, propagate_cap = 
%d\n",
+                dentry, name, min((int)size, 48), value, size, flags, err,
+                propagate_cap);
+       return err;
+}
+
+int ovl_check_setxattr_ext(struct dentry *dentry, struct dentry *upperdentry,
+                          const char *name, const void *value, size_t size,
+                          int xerr, int propagate_cap)
  {
        int err;
        struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
@@ -429,7 +452,8 @@ int ovl_check_setxattr(struct dentry *dentry, struct dentry 
*upperdentry,
        if (ofs->noxattr)
                return xerr;
- err = ovl_do_setxattr(upperdentry, name, value, size, 0);
+       err = ovl_do_setxattr_ext(upperdentry, name, value, size, 0,
+                             propagate_cap);
if (err == -EOPNOTSUPP) {
                pr_warn("overlayfs: cannot set %s xattr on upper\n", name);
diff --git a/fs/xattr.c b/fs/xattr.c
index 9c24acc..c164e83 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -124,7 +124,7 @@ int __vfs_setxattr_noperm(struct dentry *dentry, const char 
*name,
return error;
  }
-
+EXPORT_SYMBOL_GPL(__vfs_setxattr_noperm);
int
  vfs_setxattr(struct dentry *dentry, const char *name, const void *value,


--
Best regards, Tikhomirov Pavel
Software Developer, Virtuozzo.
_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to