Add support for the "system.richacl" xattr in nfs.  The existing
"system.nfs4_acl" xattr on nfs doesn't map user and group names to uids
and gids; the "system.richacl" xattr does, and only keeps the
on-the-wire names when there is no mapping.  This allows permissions
to be copied across different file systems.

Signed-off-by: Andreas Gruenbacher <agrue...@redhat.com>
---
 fs/nfs/inode.c            |   3 -
 fs/nfs/nfs4proc.c         | 698 +++++++++++++++++++++++++++++++++-------------
 fs/nfs/nfs4xdr.c          | 179 ++++++++++--
 fs/nfs/super.c            |   4 +-
 include/linux/nfs_fs.h    |   1 -
 include/linux/nfs_fs_sb.h |   2 +
 include/linux/nfs_xdr.h   |   9 +-
 7 files changed, 673 insertions(+), 223 deletions(-)

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 326d9e1..843d15d 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1852,9 +1852,6 @@ struct inode *nfs_alloc_inode(struct super_block *sb)
                return NULL;
        nfsi->flags = 0UL;
        nfsi->cache_validity = 0UL;
-#if IS_ENABLED(CONFIG_NFS_V4)
-       nfsi->nfs4_acl = NULL;
-#endif /* CONFIG_NFS_V4 */
        return &nfsi->vfs_inode;
 }
 EXPORT_SYMBOL_GPL(nfs_alloc_inode);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index b3a6558..3888c70 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -55,6 +55,9 @@
 #include <linux/xattr.h>
 #include <linux/utsname.h>
 #include <linux/freezer.h>
+#include <linux/richacl.h>
+#include <linux/richacl_xattr.h>
+#include <linux/nfs4acl.h>
 
 #include "nfs4_fs.h"
 #include "delegation.h"
@@ -2971,15 +2974,18 @@ static int _nfs4_server_capabilities(struct nfs_server 
*server, struct nfs_fh *f
                        res.attr_bitmask[2] &= FATTR4_WORD2_NFS42_MASK;
                }
                memcpy(server->attr_bitmask, res.attr_bitmask, 
sizeof(server->attr_bitmask));
-               server->caps &= ~(NFS_CAP_ACLS|NFS_CAP_HARDLINKS|
-                               NFS_CAP_SYMLINKS|NFS_CAP_FILEID|
+               server->caps &= ~(NFS_CAP_ALLOW_ACLS|NFS_CAP_DENY_ACLS|
+                               
NFS_CAP_HARDLINKS|NFS_CAP_SYMLINKS|NFS_CAP_FILEID|
                                NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|
                                NFS_CAP_OWNER_GROUP|NFS_CAP_ATIME|
                                NFS_CAP_CTIME|NFS_CAP_MTIME|
                                NFS_CAP_SECURITY_LABEL);
-               if (res.attr_bitmask[0] & FATTR4_WORD0_ACL &&
-                               res.acl_bitmask & ACL4_SUPPORT_ALLOW_ACL)
-                       server->caps |= NFS_CAP_ACLS;
+               if (res.attr_bitmask[0] & FATTR4_WORD0_ACL) {
+                       if (res.acl_bitmask & ACL4_SUPPORT_ALLOW_ACL)
+                               server->caps |= NFS_CAP_ALLOW_ACLS;
+                       if (res.acl_bitmask & ACL4_SUPPORT_DENY_ACL)
+                               server->caps |= NFS_CAP_DENY_ACLS;
+               }
                if (res.has_links != 0)
                        server->caps |= NFS_CAP_HARDLINKS;
                if (res.has_symlinks != 0)
@@ -4507,45 +4513,11 @@ static int nfs4_proc_renew(struct nfs_client *clp, 
struct rpc_cred *cred)
        return 0;
 }
 
-static inline int nfs4_server_supports_acls(struct nfs_server *server)
-{
-       return server->caps & NFS_CAP_ACLS;
-}
-
-/* Assuming that XATTR_SIZE_MAX is a multiple of PAGE_SIZE, and that
- * it's OK to put sizeof(void) * (XATTR_SIZE_MAX/PAGE_SIZE) bytes on
- * the stack.
+/* An arbitrary limit; we allocate at most DIV_ROUND_UP(NFS4ACL_SIZE_MAX,
+ * PAGE_SIZE) pages and put an array of DIV_ROUND_UP(NFS4ACL_SIZE_MAX,
+ * PAGE_SIZE) page pointers on the stack when encoding or decoding acls.
  */
-#define NFS4ACL_MAXPAGES DIV_ROUND_UP(XATTR_SIZE_MAX, PAGE_SIZE)
-
-static int buf_to_pages_noslab(const void *buf, size_t buflen,
-               struct page **pages)
-{
-       struct page *newpage, **spages;
-       int rc = 0;
-       size_t len;
-       spages = pages;
-
-       do {
-               len = min_t(size_t, PAGE_SIZE, buflen);
-               newpage = alloc_page(GFP_KERNEL);
-
-               if (newpage == NULL)
-                       goto unwind;
-               memcpy(page_address(newpage), buf, len);
-                buf += len;
-                buflen -= len;
-               *pages++ = newpage;
-               rc++;
-       } while (buflen != 0);
-
-       return rc;
-
-unwind:
-       for(; rc > 0; rc--)
-               __free_page(spages[rc-1]);
-       return -ENOMEM;
-}
+#define NFS4ACL_SIZE_MAX 65536
 
 struct nfs4_cached_acl {
        int cached;
@@ -4553,66 +4525,9 @@ struct nfs4_cached_acl {
        char data[0];
 };
 
-static void nfs4_set_cached_acl(struct inode *inode, struct nfs4_cached_acl 
*acl)
-{
-       struct nfs_inode *nfsi = NFS_I(inode);
-
-       spin_lock(&inode->i_lock);
-       kfree(nfsi->nfs4_acl);
-       nfsi->nfs4_acl = acl;
-       spin_unlock(&inode->i_lock);
-}
-
 static void nfs4_zap_acl_attr(struct inode *inode)
 {
-       nfs4_set_cached_acl(inode, NULL);
-}
-
-static inline ssize_t nfs4_read_cached_acl(struct inode *inode, char *buf, 
size_t buflen)
-{
-       struct nfs_inode *nfsi = NFS_I(inode);
-       struct nfs4_cached_acl *acl;
-       int ret = -ENOENT;
-
-       spin_lock(&inode->i_lock);
-       acl = nfsi->nfs4_acl;
-       if (acl == NULL)
-               goto out;
-       if (buf == NULL) /* user is just asking for length */
-               goto out_len;
-       if (acl->cached == 0)
-               goto out;
-       ret = -ERANGE; /* see getxattr(2) man page */
-       if (acl->len > buflen)
-               goto out;
-       memcpy(buf, acl->data, acl->len);
-out_len:
-       ret = acl->len;
-out:
-       spin_unlock(&inode->i_lock);
-       return ret;
-}
-
-static void nfs4_write_cached_acl(struct inode *inode, struct page **pages, 
size_t pgbase, size_t acl_len)
-{
-       struct nfs4_cached_acl *acl;
-       size_t buflen = sizeof(*acl) + acl_len;
-
-       if (buflen <= PAGE_SIZE) {
-               acl = kmalloc(buflen, GFP_KERNEL);
-               if (acl == NULL)
-                       goto out;
-               acl->cached = 1;
-               _copy_from_pages(acl->data, pages, pgbase, acl_len);
-       } else {
-               acl = kmalloc(sizeof(*acl), GFP_KERNEL);
-               if (acl == NULL)
-                       goto out;
-               acl->cached = 0;
-       }
-       acl->len = acl_len;
-out:
-       nfs4_set_cached_acl(inode, acl);
+       forget_cached_richacl(inode);
 }
 
 /*
@@ -4625,121 +4540,269 @@ out:
  * length. The next getxattr call will then produce another round trip to
  * the server, this time with the input buf of the required size.
  */
-static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t 
buflen)
+static struct richacl *__nfs4_get_acl_uncached(struct inode *inode)
 {
-       struct page *pages[NFS4ACL_MAXPAGES] = {NULL, };
+       struct nfs_server *server = NFS_SERVER(inode);
+       struct page *pages[DIV_ROUND_UP(NFS4ACL_SIZE_MAX, PAGE_SIZE)] = {};
        struct nfs_getaclargs args = {
                .fh = NFS_FH(inode),
                .acl_pages = pages,
-               .acl_len = buflen,
+               .acl_len = ARRAY_SIZE(pages) * PAGE_SIZE,
        };
        struct nfs_getaclres res = {
-               .acl_len = buflen,
+               .server = server,
        };
        struct rpc_message msg = {
                .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETACL],
                .rpc_argp = &args,
                .rpc_resp = &res,
        };
-       unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE);
-       int ret = -ENOMEM, i;
+       int err, i;
 
-       /* As long as we're doing a round trip to the server anyway,
-        * let's be prepared for a page of acl data. */
-       if (npages == 0)
-               npages = 1;
-       if (npages > ARRAY_SIZE(pages))
-               return -ERANGE;
-
-       for (i = 0; i < npages; i++) {
-               pages[i] = alloc_page(GFP_KERNEL);
-               if (!pages[i])
+       if (ARRAY_SIZE(pages) > 1) {
+               /* for decoding across pages */
+               res.acl_scratch = alloc_page(GFP_KERNEL);
+               err = -ENOMEM;
+               if (!res.acl_scratch)
                        goto out_free;
        }
 
-       /* for decoding across pages */
-       res.acl_scratch = alloc_page(GFP_KERNEL);
-       if (!res.acl_scratch)
-               goto out_free;
-
-       args.acl_len = npages * PAGE_SIZE;
-
-       dprintk("%s  buf %p buflen %zu npages %d args.acl_len %zu\n",
-               __func__, buf, buflen, npages, args.acl_len);
-       ret = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode),
+       dprintk("%s  args.acl_len %zu\n",
+               __func__, args.acl_len);
+       err = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode),
                             &msg, &args.seq_args, &res.seq_res, 0);
-       if (ret)
+       if (err)
                goto out_free;
 
-       /* Handle the case where the passed-in buffer is too short */
-       if (res.acl_flags & NFS4_ACL_TRUNC) {
-               /* Did the user only issue a request for the acl length? */
-               if (buf == NULL)
-                       goto out_ok;
-               ret = -ERANGE;
-               goto out_free;
-       }
-       nfs4_write_cached_acl(inode, pages, res.acl_data_offset, res.acl_len);
-       if (buf) {
-               if (res.acl_len > buflen) {
-                       ret = -ERANGE;
-                       goto out_free;
-               }
-               _copy_from_pages(buf, pages, res.acl_data_offset, res.acl_len);
-       }
-out_ok:
-       ret = res.acl_len;
+       richacl_compute_max_masks(res.acl);
+       /* FIXME: Set inode->i_mode from res->mode?  */
+       set_cached_richacl(inode, res.acl);
+       err = 0;
+
 out_free:
-       for (i = 0; i < npages; i++)
-               if (pages[i])
-                       __free_page(pages[i]);
+       if (err) {
+               richacl_put(res.acl);
+               res.acl = ERR_PTR(err);
+       }
+       for (i = 0; i < ARRAY_SIZE(pages) && pages[i]; i++)
+               __free_page(pages[i]);
        if (res.acl_scratch)
                __free_page(res.acl_scratch);
-       return ret;
+       return res.acl;
 }
 
-static ssize_t nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t 
buflen)
+static struct richacl *nfs4_get_acl_uncached(struct inode *inode)
 {
        struct nfs4_exception exception = { };
-       ssize_t ret;
+       struct richacl *acl;
        do {
-               ret = __nfs4_get_acl_uncached(inode, buf, buflen);
-               trace_nfs4_get_acl(inode, ret);
-               if (ret >= 0)
+               acl = __nfs4_get_acl_uncached(inode);
+               trace_nfs4_get_acl(inode, IS_ERR(acl) ? PTR_ERR(acl) : 0);
+               if (!IS_ERR(acl))
                        break;
-               ret = nfs4_handle_exception(NFS_SERVER(inode), ret, &exception);
+               acl = ERR_PTR(nfs4_handle_exception(NFS_SERVER(inode),
+                             PTR_ERR(acl), &exception));
        } while (exception.retry);
-       return ret;
+       return acl;
 }
 
-static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen)
+static struct richacl *nfs4_proc_get_acl(struct inode *inode)
 {
        struct nfs_server *server = NFS_SERVER(inode);
+       struct richacl *acl;
        int ret;
 
-       if (!nfs4_server_supports_acls(server))
-               return -EOPNOTSUPP;
+       if (!(server->caps & (NFS_CAP_ALLOW_ACLS | NFS_CAP_DENY_ACLS)))
+               return ERR_PTR(-EOPNOTSUPP);
        ret = nfs_revalidate_inode(server, inode);
        if (ret < 0)
-               return ret;
+               return ERR_PTR(ret);
        if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_ACL)
                nfs_zap_acl_cache(inode);
-       ret = nfs4_read_cached_acl(inode, buf, buflen);
-       if (ret != -ENOENT)
-               /* -ENOENT is returned if there is no ACL or if there is an ACL
-                * but no cached acl data, just the acl length */
-               return ret;
-       return nfs4_get_acl_uncached(inode, buf, buflen);
+       acl = get_cached_richacl(inode);
+       if (acl != ACL_NOT_CACHED)
+               return acl;
+       return nfs4_get_acl_uncached(inode);
+}
+
+/*
+ * Check whether every entry of @acl has a type that @server supports.
+ *
+ * Returns -EOPNOTSUPP when the server has neither NFS_CAP_ALLOW_ACLS nor
+ * NFS_CAP_DENY_ACLS set, -EINVAL when an entry is neither an allow nor a
+ * deny entry or the matching capability is missing, and 0 otherwise.
+ */
+static int
+richacl_supported(struct nfs_server *server, struct richacl *acl)
+{
+       struct richace *entry;
+
+       if ((server->caps & (NFS_CAP_ALLOW_ACLS | NFS_CAP_DENY_ACLS)) == 0)
+               return -EOPNOTSUPP;
+
+       richacl_for_each_entry(entry, acl) {
+               bool supported;
+
+               if (richace_is_allow(entry))
+                       supported = server->caps & NFS_CAP_ALLOW_ACLS;
+               else if (richace_is_deny(entry))
+                       supported = server->caps & NFS_CAP_DENY_ACLS;
+               else
+                       supported = false;
+
+               if (!supported)
+                       return -EINVAL;
+       }
+       return 0;
 }
 
-static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t 
buflen)
+/*
+ * Map @uid to a name via nfs_map_uid_to_name() and append that name to
+ * @xdr as an XDR opaque (length word followed by the data).
+ *
+ * Returns 0 on success, -ENOENT if the uid cannot be mapped to a name, or
+ * -EIO if no space could be reserved in @xdr.
+ */
+static int
+nfs4_encode_user(struct xdr_stream *xdr, const struct nfs_server *server,
+                kuid_t uid)
+{
+       char name[IDMAP_NAMESZ];
+       int len;
+       __be32 *p;
+
+       len = nfs_map_uid_to_name(server, uid, name, IDMAP_NAMESZ);
+       if (len < 0) {
+               /* uid_t is unsigned, so print it with %u rather than %d */
+               dprintk("nfs: couldn't resolve uid %u to string\n",
+                               from_kuid(&init_user_ns, uid));
+               return -ENOENT;
+       }
+       /* 4 bytes for the length word preceding the name */
+       p = xdr_reserve_space(xdr, 4 + len);
+       if (!p)
+               return -EIO;
+       /* The advanced pointer is not needed; nothing follows the name here */
+       xdr_encode_opaque(p, name, len);
+       return 0;
+}
+
+/*
+ * Map @gid to a group name via nfs_map_gid_to_group() and append that name
+ * to @xdr as an XDR opaque (length word followed by the data).
+ *
+ * Returns 0 on success, -ENOENT if the gid cannot be mapped to a name, or
+ * -EIO if no space could be reserved in @xdr.
+ */
+static int
+nfs4_encode_group(struct xdr_stream *xdr, const struct nfs_server *server,
+                 kgid_t gid)
+{
+       char name[IDMAP_NAMESZ];
+       int len;
+       __be32 *p;
+
+       len = nfs_map_gid_to_group(server, gid, name, IDMAP_NAMESZ);
+       if (len < 0) {
+               /* gid_t is unsigned, so print it with %u rather than %d */
+               dprintk("nfs: couldn't resolve gid %u to string\n",
+                               from_kgid(&init_user_ns, gid));
+               return -ENOENT;
+       }
+       /* 4 bytes for the length word preceding the name */
+       p = xdr_reserve_space(xdr, 4 + len);
+       if (!p)
+               return -EIO;
+       /* The advanced pointer is not needed; nothing follows the name here */
+       xdr_encode_opaque(p, name, len);
+       return 0;
+}
+
+/*
+ * Return NFS40_ACE_MASK_ALL for minor version 0 and NFS4_ACE_MASK_ALL for
+ * every other minor version.
+ */
+static unsigned int
+nfs4_ace_mask(int minorversion)
+{
+       if (minorversion == 0)
+               return NFS40_ACE_MASK_ALL;
+       return NFS4_ACE_MASK_ALL;
+}
+
+/*
+ * Append the "who" of @ace to @xdr as an XDR opaque.
+ *
+ * The name comes from one of three places:
+ *  - the special-who table, when RICHACE_SPECIAL_WHO is set;
+ *  - the unmapped identifier string stored in @acl, when there is one;
+ *  - otherwise the id mapper, via nfs4_encode_group()/nfs4_encode_user().
+ *
+ * Returns 0 on success, -EIO if the special id is unknown or no space
+ * could be reserved, or the error of the user/group encoder.
+ */
+static int
+nfs4_encode_ace_who(struct xdr_stream *xdr, const struct nfs_server *server,
+                   struct richace *ace, struct richacl *acl)
+{
+       const char *who;
+       unsigned int len;
+       __be32 *p;
+
+       if (ace->e_flags & RICHACE_SPECIAL_WHO) {
+               if (!nfs4acl_special_id_to_who(ace->e_id.special,
+                                              &who, &len)) {
+                       WARN_ON_ONCE(1);
+                       return -EIO;
+               }
+       } else {
+               who = richace_unmapped_identifier(ace, acl);
+               if (!who) {
+                       /* No stored name: translate the numeric id instead */
+                       if (ace->e_flags & RICHACE_IDENTIFIER_GROUP)
+                               return nfs4_encode_group(xdr, server,
+                                                        ace->e_id.gid);
+                       return nfs4_encode_user(xdr, server, ace->e_id.uid);
+               }
+               len = strlen(who);
+       }
+
+       /* 4 bytes for the length word preceding the name */
+       p = xdr_reserve_space(xdr, 4 + len);
+       if (!p)
+               return -EIO;
+       xdr_encode_opaque(p, who, len);
+       return 0;
+}
+
+/*
+ * XDR-encode @acl into @pages, using at most @len bytes.
+ *
+ * When the server advertises FATTR4_WORD1_DACL, the acl flags word is
+ * emitted first; otherwise acls with flags or with inherited entries are
+ * rejected.  A NULL @acl is encoded as an acl with zero entries.
+ *
+ * Returns the number of bytes encoded on success, -EINVAL if @acl uses
+ * flags or mask bits the server cannot take, -ENOMEM if no space could be
+ * reserved for the header or an entry, or the error returned by
+ * nfs4_encode_ace_who() (e.g. -ENOENT for an id that cannot be mapped).
+ */
+static int
+nfs4_encode_acl(struct page **pages, unsigned int len, struct richacl *acl,
+               const struct nfs_server *server)
+{
+       int minorversion = server->nfs_client->cl_minorversion;
+       unsigned int ace_mask = nfs4_ace_mask(minorversion);
+       bool has_dacl = server->attr_bitmask[1] & FATTR4_WORD1_DACL;
+       struct xdr_stream xdr;
+       struct xdr_buf buf;
+       __be32 *p;
+       struct richace *ace;
+       int err;
+
+       BUILD_BUG_ON(NFS4_ACE_MASK_ALL != RICHACE_VALID_MASK);
+
+       /*
+        * Reject acls not understood by the server.  The validation is
+        * skipped for a NULL acl so that the NULL handling below is
+        * actually reachable instead of crashing here first.
+        */
+       if (acl) {
+               if (!has_dacl && acl->a_flags)
+                       return -EINVAL;
+               richacl_for_each_entry(ace, acl) {
+                       if (!has_dacl &&
+                           (ace->e_flags & RICHACE_INHERITED_ACE))
+                               return -EINVAL;
+                       if (ace->e_mask & ~ace_mask)
+                               return -EINVAL;
+               }
+       }
+
+       xdr_init_encode_pages(&xdr, &buf, pages, len);
+
+       if (has_dacl) {
+               p = xdr_reserve_space(&xdr, 4);
+               if (!p)
+                       return -ENOMEM;
+               *p = cpu_to_be32(acl ? acl->a_flags : 0);
+       }
+
+       /* Entry count */
+       p = xdr_reserve_space(&xdr, 4);
+       if (!p)
+               return -ENOMEM;
+       if (!acl) {
+               *p = cpu_to_be32(0);
+               return buf.len;
+       }
+       *p = cpu_to_be32(acl->a_count);
+
+       richacl_for_each_entry(ace, acl) {
+               /* type, flags, mask; the who follows as an opaque */
+               p = xdr_reserve_space(&xdr, 4*3);
+               if (!p)
+                       return -ENOMEM;
+               *p++ = cpu_to_be32(ace->e_type);
+               /* The who-kind flags are internal and never go on the wire */
+               *p++ = cpu_to_be32(ace->e_flags &
+                       ~(RICHACE_SPECIAL_WHO | RICHACE_UNMAPPED_WHO));
+               *p++ = cpu_to_be32(ace->e_mask & NFS4_ACE_MASK_ALL);
+
+               /* Pass the real reason on instead of reporting -ENOMEM */
+               err = nfs4_encode_ace_who(&xdr, server, ace, acl);
+               if (err)
+                       return err;
+       }
+
+       return buf.len;
+}
+
+static int __nfs4_proc_set_acl(struct inode *inode, struct richacl *acl)
 {
        struct nfs_server *server = NFS_SERVER(inode);
-       struct page *pages[NFS4ACL_MAXPAGES];
+       struct page *pages[DIV_ROUND_UP(NFS4ACL_SIZE_MAX, PAGE_SIZE) + 1 /* 
scratch */] = {};
        struct nfs_setaclargs arg = {
+               .server         = server,
                .fh             = NFS_FH(inode),
                .acl_pages      = pages,
-               .acl_len        = buflen,
        };
        struct nfs_setaclres res;
        struct rpc_message msg = {
@@ -4747,16 +4810,20 @@ static int __nfs4_proc_set_acl(struct inode *inode, 
const void *buf, size_t bufl
                .rpc_argp       = &arg,
                .rpc_resp       = &res,
        };
-       unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE);
        int ret, i;
 
-       if (!nfs4_server_supports_acls(server))
-               return -EOPNOTSUPP;
-       if (npages > ARRAY_SIZE(pages))
-               return -ERANGE;
-       i = buf_to_pages_noslab(buf, buflen, arg.acl_pages);
-       if (i < 0)
-               return i;
+       ret = richacl_supported(server, acl);
+       if (ret)
+               return ret;
+
+       ret = nfs4_encode_acl(pages, NFS4ACL_SIZE_MAX, acl, server);
+       if (ret < 0) {
+               for (i = 0; i < ARRAY_SIZE(pages) && pages[i]; i++)
+                       put_page(pages[i]);
+               return ret;
+       }
+       arg.acl_len = ret;
+
        nfs4_inode_return_delegation(inode);
        ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, 
&res.seq_res, 1);
 
@@ -4764,8 +4831,8 @@ static int __nfs4_proc_set_acl(struct inode *inode, const 
void *buf, size_t bufl
         * Free each page after tx, so the only ref left is
         * held by the network stack
         */
-       for (; i > 0; i--)
-               put_page(pages[i-1]);
+       for (i = 0; i < ARRAY_SIZE(pages) && pages[i]; i++)
+               put_page(pages[i]);
 
        /*
         * Acl update can result in inode attribute update.
@@ -4779,12 +4846,12 @@ static int __nfs4_proc_set_acl(struct inode *inode, 
const void *buf, size_t bufl
        return ret;
 }
 
-static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t 
buflen)
+static int nfs4_proc_set_acl(struct inode *inode, struct richacl *acl)
 {
        struct nfs4_exception exception = { };
        int err;
        do {
-               err = __nfs4_proc_set_acl(inode, buf, buflen);
+               err = __nfs4_proc_set_acl(inode, acl);
                trace_nfs4_set_acl(inode, err);
                err = nfs4_handle_exception(NFS_SERVER(inode), err,
                                &exception);
@@ -6246,34 +6313,283 @@ nfs4_release_lockowner(struct nfs_server *server, 
struct nfs4_lock_state *lsp)
        rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, 
data);
 }
 
+/*
+ * xattr "set" callback for the richacl attribute.
+ *
+ * With a value (@buf != NULL), the value is parsed with
+ * richacl_from_xattr(), richacl_apply_masks() is applied, and the result
+ * is sent to the server.  Without a value, an acl derived from the cached
+ * file mode is sent instead.
+ *
+ * Returns 0 on success, -EINVAL for a non-empty @key, or the error from
+ * parsing, mask application, allocation, or nfs4_proc_set_acl().
+ */
+static int nfs4_xattr_set_richacl(struct dentry *dentry, const char *key,
+                                 const void *buf, size_t buflen,
+                                 int flags, int handler_flags)
+{
+       struct inode *inode = d_inode(dentry);
+       struct richacl *acl;
+       int error;
+
+       if (strcmp(key, "") != 0)
+               return -EINVAL;
+
+       if (buf) {
+               acl = richacl_from_xattr(&init_user_ns, buf, buflen);
+               if (IS_ERR(acl))
+                       return PTR_ERR(acl);
+               error = richacl_apply_masks(&acl, inode->i_uid);
+       } else {
+               /*
+                * "Remove the acl"; only permissions granted by the mode
+                * remain.  We are using the cached mode here which could be
+                * outdated; should we do a GETATTR first to narrow down the
+                * race window?
+                */
+               acl = richacl_from_mode(inode->i_mode);
+               /*
+                * NOTE(review): the failure convention of
+                * richacl_from_mode() is not visible here (NULL or
+                * ERR_PTR) -- confirm.  Either way the result must not
+                * reach nfs4_proc_set_acl(), which iterates over the acl.
+                */
+               if (IS_ERR_OR_NULL(acl))
+                       return acl ? PTR_ERR(acl) : -ENOMEM;
+               error = 0;
+       }
+
+       if (!error)
+               error = nfs4_proc_set_acl(inode, acl);
+       richacl_put(acl);
+       return error;
+}
+
+/*
+ * xattr "get" callback for the richacl attribute.
+ *
+ * Fetches the acl with nfs4_proc_get_acl() and converts it to the xattr
+ * format with richacl_to_xattr().  -ENODATA is returned when there is no
+ * acl, or when richacl_equiv_mode() returns 0 and the resulting permission
+ * bits equal those of inode->i_mode (presumably: the acl carries nothing
+ * beyond the file mode).
+ *
+ * Returns the result of richacl_to_xattr() on success, -EINVAL for a
+ * non-empty @key, or the error from nfs4_proc_get_acl().
+ */
+static int nfs4_xattr_get_richacl(struct dentry *dentry, const char *key,
+                                 void *buf, size_t buflen, int handler_flags)
+{
+       struct inode *inode = d_inode(dentry);
+       mode_t equiv_mode = inode->i_mode & S_IFMT;
+       struct richacl *acl;
+       int ret;
+
+       if (strcmp(key, "") != 0)
+               return -EINVAL;
+
+       acl = nfs4_proc_get_acl(inode);
+       if (IS_ERR(acl))
+               return PTR_ERR(acl);
+       if (!acl)
+               return -ENODATA;
+
+       if (richacl_equiv_mode(acl, &equiv_mode) != 0 ||
+           ((equiv_mode ^ inode->i_mode) & S_IRWXUGO) != 0)
+               ret = richacl_to_xattr(&init_user_ns, acl, buf, buflen);
+       else
+               ret = -ENODATA;
+
+       richacl_put(acl);
+       return ret;
+}
+
+/*
+ * xattr "list" callback for the richacl attribute.
+ *
+ * Returns 0 when the server has neither NFS_CAP_ALLOW_ACLS nor
+ * NFS_CAP_DENY_ACLS set.  Otherwise returns sizeof(XATTR_NAME_RICHACL)
+ * and, if @list is non-NULL and @list_len is large enough, copies the
+ * name (including its terminating NUL) into @list.
+ */
+static size_t nfs4_xattr_list_richacl(struct dentry *dentry, char *list,
+                                     size_t list_len, const char *name,
+                                     size_t name_len, int handler_flags)
+{
+       const struct nfs_server *srv = NFS_SERVER(d_inode(dentry));
+       const size_t needed = sizeof(XATTR_NAME_RICHACL);
+
+       if ((srv->caps & (NFS_CAP_ALLOW_ACLS | NFS_CAP_DENY_ACLS)) == 0)
+               return 0;
+
+       if (list == NULL || list_len < needed)
+               return needed;
+
+       memcpy(list, XATTR_NAME_RICHACL, needed);
+       return needed;
+}
+
 #define XATTR_NAME_NFSV4_ACL "system.nfs4_acl"
 
+/*
+ * Serialize @acl into @buf in the layout written below: a big-endian
+ * entry count, then per entry the type, flags and mask words followed by
+ * the who as a length word plus zero-padded name.
+ *
+ * The required size is always computed; data is only written while it
+ * still fits into @buflen, so a call with @buflen == 0 just reports the
+ * size.
+ *
+ * Returns the required size, -ERANGE if a non-zero @buflen is too small,
+ * or -EIO if the who of an entry cannot be determined.
+ */
+static int richacl_to_nfs4_acl(struct nfs_server *server,
+                              const struct richacl *acl,
+                              void *buf, size_t buflen)
+{
+       const struct richace *ace;
+       __be32 *p = buf;
+       size_t size = 0;
+
+       size += sizeof(*p);
+       if (buflen >= size)
+               *p++ = cpu_to_be32(acl->a_count);
+
+       richacl_for_each_entry(ace, acl) {
+               char who_buf[IDMAP_NAMESZ];
+               const char *who = who_buf;
+               /*
+                * Unsigned to match the length argument that the other
+                * caller of nfs4acl_special_id_to_who() in this file passes;
+                * the id mapper results go through the signed @ret first.
+                */
+               unsigned int who_len;
+               int ret;
+
+               size += 3 * sizeof(*p);
+               if (buflen >= size) {
+                       *p++ = cpu_to_be32(ace->e_type);
+                       *p++ = cpu_to_be32(ace->e_flags &
+                                          ~(RICHACE_INHERITED_ACE |
+                                            RICHACE_UNMAPPED_WHO |
+                                            RICHACE_SPECIAL_WHO));
+                       *p++ = cpu_to_be32(ace->e_mask);
+               }
+
+               if (richace_is_unix_user(ace)) {
+                       ret = nfs_map_uid_to_name(server, ace->e_id.uid,
+                                                 who_buf, sizeof(who_buf));
+                       if (ret < 0)
+                               return -EIO;
+                       who_len = ret;
+               } else if (richace_is_unix_group(ace)) {
+                       ret = nfs_map_gid_to_group(server, ace->e_id.gid,
+                                                  who_buf, sizeof(who_buf));
+                       if (ret < 0)
+                               return -EIO;
+                       who_len = ret;
+               } else if (ace->e_flags & RICHACE_SPECIAL_WHO) {
+                       if (!nfs4acl_special_id_to_who(ace->e_id.special,
+                                                      &who, &who_len))
+                               return -EIO;
+               } else {
+                       who = richace_unmapped_identifier(ace, acl);
+                       if (!who)
+                               return -EIO;
+                       who_len = strlen(who);
+               }
+
+               size += sizeof(*p) + ALIGN(who_len, sizeof(*p));
+               if (buflen >= size) {
+                       /* Bytes needed to pad the name to a 4-byte boundary */
+                       unsigned int padding = -who_len & (sizeof(*p) - 1);
+
+                       *p++ = cpu_to_be32(who_len);
+                       memcpy(p, who, who_len);
+                       memset((char *)p + who_len, 0, padding);
+                       p += DIV_ROUND_UP(who_len, sizeof(*p));
+               }
+       }
+       if (buflen && buflen < size)
+               return -ERANGE;
+       return size;
+}
+
+/*
+ * Parse an acl from @buf (@buflen bytes): a big-endian entry count, then
+ * per entry the type, flags and mask words followed by the who as a
+ * length word plus padded name.
+ *
+ * A who that names a special id is stored as RICHACE_SPECIAL_WHO.  Any
+ * other who is mapped to a uid or gid (depending on
+ * RICHACE_IDENTIFIER_GROUP); ids that map to 99 are treated as unmappable
+ * and keep their name as an unmapped identifier (see the FIXMEs).
+ *
+ * Returns the new acl, or an ERR_PTR: -EINVAL for malformed input,
+ * -ENOMEM on allocation failure, or the id mapper's error.
+ */
+static struct richacl *richacl_from_nfs4_acl(struct nfs_server *server,
+                                            const void *buf, size_t buflen)
+{
+       struct richacl *acl = NULL;
+       struct richace *ace;
+       const __be32 *p = buf;
+       /*
+        * The count comes straight from the caller-supplied buffer.  Keep it
+        * unsigned so that a value with the top bit set is rejected by the
+        * bound check below whatever the type of RICHACL_XATTR_MAX_COUNT is.
+        */
+       u32 count;
+       int err;
+
+       if (buflen < sizeof(*p))
+               return ERR_PTR(-EINVAL);
+       count = be32_to_cpu(*p++);
+       if (count > RICHACL_XATTR_MAX_COUNT)
+               return ERR_PTR(-EINVAL);
+       buflen -= sizeof(*p);
+       acl = richacl_alloc(count, GFP_NOFS);
+       if (!acl)
+               return ERR_PTR(-ENOMEM);
+       richacl_for_each_entry(ace, acl) {
+               u32 who_len, size;
+               int special_id;
+               char *who;
+
+               err = -EINVAL;
+               /* type, flags, mask and the length word of the who */
+               if (buflen < 4 * sizeof(*p))
+                       goto out;
+               ace->e_type = be32_to_cpu(*p++);
+               ace->e_flags = be32_to_cpu(*p++);
+               /* These flags are set below, never taken from the input */
+               if (ace->e_flags & (RICHACE_SPECIAL_WHO | RICHACE_UNMAPPED_WHO))
+                       goto out;
+               ace->e_mask = be32_to_cpu(*p++);
+               who_len = be32_to_cpu(*p++);
+               buflen -= 4 * sizeof(*p);
+               size = ALIGN(who_len, 4);
+               /* The padded who must fit and must not be empty */
+               if (buflen < size || size == 0)
+                       goto out;
+               who = (char *)p;
+               special_id = nfs4acl_who_to_special_id(who, who_len);
+               if (special_id >= 0) {
+                       ace->e_flags |= RICHACE_SPECIAL_WHO;
+                       ace->e_id.special = special_id;
+               } else {
+                       bool unmappable;
+
+                       if (ace->e_flags & RICHACE_IDENTIFIER_GROUP) {
+                               err = nfs_map_group_to_gid(server, who, who_len,
+                                                          &ace->e_id.gid);
+                               if (err) {
+                                       dprintk("%s: nfs_map_group_to_gid "
+                                               "failed!\n", __func__);
+                                       goto out;
+                               }
+                               /* FIXME: nfsidmap doesn't distinguish between
+                                         group nobody and unmappable groups! */
+                               unmappable = gid_eq(ace->e_id.gid,
+                                       make_kgid(&init_user_ns, 99));
+                       } else {
+                               err = nfs_map_name_to_uid(server, who, who_len,
+                                                         &ace->e_id.uid);
+                               if (err) {
+                                       dprintk("%s: nfs_map_name_to_uid "
+                                               "failed!\n", __func__);
+                                       goto out;
+                               }
+                               /* FIXME: nfsidmap doesn't distinguish between
+                                         user nobody and unmappable users! */
+                               unmappable = uid_eq(ace->e_id.uid,
+                                       make_kuid(&init_user_ns, 99));
+                       }
+                       if (unmappable) {
+                               /*
+                                * Keep the original name; acl and ace are
+                                * passed by reference and may be updated.
+                                */
+                               err = -ENOMEM;
+                               if (richacl_add_unmapped_identifier(&acl, &ace,
+                                       who, who_len, GFP_NOFS))
+                                       goto out;
+                       }
+               }
+               p += size / sizeof(*p);
+               buflen -= size;
+       }
+       /* Trailing bytes after the last entry make the input malformed */
+       err = -EINVAL;
+       if (buflen != 0)
+               goto out;
+       err = 0;
+
+out:
+       if (err) {
+               richacl_put(acl);
+               acl = ERR_PTR(err);
+       }
+       return acl;
+}
+
 static int nfs4_xattr_set_nfs4_acl(struct dentry *dentry, const char *key,
                                   const void *buf, size_t buflen,
                                   int flags, int type)
 {
-       if (strcmp(key, "") != 0)
+       struct inode *inode = d_inode(dentry);
+       struct richacl *acl;
+       int error;
+
+       if (!buf || strcmp(key, "") != 0)
                return -EINVAL;
 
-       return nfs4_proc_set_acl(d_inode(dentry), buf, buflen);
+       acl = richacl_from_nfs4_acl(NFS_SERVER(inode), (void *)buf, buflen);
+       if (IS_ERR(acl))
+               return PTR_ERR(acl);
+       error = nfs4_proc_set_acl(inode, acl);
+       richacl_put(acl);
+       return error;
 }
 
 static int nfs4_xattr_get_nfs4_acl(struct dentry *dentry, const char *key,
                                   void *buf, size_t buflen, int type)
 {
+       struct inode *inode = d_inode(dentry);
+       struct richacl *acl;
+       int error;
+
        if (strcmp(key, "") != 0)
                return -EINVAL;
-
-       return nfs4_proc_get_acl(d_inode(dentry), buf, buflen);
+       acl = nfs4_proc_get_acl(inode);
+       if (IS_ERR(acl))
+               return PTR_ERR(acl);
+       if (acl == NULL)
+               return -ENODATA;
+       error = richacl_to_nfs4_acl(NFS_SERVER(inode), acl, buf, buflen);
+       richacl_put(acl);
+       return error;
 }
 
 static size_t nfs4_xattr_list_nfs4_acl(struct dentry *dentry, char *list,
                                       size_t list_len, const char *name,
                                       size_t name_len, int type)
 {
+       struct nfs_server *server = NFS_SERVER(d_inode(dentry));
        size_t len = sizeof(XATTR_NAME_NFSV4_ACL);
 
-       if (!nfs4_server_supports_acls(NFS_SERVER(d_inode(dentry))))
+       if (!(server->caps & (NFS_CAP_ALLOW_ACLS | NFS_CAP_DENY_ACLS)))
                return 0;
 
        if (list && len <= list_len)
@@ -8826,6 +9142,13 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
        .clone_server   = nfs_clone_server,
 };
 
+static const struct xattr_handler nfs4_xattr_richacl_handler = {
+       .prefix = XATTR_NAME_RICHACL,
+       .list   = nfs4_xattr_list_richacl,
+       .get    = nfs4_xattr_get_richacl,
+       .set    = nfs4_xattr_set_richacl,
+};
+
 static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = {
        .prefix = XATTR_NAME_NFSV4_ACL,
        .list   = nfs4_xattr_list_nfs4_acl,
@@ -8834,6 +9157,7 @@ static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = {
 };
 
 const struct xattr_handler *nfs4_xattr_handlers[] = {
+       &nfs4_xattr_richacl_handler,
        &nfs4_xattr_nfs4_acl_handler,
 #ifdef CONFIG_NFS_V4_SECURITY_LABEL
        &nfs4_xattr_nfs4_label_handler,
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index eefed15..f2507d7 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -52,6 +52,10 @@
 #include <linux/nfs.h>
 #include <linux/nfs4.h>
 #include <linux/nfs_fs.h>
+#include <linux/nfs_idmap.h>
+#include <linux/richacl.h>
+#include <linux/richacl_xattr.h>  /* for RICHACL_XATTR_MAX_COUNT */
+#include <linux/nfs4acl.h>
 
 #include "nfs4_fs.h"
 #include "internal.h"
@@ -1650,16 +1654,24 @@ encode_restorefh(struct xdr_stream *xdr, struct compound_hdr *hdr)
 static void
 encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compound_hdr *hdr)
 {
-       __be32 *p;
+       int attrlen_offset;
+       __be32 attrlen, *p;
 
        encode_op_hdr(xdr, OP_SETATTR, decode_setacl_maxsz, hdr);
        encode_nfs4_stateid(xdr, &zero_stateid);
+
+       /* Encode attribute bitmap: one word, only the ACL bit set. */
        p = reserve_space(xdr, 2*4);
        *p++ = cpu_to_be32(1);
        *p = cpu_to_be32(FATTR4_WORD0_ACL);
-       p = reserve_space(xdr, 4);
-       *p = cpu_to_be32(arg->acl_len);
+
+       attrlen_offset = xdr->buf->len;
+       reserve_space(xdr, 4);  /* attrlen placeholder, backfilled below */
+
        xdr_write_pages(xdr, arg->acl_pages, 0, arg->acl_len);
+       /* attrlen is whatever was emitted after the placeholder word. */
+       attrlen = cpu_to_be32(xdr->buf->len - attrlen_offset - 4);
+       write_bytes_to_xdr_buf(xdr->buf, attrlen_offset, &attrlen, 4);
 }
 
 static void
@@ -2488,7 +2500,7 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr,
        encode_sequence(xdr, &args->seq_args, &hdr);
        encode_putfh(xdr, args->fh, &hdr);
        replen = hdr.replen + op_decode_hdr_maxsz + 1;
-       encode_getattr_two(xdr, FATTR4_WORD0_ACL, 0, &hdr);
+       encode_getattr_two(xdr, FATTR4_WORD0_ACL, FATTR4_WORD1_MODE, &hdr);
 
        xdr_inline_pages(&req->rq_rcv_buf, replen << 2,
                args->acl_pages, 0, args->acl_len);
@@ -5260,24 +5272,135 @@ decode_restorefh(struct xdr_stream *xdr)
        return decode_op_hdr(xdr, OP_RESTOREFH);
 }
 
+static int
+nfs4_decode_ace_who(struct richace *ace,
+                   const char **unmapped, unsigned int *unmapped_len,
+                   const struct nfs_server *server,
+                   struct xdr_stream *xdr)
+{
+       char *who;
+       u32 len;
+       int special_id;
+       __be32 *p;
+       int error;
+
+       p = xdr_inline_decode(xdr, 4);
+       if (!p)
+               return -ENOMEM;  /* acl truncated */
+       len = be32_to_cpup(p++);
+       if (len >= XDR_MAX_NETOBJ) {
+               dprintk("%s: name too long (%u)!\n",
+                       __func__, len);
+               return -EIO;
+       }
+       who = (char *)xdr_inline_decode(xdr, len);
+       if (!who)
+               return -ENOMEM;  /* acl truncated */
+
+       special_id = nfs4acl_who_to_special_id(who, len);
+       if (special_id >= 0) {
+               ace->e_flags |= RICHACE_SPECIAL_WHO;
+               ace->e_flags &= ~RICHACE_IDENTIFIER_GROUP;
+               ace->e_id.special = special_id;
+               return 0;
+       }
+       if (ace->e_flags & RICHACE_IDENTIFIER_GROUP) {
+               error = nfs_map_group_to_gid(server, who, len, &ace->e_id.gid);
+               if (error) {
+                       dprintk("%s: nfs_map_group_to_gid failed!\n",
+                                       __func__);
+                       return error;
+               }
+               /* FIXME: nfsidmap doesn't distinguish between group nobody and
+                         unmappable groups! */
+               if (gid_eq(ace->e_id.gid, make_kgid(&init_user_ns, 99))) {
+                       *unmapped = who;
+                       *unmapped_len = len;
+               }
+       } else {
+               error = nfs_map_name_to_uid(server, who, len, &ace->e_id.uid);
+               if (error) {
+                       dprintk("%s: nfs_map_name_to_uid failed!\n",
+                                       __func__);
+                       return error;
+               }
+               /* FIXME: nfsidmap doesn't distinguish between user nobody and
+                         unmappable users! */
+               if (uid_eq(ace->e_id.uid, make_kuid(&init_user_ns, 99))) {
+                       *unmapped = who;
+                       *unmapped_len = len;
+               }
+       }
+       return 0;
+}
+
+static struct richacl *
+decode_acl_entries(struct xdr_stream *xdr, const struct nfs_server *server)
+{
+       struct richacl *acl;
+       struct richace *ace;
+       uint32_t count;
+       __be32 *p;
+       int status;
+
+       p = xdr_inline_decode(xdr, 4);
+       if (unlikely(!p))
+               return ERR_PTR(-ENOMEM);  /* acl truncated */
+       count = be32_to_cpup(p);
+       if (count > RICHACL_XATTR_MAX_COUNT)
+               return ERR_PTR(-EIO);
+       acl = richacl_alloc(count, GFP_NOFS);
+       if (!acl)
+               return ERR_PTR(-ENOMEM);
+       richacl_for_each_entry(ace, acl) {
+               const char *unmapped = NULL;
+               unsigned int unmapped_len;
+
+               p = xdr_inline_decode(xdr, 4*3);
+               status = -ENOMEM;
+               if (unlikely(!p))
+                       goto out;  /* acl truncated */
+               ace->e_type = be32_to_cpup(p++);
+               ace->e_flags = be32_to_cpup(p++);
+               status = -EIO;
+               if (ace->e_flags &
+                   (RICHACE_SPECIAL_WHO | RICHACE_UNMAPPED_WHO))
+                       goto out;
+               ace->e_mask = be32_to_cpup(p++);
+               status = nfs4_decode_ace_who(ace, &unmapped,
+                                            &unmapped_len, server,
+                                            xdr);
+               if (status)
+                       goto out;
+               if (unmapped) {
+                       status = -ENOMEM;
+                       if (richacl_add_unmapped_identifier(&acl, &ace,
+                                       unmapped, unmapped_len,
+                                       GFP_NOFS))
+                               goto out;
+               }
+       }
+       status = 0;
+
+out:
+       if (status) {
+               richacl_put(acl);
+               acl = ERR_PTR(status);
+       }
+       return acl;
+}
+
 static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req,
                         struct nfs_getaclres *res)
 {
        unsigned int savep;
        uint32_t attrlen,
                 bitmap[3] = {0};
+       struct richacl *acl = NULL;
        int status;
-       unsigned int pg_offset;
 
-       res->acl_len = 0;
        if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
                goto out;
-
-       xdr_enter_page(xdr, xdr->buf->page_len);
-
-       /* Calculate the offset of the page data */
-       pg_offset = xdr->buf->head[0].iov_len;
-
        if ((status = decode_attr_bitmap(xdr, bitmap)) != 0)
                goto out;
        if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0)
@@ -5286,24 +5409,39 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req,
        if (unlikely(bitmap[0] & (FATTR4_WORD0_ACL - 1U)))
                return -EIO;
        if (likely(bitmap[0] & FATTR4_WORD0_ACL)) {
-
-               /* The bitmap (xdr len + bitmaps) and the attr xdr len words
-                * are stored with the acl data to handle the problem of
-                * variable length bitmaps.*/
-               res->acl_data_offset = xdr_stream_pos(xdr) - pg_offset;
-               res->acl_len = attrlen;
-
-               /* Check for receive buffer overflow */
-               if (res->acl_len > (xdr->nwords << 2) ||
-                   res->acl_len + res->acl_data_offset > xdr->buf->page_len) {
-                       res->acl_flags |= NFS4_ACL_TRUNC;
-                       dprintk("NFS: acl reply: attrlen %u > page_len %u\n",
-                                       attrlen, xdr->nwords << 2);
-               }
+               acl = decode_acl_entries(xdr, res->server);
+               if (IS_ERR(acl)) {
+                       /*
+                        * Clear acl first: the cleanup at out passes it to
+                        * richacl_put(), and it must not be an ERR_PTR().
+                        */
+                       status = PTR_ERR(acl);
+                       acl = NULL;
+                       goto out;
+               }
+               bitmap[0] &= ~FATTR4_WORD0_ACL;
-       } else
+       } else {
+               /* No ACL attribute in the reply: report that and stop. */
                status = -EOPNOTSUPP;
+               goto out;
+       }
 
+       /* Word 0 must be empty once the ACL bit has been consumed. */
+       status = -EIO;
+       if (unlikely(bitmap[0]))
+               goto out;
+
+       status = decode_attr_mode(xdr, bitmap, &res->mode);
+       if (status < 0)
+               goto out;
+       status = 0;
+
 out:
+       /* Hand the ACL to the caller on success, drop it on failure. */
+       if (status == 0)
+               res->acl = acl;
+       else
+               richacl_put(acl);
        return status;
 }
 
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 383a027..8ced33d 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2319,7 +2319,7 @@ void nfs_fill_super(struct super_block *sb, struct nfs_mount_info *mount_info)
                /* The VFS shouldn't apply the umask to mode bits. We will do
                 * so ourselves when necessary.
                 */
-               sb->s_flags |= MS_POSIXACL;
+               sb->s_flags |= MS_RICHACL;
                sb->s_time_gran = 1;
        }
 
@@ -2346,7 +2346,7 @@ void nfs_clone_super(struct super_block *sb, struct nfs_mount_info *mount_info)
                /* The VFS shouldn't apply the umask to mode bits. We will do
                 * so ourselves when necessary.
                 */
-               sb->s_flags |= MS_POSIXACL;
+               sb->s_flags |= MS_RICHACL;
        }
 
        nfs_initialise_sb(sb);
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index c0e9614..b84e194 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -176,7 +176,6 @@ struct nfs_inode {
        wait_queue_head_t       waitqueue;
 
 #if IS_ENABLED(CONFIG_NFS_V4)
-       struct nfs4_cached_acl  *nfs4_acl;
         /* NFSv4 state */
        struct list_head        open_states;
        struct nfs_delegation __rcu *delegation;
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 570a7df..6c41668 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -243,5 +243,7 @@ struct nfs_server {
 #define NFS_CAP_ALLOCATE       (1U << 20)
 #define NFS_CAP_DEALLOCATE     (1U << 21)
 #define NFS_CAP_LAYOUTSTATS    (1U << 22)
+#define NFS_CAP_ALLOW_ACLS     (1U << 23)
+#define NFS_CAP_DENY_ACLS      (1U << 24)
 
 #endif
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 090ade4..337c341 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -683,9 +683,10 @@ struct nfs_setattrargs {
 
 struct nfs_setaclargs {
        struct nfs4_sequence_args       seq_args;
+       const struct nfs_server *       server;
        struct nfs_fh *                 fh;
-       size_t                          acl_len;
        struct page **                  acl_pages;
+       size_t                          acl_len;
 };
 
 struct nfs_setaclres {
@@ -703,9 +704,9 @@ struct nfs_getaclargs {
 #define NFS4_ACL_TRUNC         0x0001  /* ACL was truncated */
 struct nfs_getaclres {
        struct nfs4_sequence_res        seq_res;
-       size_t                          acl_len;
-       size_t                          acl_data_offset;
-       int                             acl_flags;
+       const struct nfs_server *       server;
+       struct richacl *                acl;
+       umode_t                         mode;
        struct page *                   acl_scratch;
 };
 
-- 
2.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to