The following pull request was submitted through GitHub.
It can be accessed and reviewed at: https://github.com/lxc/lxd/pull/6443

This e-mail was sent by the LXC bot, direct replies will not reach the author
unless they happen to be subscribed to this list.

=== Description (from pull-request) ===
This allows mount syscalls for selected filesystems to be intercepted and
redirected to their corresponding fuse implementation.

A new key
security.syscalls.intercept.mount.fuse=<fstype>=<fuse-binary>
is added.

A filesystem cannot appear in both security.syscalls.intercept.mount.fuse and
security.syscalls.intercept.mount.allowed.

Signed-off-by: Christian Brauner <christian.brau...@ubuntu.com>
From 00811cbb3b68840f9796e2fa939f411601ebb970 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brau...@ubuntu.com>
Date: Tue, 12 Nov 2019 23:14:13 +0100
Subject: [PATCH 1/4] seccomp: implement redirection to fuse

This allows mount syscalls for selected filesystems to be intercepted and
redirected to their corresponding fuse implementation.

A new key
security.syscalls.intercept.mount.fuse=<fstype>=<fuse-binary>
is added.

A filesystem cannot appear in both security.syscalls.intercept.mount.fuse and
security.syscalls.intercept.mount.allowed.

Signed-off-by: Christian Brauner <christian.brau...@ubuntu.com>
---
 lxd/container.go        |   6 ++
 lxd/main_forksyscall.go |  31 +++++--
 lxd/seccomp/seccomp.go  | 190 +++++++++++++++++++++++++++++++++++-----
 shared/container.go     |   1 +
 shared/util.go          |   2 +
 5 files changed, 199 insertions(+), 31 deletions(-)

diff --git a/lxd/container.go b/lxd/container.go
index 8d5d3c457b..f61f286d01 100644
--- a/lxd/container.go
+++ b/lxd/container.go
@@ -24,6 +24,7 @@ import (
        deviceConfig "github.com/lxc/lxd/lxd/device/config"
        "github.com/lxc/lxd/lxd/instance/instancetype"
        "github.com/lxc/lxd/lxd/operations"
+       "github.com/lxc/lxd/lxd/seccomp"
        "github.com/lxc/lxd/lxd/state"
        storagePools "github.com/lxc/lxd/lxd/storage"
        storageDrivers "github.com/lxc/lxd/lxd/storage/drivers"
@@ -168,6 +169,11 @@ func containerValidConfig(sysOS *sys.OS, config 
map[string]string, profile bool,
                return fmt.Errorf("security.syscalls.whitelist is mutually 
exclusive with security.syscalls.blacklist*")
        }
 
+       err, _ := seccomp.SeccompSyscallInterceptMountFilter(config)
+       if err != nil {
+               return err
+       }
+
        if expanded && (config["security.privileged"] == "" || 
!shared.IsTrue(config["security.privileged"])) && sysOS.IdmapSet == nil {
                return fmt.Errorf("LXD doesn't have a uid/gid allocation. In 
this mode, only privileged containers are supported")
        }
diff --git a/lxd/main_forksyscall.go b/lxd/main_forksyscall.go
index 04738aa6d9..6b2e1ac5d1 100644
--- a/lxd/main_forksyscall.go
+++ b/lxd/main_forksyscall.go
@@ -381,6 +381,7 @@ static void mount_emulate(void)
 {
        __do_close_prot_errno int mnt_fd = -EBADF;
        char *source = NULL, *shiftfs = NULL, *target = NULL, *fstype = NULL;
+       bool use_fuse;
        uid_t uid = -1, fsuid = -1;
        gid_t gid = -1, fsgid = -1;
        int ret;
@@ -389,28 +390,40 @@ static void mount_emulate(void)
        const void *data;
 
        pid = atoi(advance_arg(true));
-       source = advance_arg(true);
-       target = advance_arg(true);
-       fstype = advance_arg(true);
-       flags = atoi(advance_arg(true));
-       shiftfs = advance_arg(true);
+       use_fuse = (atoi(advance_arg(true)) == 1);
+       if (!use_fuse) {
+               source = advance_arg(true);
+               target = advance_arg(true);
+               fstype = advance_arg(true);
+               flags = atoi(advance_arg(true));
+               shiftfs = advance_arg(true);
+       }
        uid = atoi(advance_arg(true));
        gid = atoi(advance_arg(true));
        fsuid = atoi(advance_arg(true));
        fsgid = atoi(advance_arg(true));
-       data = advance_arg(false);
+       if (!use_fuse)
+               data = advance_arg(false);
 
        mnt_fd = preserve_ns(getpid(), "mnt");
        if (mnt_fd < 0)
                _exit(EXIT_FAILURE);
 
+       if (use_fuse)
+               attach_userns(pid);
+
        if (!acquire_basic_creds(pid))
                _exit(EXIT_FAILURE);
 
        if (!acquire_final_creds(pid, uid, gid, fsuid, fsgid))
                _exit(EXIT_FAILURE);
 
-       if (strcmp(shiftfs, "true") == 0) {
+       if (use_fuse) {
+               const char *cmd = advance_arg(true);
+               ret = system(cmd);
+               if (ret)
+                       _exit(EXIT_FAILURE);
+       } else if (strcmp(shiftfs, "true") == 0) {
                char template[] = P_tmpdir "/.lxd_tmp_mount_XXXXXX";
 
                // Create basic mount in container's mount namespace.
@@ -525,12 +538,12 @@ type cmdForksyscall struct {
 func (c *cmdForksyscall) Command() *cobra.Command {
        // Main subcommand
        cmd := &cobra.Command{}
-       cmd.Use = "forksyscall <syscall> <PID> <path> <mode> <dev>"
+       cmd.Use = "forksyscall <syscall> <PID> [...]"
        cmd.Short = "Perform syscall operations"
        cmd.Long = `Description:
   Perform syscall operations
 
-  This set of internal commands are used for all seccom-based container syscall
+  This set of internal commands is used for all seccomp-based container syscall
   operations.
 `
        cmd.RunE = c.Run
diff --git a/lxd/seccomp/seccomp.go b/lxd/seccomp/seccomp.go
index 65ff54b70f..eef84aeb68 100644
--- a/lxd/seccomp/seccomp.go
+++ b/lxd/seccomp/seccomp.go
@@ -45,6 +45,7 @@ import (
 #include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
+#include <sys/mount.h>
 #include <sys/socket.h>
 #include <sys/stat.h>
 #include <sys/syscall.h>
@@ -1176,6 +1177,77 @@ type MountArgs struct {
        shift  bool
 }
 
+// mountFlagsToOptMap maps mount flag values to the option name or "--make-*" argument used on the mount.fuse command line; MS_REC only appears combined with the bind and propagation flags.
+var mountFlagsToOptMap = map[C.ulong]string{
+       C.MS_BIND:        "bind",
+       C.ulong(0):       "defaults",
+       C.MS_LAZYTIME:    "lazytime",
+       C.MS_MANDLOCK:    "mand",
+       C.MS_NOATIME:     "noatime",
+       C.MS_NODEV:       "nodev",
+       C.MS_NODIRATIME:  "nodiratime",
+       C.MS_NOEXEC:      "noexec",
+       C.MS_NOSUID:      "nosuid",
+       C.MS_RELATIME:    "relatime",
+       C.MS_REMOUNT:     "remount",
+       C.MS_RDONLY:      "ro",
+       C.MS_STRICTATIME: "strictatime",
+       C.MS_SYNCHRONOUS: "sync",
+       C.MS_PRIVATE:     "--make-private",
+       C.MS_SHARED:      "--make-shared",
+       C.MS_SLAVE:       "--make-slave",
+       C.MS_UNBINDABLE:  "--make-unbindable",
+
+       C.MS_REC | C.MS_BIND:       "rbind",
+       C.MS_REC | C.MS_PRIVATE:    "--make-rprivate",
+       C.MS_REC | C.MS_SHARED:     "--make-rshared",
+       C.MS_REC | C.MS_SLAVE:      "--make-rslave",
+       C.MS_REC | C.MS_UNBINDABLE: "--make-runbindable",
+}
+
+func mountFlagsToOpts(flags C.ulong) (string, string) {
+       var bit C.ulong = 0
+       opts := ""
+       args := ""
+       var msRec C.ulong = (flags & C.MS_REC)
+
+       flags = (flags &^ C.MS_REC)
+       for bit < (4*8 - 1) {
+               if (flags & (1 << bit)) > 0 {
+                       var flagKey C.ulong = (1 << bit)
+
+                       switch flagKey {
+                       case C.MS_BIND:
+                               fallthrough
+                       case C.MS_PRIVATE:
+                               fallthrough
+                       case C.MS_SHARED:
+                               fallthrough
+                       case C.MS_SLAVE:
+                               fallthrough
+                       case C.MS_UNBINDABLE:
+                               flagKey |= msRec
+                       }
+                       optOrArg := mountFlagsToOptMap[flagKey]
+
+                       if optOrArg == "" {
+                               continue
+                       }
+
+                       if strings.HasPrefix(optOrArg, "--") {
+                               args = fmt.Sprintf("%s %s", args, optOrArg)
+                       } else if opts == "" {
+                               opts = fmt.Sprintf("%s", optOrArg)
+                       } else {
+                               opts = fmt.Sprintf("%s,%s", opts, optOrArg)
+                       }
+               }
+               bit++
+       }
+
+       return opts, args
+}
+
 // HandleMountSyscall handles mount syscalls.
 func (s *Server) HandleMountSyscall(c Instance, siov *Iovec) int {
        ctx := log.Ctx{"container": c.Name(),
@@ -1252,7 +1324,8 @@ func (s *Server) HandleMountSyscall(c Instance, siov 
*Iovec) int {
                args.data = C.GoString(&cBuf[0])
        }
 
-       if !s.MountSyscallValid(c, &args) {
+       ok, fuseBinary := s.MountSyscallValid(c, &args)
+       if !ok {
                ctx["syscall_continue"] = "true"
                C.seccomp_notify_update_response(siov.resp, 0, 
C.uint32_t(seccompUserNotifFlagContinue))
                return 0
@@ -1265,20 +1338,55 @@ func (s *Server) HandleMountSyscall(c Instance, siov 
*Iovec) int {
                return 0
        }
 
-       _, _, err = shared.RunCommandSplit(nil, util.GetExecPath(),
-               "forksyscall",
-               "mount",
-               fmt.Sprintf("%d", args.pid),
-               fmt.Sprintf("%s", args.source),
-               fmt.Sprintf("%s", args.target),
-               fmt.Sprintf("%s", args.fstype),
-               fmt.Sprintf("%d", args.flags),
-               fmt.Sprintf("%t", args.shift),
-               fmt.Sprintf("%d", nsuid),
-               fmt.Sprintf("%d", nsgid),
-               fmt.Sprintf("%d", nsfsuid),
-               fmt.Sprintf("%d", nsfsgid),
-               fmt.Sprintf("%s", args.data))
+       if fuseBinary != "" {
+               addOpts, addArgs := mountFlagsToOpts(C.ulong(args.flags))
+
+               fuseCmd := fmt.Sprintf("mount.fuse %s#%s %s", fuseBinary, 
args.source, args.target)
+
+               if addArgs != "" {
+                       fuseCmd = fmt.Sprintf("%s %s", fuseCmd, addArgs)
+               }
+
+               if args.data != "" || addOpts != "" {
+                       fuseCmd = fmt.Sprintf("%s -o", fuseCmd)
+                       if args.data != "" && addOpts != "" {
+                               fuseCmd = fmt.Sprintf("%s %s,%s", fuseCmd, 
args.data, addOpts)
+                       } else if args.data != "" {
+                               fuseCmd = fmt.Sprintf("%s %s", fuseCmd, 
args.data)
+                       } else {
+                               fuseCmd = fmt.Sprintf("%s %s", fuseCmd, addOpts)
+                       }
+               }
+
+               logger.Errorf("AAAA: %s", fuseCmd)
+               ctx["fuse_cmd"] = fuseCmd
+               _, _, err = shared.RunCommandSplit(nil, util.GetExecPath(),
+                       "forksyscall",
+                       "mount",
+                       fmt.Sprintf("%d", args.pid),
+                       fmt.Sprintf("%d", 1),
+                       fmt.Sprintf("%d", nsuid),
+                       fmt.Sprintf("%d", nsgid),
+                       fmt.Sprintf("%d", nsfsuid),
+                       fmt.Sprintf("%d", nsfsgid),
+                       fmt.Sprintf("%s", fuseCmd))
+       } else {
+               _, _, err = shared.RunCommandSplit(nil, util.GetExecPath(),
+                       "forksyscall",
+                       "mount",
+                       fmt.Sprintf("%d", args.pid),
+                       fmt.Sprintf("%d", 0),
+                       fmt.Sprintf("%s", args.source),
+                       fmt.Sprintf("%s", args.target),
+                       fmt.Sprintf("%s", args.fstype),
+                       fmt.Sprintf("%d", args.flags),
+                       fmt.Sprintf("%t", args.shift),
+                       fmt.Sprintf("%d", nsuid),
+                       fmt.Sprintf("%d", nsgid),
+                       fmt.Sprintf("%d", nsfsuid),
+                       fmt.Sprintf("%d", nsfsgid),
+                       fmt.Sprintf("%s", args.data))
+       }
        if err != nil {
                ctx["syscall_continue"] = "true"
                C.seccomp_notify_update_response(siov.resp, 0, 
C.uint32_t(seccompUserNotifFlagContinue))
@@ -1390,16 +1498,54 @@ func MountSyscallFilter(config map[string]string) 
[]string {
        return fs
 }
 
-// MountSyscallValid checks whether this is a mount syscall we intercept.
-func (s *Server) MountSyscallValid(c Instance, args *MountArgs) bool {
-       fsList := MountSyscallFilter(c.ExpandedConfig())
-       for _, fs := range fsList {
-               if fs == args.fstype {
-                       return true
+// SeccompSyscallInterceptMountFilter creates a new mount syscall interception 
filter
+func SeccompSyscallInterceptMountFilter(config map[string]string) (error, 
map[string]string) {
+       if !shared.IsTrue(config["security.syscalls.intercept.mount"]) {
+               return nil, map[string]string{}
+
+       }
+
+       fsMap := map[string]string{}
+       fsFused := 
strings.Split(config["security.syscalls.intercept.mount.fuse"], ",")
+       if len(fsFused) > 0 && fsFused[0] != "" {
+               for _, ent := range fsFused {
+                       fsfuse := strings.Split(ent, "=")
+                       if len(fsfuse) != 2 {
+                               return 
fmt.Errorf("security.syscalls.intercept.mount.fuse is not of the form 
'filesystem=fuse-binary': %s", ent), map[string]string{}
+                       }
+
+                       // fsfuse[0] == filesystems that are ok to mount
+                       // fsfuse[1] == fuse binary to use to mount 
filesystemstype
+                       fsMap[fsfuse[0]] = fsfuse[1]
                }
        }
 
-       return false
+       fsAllowed := 
strings.Split(config["security.syscalls.intercept.mount.allowed"], ",")
+       if len(fsAllowed) > 0 && fsAllowed[0] != "" {
+               for _, allowedfs := range fsAllowed {
+                       if fsMap[allowedfs] != "" {
+                               return fmt.Errorf("Filesystem %s cannot appear 
in security.syscalls.intercept.mount.allowed and 
security.syscalls.intercept.mount.fuse", allowedfs), map[string]string{}
+                       }
+
+                       fsMap[allowedfs] = ""
+               }
+       }
+
+       return nil, fsMap
+}
+
+// MountSyscallValid reports whether the mount syscall described by args is one
+// we intercept for instance c. The second return value is the fuse binary
+// configured for the requested filesystem type; it is empty when the
+// filesystem has no fuse binary configured or when the syscall is not
+// intercepted. An invalid interception configuration is treated as "not
+// intercepted".
+func (s *Server) MountSyscallValid(c Instance, args *MountArgs) (bool, string) {
+       err, fsMap := SeccompSyscallInterceptMountFilter(c.ExpandedConfig())
+       if err == nil {
+               fuseBinary, found := fsMap[args.fstype]
+               if found {
+                       return true, fuseBinary
+               }
+       }
+
+       return false, ""
+}
 
 // MountSyscallShift checks whether this mount syscall needs shiftfs.
diff --git a/shared/container.go b/shared/container.go
index cb04e09141..719aac857f 100644
--- a/shared/container.go
+++ b/shared/container.go
@@ -300,6 +300,7 @@ var KnownContainerConfigKeys = map[string]func(value 
string) error{
        "security.syscalls.intercept.mknod":         IsBool,
        "security.syscalls.intercept.mount":         IsBool,
        "security.syscalls.intercept.mount.allowed": IsAny,
+       "security.syscalls.intercept.mount.fuse":    IsAny,
        "security.syscalls.intercept.mount.shift":   IsBool,
        "security.syscalls.intercept.setxattr":      IsBool,
        "security.syscalls.whitelist":               IsAny,
diff --git a/shared/util.go b/shared/util.go
index e307f985fe..8bb03ab470 100644
--- a/shared/util.go
+++ b/shared/util.go
@@ -29,6 +29,7 @@ import (
 
        "github.com/lxc/lxd/shared/cancel"
        "github.com/lxc/lxd/shared/ioprogress"
+       "github.com/lxc/lxd/shared/logger"
        "github.com/lxc/lxd/shared/units"
 )
 
@@ -991,6 +992,7 @@ func DownloadFileHash(httpClient *http.Client, useragent 
string, progress func(p
                }
 
                result := fmt.Sprintf("%x", hashFunc.Sum(nil))
+               logger.Errorf("Hashing for download from url %s. Got hash: %s. 
Expected hash: %s", url, result, hash)
                if result != hash {
                        return -1, fmt.Errorf("Hash mismatch for %s: %s != %s", 
url, result, hash)
                }

From dab3ee3cfe2401081d9954ce3ada2d40632f6dd2 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brau...@ubuntu.com>
Date: Tue, 12 Nov 2019 23:18:37 +0100
Subject: [PATCH 2/4] api: add container_syscall_intercept_mount_fuse extension

Signed-off-by: Christian Brauner <christian.brau...@ubuntu.com>
---
 doc/api-extensions.md | 7 ++++++-
 shared/version/api.go | 1 +
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/doc/api-extensions.md b/doc/api-extensions.md
index ca09e50e41..b78ffb32c8 100644
--- a/doc/api-extensions.md
+++ b/doc/api-extensions.md
@@ -871,4 +871,9 @@ elevated permissions.
 Adds support for importing/exporting of images/backups using SquashFS file 
system format.
 
 ## container\_raw\_mount
-This adds support for passing in raw mount options for disk devices. 
\ No newline at end of file
+This adds support for passing in raw mount options for disk devices.
+
+## container\_syscall\_intercept\_mount\_fuse
+Adds the `security.syscalls.intercept.mount.fuse` key. It can be used to
+redirect filesystem mounts to their fuse implementation. To this end, set e.g.
+`security.syscalls.intercept.mount.fuse=ext4=fuse2fs`.
diff --git a/shared/version/api.go b/shared/version/api.go
index f6b0e345a1..fb1e6edd43 100644
--- a/shared/version/api.go
+++ b/shared/version/api.go
@@ -175,6 +175,7 @@ var APIExtensions = []string{
        "container_syscall_intercept_mount",
        "compression_squashfs",
        "container_raw_mount",
+       "container_syscall_intercept_mount_fuse",
 }
 
 // APIExtensionsCount returns the number of available API extensions.

From 9f64917fb6e48c39b3d8ed5516999f2a5a59957d Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brau...@ubuntu.com>
Date: Tue, 12 Nov 2019 23:22:17 +0100
Subject: [PATCH 3/4] doc: add security.syscalls.intercept.mount.fuse

Signed-off-by: Christian Brauner <christian.brau...@ubuntu.com>
---
 doc/containers.md | 109 +++++++++++++++++++++++-----------------------
 1 file changed, 55 insertions(+), 54 deletions(-)

diff --git a/doc/containers.md b/doc/containers.md
index 5441367e3b..e5c46602c0 100644
--- a/doc/containers.md
+++ b/doc/containers.md
@@ -34,60 +34,61 @@ currently supported:
 
 The currently supported keys are:
 
-Key                                             | Type      | Default          
 | Live update   | API extension                        | Description
-:--                                             | :---      | :------          
 | :----------   | :------------                        | :----------
-boot.autostart                                  | boolean   | -                
 | n/a           | -                                    | Always start the 
container when LXD starts (if not set, restore last state)
-boot.autostart.delay                            | integer   | 0                
 | n/a           | -                                    | Number of seconds to 
wait after the container started before starting the next one
-boot.autostart.priority                         | integer   | 0                
 | n/a           | -                                    | What order to start 
the containers in (starting with highest)
-boot.host\_shutdown\_timeout                    | integer   | 30               
 | yes           | container\_host\_shutdown\_timeout   | Seconds to wait for 
container to shutdown before it is force stopped
-boot.stop.priority                              | integer   | 0                
 | n/a           | container\_stop\_priority            | What order to 
shutdown the containers (starting with highest)
-environment.\*                                  | string    | -                
 | yes (exec)    | -                                    | key/value environment 
variables to export to the container and set on exec
-limits.cpu                                      | string    | - (all)          
 | yes           | -                                    | Number or range of 
CPUs to expose to the container
-limits.cpu.allowance                            | string    | 100%             
 | yes           | -                                    | How much of the CPU 
can be used. Can be a percentage (e.g. 50%) for a soft limit or hard a chunk of 
time (25ms/100ms)
-limits.cpu.priority                             | integer   | 10 (maximum)     
 | yes           | -                                    | CPU scheduling 
priority compared to other containers sharing the same CPUs (overcommit) 
(integer between 0 and 10)
-limits.disk.priority                            | integer   | 5 (medium)       
 | yes           | -                                    | When under load, how 
much priority to give to the container's I/O requests (integer between 0 and 10)
-limits.kernel.\*                                | string    | -                
 | no            | kernel\_limits                       | This limits kernel 
resources per container (e.g. number of open files)
-limits.memory                                   | string    | - (all)          
 | yes           | -                                    | Percentage of the 
host's memory or fixed value in bytes (various suffixes supported, see below)
-limits.memory.enforce                           | string    | hard             
 | yes           | -                                    | If hard, container 
can't exceed its memory limit. If soft, the container can exceed its memory 
limit when extra host memory is available.
-limits.memory.swap                              | boolean   | true             
 | yes           | -                                    | Whether to allow some 
of the container's memory to be swapped out to disk
-limits.memory.swap.priority                     | integer   | 10 (maximum)     
 | yes           | -                                    | The higher this is 
set, the least likely the container is to be swapped to disk (integer between 0 
and 10)
-limits.network.priority                         | integer   | 0 (minimum)      
 | yes           | -                                    | When under load, how 
much priority to give to the container's network requests (integer between 0 
and 10)
-limits.processes                                | integer   | - (max)          
 | yes           | -                                    | Maximum number of 
processes that can run in the container
-linux.kernel\_modules                           | string    | -                
 | yes           | -                                    | Comma separated list 
of kernel modules to load before starting the container
-migration.incremental.memory                    | boolean   | false            
 | yes           | migration\_pre\_copy                 | Incremental memory 
transfer of the container's memory to reduce downtime.
-migration.incremental.memory.goal               | integer   | 70               
 | yes           | migration\_pre\_copy                 | Percentage of memory 
to have in sync before stopping the container.
-migration.incremental.memory.iterations         | integer   | 10               
 | yes           | migration\_pre\_copy                 | Maximum number of 
transfer operations to go through before stopping the container.
-nvidia.driver.capabilities                      | string    | compute,utility  
 | no            | nvidia\_runtime\_config              | What driver 
capabilities the container needs (sets libnvidia-container 
NVIDIA\_DRIVER\_CAPABILITIES)
-nvidia.runtime                                  | boolean   | false            
 | no            | nvidia\_runtime                      | Pass the host NVIDIA 
and CUDA runtime libraries into the container
-nvidia.require.cuda                             | string    | -                
 | no            | nvidia\_runtime\_config              | Version expression 
for the required CUDA version (sets libnvidia-container NVIDIA\_REQUIRE\_CUDA)
-nvidia.require.driver                           | string    | -                
 | no            | nvidia\_runtime\_config              | Version expression 
for the required driver version (sets libnvidia-container 
NVIDIA\_REQUIRE\_DRIVER)
-raw.apparmor                                    | blob      | -                
 | yes           | -                                    | Apparmor profile 
entries to be appended to the generated profile
-raw.idmap                                       | blob      | -                
 | no            | id\_map                              | Raw idmap 
configuration (e.g. "both 1000 1000")
-raw.lxc                                         | blob      | -                
 | no            | -                                    | Raw LXC configuration 
to be appended to the generated one
-raw.seccomp                                     | blob      | -                
 | no            | container\_syscall\_filtering        | Raw Seccomp 
configuration
-security.devlxd                                 | boolean   | true             
 | no            | restrict\_devlxd                     | Controls the presence 
of /dev/lxd in the container
-security.devlxd.images                          | boolean   | false            
 | no            | devlxd\_images                       | Controls the 
availability of the /1.0/images API over devlxd
-security.idmap.base                             | integer   | -                
 | no            | id\_map\_base                        | The base host ID to 
use for the allocation (overrides auto-detection)
-security.idmap.isolated                         | boolean   | false            
 | no            | id\_map                              | Use an idmap for this 
container that is unique among containers with isolated set.
-security.idmap.size                             | integer   | -                
 | no            | id\_map                              | The size of the idmap 
to use
-security.nesting                                | boolean   | false            
 | yes           | -                                    | Support running lxd 
(nested) inside the container
-security.privileged                             | boolean   | false            
 | no            | -                                    | Runs the container in 
privileged mode
-security.protection.delete                      | boolean   | false            
 | yes           | container\_protection\_delete        | Prevents the 
container from being deleted
-security.protection.shift                       | boolean   | false            
 | yes           | container\_protection\_shift         | Prevents the 
container's filesystem from being uid/gid shifted on startup
-security.syscalls.blacklist                     | string    | -                
 | no            | container\_syscall\_filtering        | A '\n' separated list 
of syscalls to blacklist
-security.syscalls.blacklist\_compat             | boolean   | false            
 | no            | container\_syscall\_filtering        | On x86\_64 this 
enables blocking of compat\_\* syscalls, it is a no-op on other arches
-security.syscalls.blacklist\_default            | boolean   | true             
 | no            | container\_syscall\_filtering        | Enables the default 
syscall blacklist
-security.syscalls.intercept.mknod               | boolean   | false            
 | no            | container\_syscall\_intercept        | Handles the `mknod` 
and `mknodat` system calls (allows creation of a limited subset of char/block 
devices)
-security.syscalls.intercept.mount               | boolean   | false            
 | no            | container\_syscall\_intercept\_mount | Handles the `mount` 
system call
-security.syscalls.intercept.mount.allowed       | string    | -                
 | yes           | container\_syscall\_intercept\_mount | Specify a 
comma-separated list of filesystems that are safe to mount for processes inside 
the container.
-security.syscalls.intercept.mount.shift         | boolean   | false            
 | yes           | container\_syscall\_intercept\_mount | Whether to mount 
shiftfs on top of filesystems handled through mount syscall interception.
-security.syscalls.intercept.setxattr            | boolean   | false            
 | no            | container\_syscall\_intercept        | Handles the 
`setxattr` system call (allows setting a limited subset of restricted extended 
attributes)
-security.syscalls.whitelist                     | string    | -                
 | no            | container\_syscall\_filtering        | A '\n' separated list 
of syscalls to whitelist (mutually exclusive with security.syscalls.blacklist\*)
-snapshots.schedule                              | string    | -                
 | no            | snapshot\_scheduling                 | Cron expression 
(`<minute> <hour> <dom> <month> <dow>`)
-snapshots.schedule.stopped                      | bool      | false            
 | no            | snapshot\_scheduling                 | Controls whether or 
not stopped containers are to be snapshoted automatically
-snapshots.pattern                               | string    | snap%d           
 | no            | snapshot\_scheduling                 | Pongo2 template 
string which represents the snapshot name (used for scheduled snapshots and 
unnamed snapshots)
-snapshots.expiry                                | string    | -                
 | no            | snapshot\_expiry                     | Controls when 
snapshots are to be deleted (expects expression like `1M 2H 3d 4w 5m 6y`)
-user.\*                                         | string    | -                
 | n/a           | -                                    | Free form user 
key/value storage (can be used in search)
+Key                                             | Type      | Default          
 | Live update   | API extension                              | Description
+:--                                             | :---      | :------          
 | :----------   | :------------                              | :----------
+boot.autostart                                  | boolean   | -                
 | n/a           | -                                          | Always start 
the container when LXD starts (if not set, restore last state)
+boot.autostart.delay                            | integer   | 0                
 | n/a           | -                                          | Number of 
seconds to wait after the container started before starting the next one
+boot.autostart.priority                         | integer   | 0                
 | n/a           | -                                          | What order to 
start the containers in (starting with highest)
+boot.host\_shutdown\_timeout                    | integer   | 30               
 | yes           | container\_host\_shutdown\_timeout         | Seconds to wait 
for container to shutdown before it is force stopped
+boot.stop.priority                              | integer   | 0                
 | n/a           | container\_stop\_priority                  | What order to 
shutdown the containers (starting with highest)
+environment.\*                                  | string    | -                
 | yes (exec)    | -                                          | key/value 
environment variables to export to the container and set on exec
+limits.cpu                                      | string    | - (all)          
 | yes           | -                                          | Number or range 
of CPUs to expose to the container
+limits.cpu.allowance                            | string    | 100%             
 | yes           | -                                          | How much of the 
CPU can be used. Can be a percentage (e.g. 50%) for a soft limit or a 
chunk of time (25ms/100ms) for a hard limit
+limits.cpu.priority                             | integer   | 10 (maximum)     
 | yes           | -                                          | CPU scheduling 
priority compared to other containers sharing the same CPUs (overcommit) 
(integer between 0 and 10)
+limits.disk.priority                            | integer   | 5 (medium)       
 | yes           | -                                          | When under 
load, how much priority to give to the container's I/O requests (integer 
between 0 and 10)
+limits.kernel.\*                                | string    | -                
 | no            | kernel\_limits                             | This limits 
kernel resources per container (e.g. number of open files)
+limits.memory                                   | string    | - (all)          
 | yes           | -                                          | Percentage of 
the host's memory or fixed value in bytes (various suffixes supported, see 
below)
+limits.memory.enforce                           | string    | hard             
 | yes           | -                                          | If hard, 
container can't exceed its memory limit. If soft, the container can exceed its 
memory limit when extra host memory is available.
+limits.memory.swap                              | boolean   | true             
 | yes           | -                                          | Whether to 
allow some of the container's memory to be swapped out to disk
+limits.memory.swap.priority                     | integer   | 10 (maximum)     
 | yes           | -                                          | The higher this 
is set, the least likely the container is to be swapped to disk (integer 
between 0 and 10)
+limits.network.priority                         | integer   | 0 (minimum)      
 | yes           | -                                          | When under 
load, how much priority to give to the container's network requests (integer 
between 0 and 10)
+limits.processes                                | integer   | - (max)          
 | yes           | -                                          | Maximum number 
of processes that can run in the container
+linux.kernel\_modules                           | string    | -                
 | yes           | -                                          | Comma separated 
list of kernel modules to load before starting the container
+migration.incremental.memory                    | boolean   | false            
 | yes           | migration\_pre\_copy                       | Incremental 
memory transfer of the container's memory to reduce downtime.
+migration.incremental.memory.goal               | integer   | 70               
 | yes           | migration\_pre\_copy                       | Percentage of 
memory to have in sync before stopping the container.
+migration.incremental.memory.iterations         | integer   | 10               
 | yes           | migration\_pre\_copy                       | Maximum number 
of transfer operations to go through before stopping the container.
+nvidia.driver.capabilities                      | string    | compute,utility  
 | no            | nvidia\_runtime\_config                    | What driver 
capabilities the container needs (sets libnvidia-container 
NVIDIA\_DRIVER\_CAPABILITIES)
+nvidia.runtime                                  | boolean   | false            
 | no            | nvidia\_runtime                            | Pass the host 
NVIDIA and CUDA runtime libraries into the container
+nvidia.require.cuda                             | string    | -                
 | no            | nvidia\_runtime\_config                    | Version 
expression for the required CUDA version (sets libnvidia-container 
NVIDIA\_REQUIRE\_CUDA)
+nvidia.require.driver                           | string    | -                
 | no            | nvidia\_runtime\_config                    | Version 
expression for the required driver version (sets libnvidia-container 
NVIDIA\_REQUIRE\_DRIVER)
+raw.apparmor                                    | blob      | -                
 | yes           | -                                          | Apparmor 
profile entries to be appended to the generated profile
+raw.idmap                                       | blob      | -                
 | no            | id\_map                                    | Raw idmap 
configuration (e.g. "both 1000 1000")
+raw.lxc                                         | blob      | -                
 | no            | -                                          | Raw LXC 
configuration to be appended to the generated one
+raw.seccomp                                     | blob      | -                
 | no            | container\_syscall\_filtering              | Raw Seccomp 
configuration
+security.devlxd                                 | boolean   | true             
 | no            | restrict\_devlxd                           | Controls the 
presence of /dev/lxd in the container
+security.devlxd.images                          | boolean   | false            
 | no            | devlxd\_images                             | Controls the 
availability of the /1.0/images API over devlxd
+security.idmap.base                             | integer   | -                
 | no            | id\_map\_base                              | The base host 
ID to use for the allocation (overrides auto-detection)
+security.idmap.isolated                         | boolean   | false            
 | no            | id\_map                                    | Use an idmap 
for this container that is unique among containers with isolated set.
+security.idmap.size                             | integer   | -                
 | no            | id\_map                                    | The size of the 
idmap to use
+security.nesting                                | boolean   | false            
 | yes           | -                                          | Support running 
lxd (nested) inside the container
+security.privileged                             | boolean   | false            
 | no            | -                                          | Runs the 
container in privileged mode
+security.protection.delete                      | boolean   | false            
 | yes           | container\_protection\_delete              | Prevents the 
container from being deleted
+security.protection.shift                       | boolean   | false            
 | yes           | container\_protection\_shift               | Prevents the 
container's filesystem from being uid/gid shifted on startup
+security.syscalls.blacklist                     | string    | -                
 | no            | container\_syscall\_filtering              | A '\n' 
separated list of syscalls to blacklist
+security.syscalls.blacklist\_compat             | boolean   | false            
 | no            | container\_syscall\_filtering              | On x86\_64 this 
enables blocking of compat\_\* syscalls, it is a no-op on other arches
+security.syscalls.blacklist\_default            | boolean   | true             
 | no            | container\_syscall\_filtering              | Enables the 
default syscall blacklist
+security.syscalls.intercept.mknod               | boolean   | false            
 | no            | container\_syscall\_intercept              | Handles the 
`mknod` and `mknodat` system calls (allows creation of a limited subset of 
char/block devices)
+security.syscalls.intercept.mount               | boolean   | false            
 | no            | container\_syscall\_intercept\_mount       | Handles the 
`mount` system call
+security.syscalls.intercept.mount.allowed       | string    | -                
 | yes           | container\_syscall\_intercept\_mount       | Specify a 
comma-separated list of filesystems that are safe to mount for processes inside 
the container.
+security.syscalls.intercept.mount.fuse          | string    | -                
 | yes           | container\_syscall\_intercept\_mount\_fuse | Whether to 
redirect mounts of a given filesystem to their fuse implementation (e.g. 
ext4=fuse2fs)
+security.syscalls.intercept.mount.shift         | boolean   | false            
 | yes           | container\_syscall\_intercept\_mount       | Whether to 
mount shiftfs on top of filesystems handled through mount syscall interception.
+security.syscalls.intercept.setxattr            | boolean   | false            
 | no            | container\_syscall\_intercept              | Handles the 
`setxattr` system call (allows setting a limited subset of restricted extended 
attributes)
+security.syscalls.whitelist                     | string    | -                
 | no            | container\_syscall\_filtering              | A '\n' 
separated list of syscalls to whitelist (mutually exclusive with 
security.syscalls.blacklist\*)
+snapshots.schedule                              | string    | -                
 | no            | snapshot\_scheduling                       | Cron expression 
(`<minute> <hour> <dom> <month> <dow>`)
+snapshots.schedule.stopped                      | bool      | false            
 | no            | snapshot\_scheduling                       | Controls 
whether or not stopped containers are to be snapshoted automatically
+snapshots.pattern                               | string    | snap%d           
 | no            | snapshot\_scheduling                       | Pongo2 template 
string which represents the snapshot name (used for scheduled snapshots and 
unnamed snapshots)
+snapshots.expiry                                | string    | -                
 | no            | snapshot\_expiry                           | Controls when 
snapshots are to be deleted (expects expression like `1M 2H 3d 4w 5m 6y`)
+user.\*                                         | string    | -                
 | n/a           | -                                          | Free form user 
key/value storage (can be used in search)
 
 The following volatile keys are currently internally used by LXD:
 

From 9cb23c07a7f17cefc235afe8436d184b68e1a832 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brau...@ubuntu.com>
Date: Tue, 12 Nov 2019 23:22:52 +0100
Subject: [PATCH 4/4] scripts: add security.syscalls.intercept.mount.fuse

Signed-off-by: Christian Brauner <christian.brau...@ubuntu.com>
---
 scripts/bash/lxd-client | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/bash/lxd-client b/scripts/bash/lxd-client
index 1fae67dea7..19d4173bd7 100644
--- a/scripts/bash/lxd-client
+++ b/scripts/bash/lxd-client
@@ -96,6 +96,7 @@ _have lxc && {
       security.syscalls.blacklist_compat security.syscalls.blacklist_default \
       security.syscalls.intercept.mknod security.syscalls.intercept.mount \
       security.syscalls.intercept.mount.allowed \
+      security.syscalls.intercept.mount.fuse \
       security.syscalls.intercept.setxattr \
       security.syscall.intercept.mount.shift \
       snapshots.schedule snapshots.schedule.stopped snapshots.pattern \
_______________________________________________
lxc-devel mailing list
lxc-devel@lists.linuxcontainers.org
http://lists.linuxcontainers.org/listinfo/lxc-devel

Reply via email to