The following pull request was submitted through GitHub. It can be accessed and reviewed at: https://github.com/lxc/lxd/pull/2428
This e-mail was sent by the LXC bot; direct replies will not reach the author unless they happen to be subscribed to this list. === Description (from pull-request) === Signed-off-by: Tycho Andersen <tycho.ander...@canonical.com>
From a00486e1e740de120edf734c3f9bb4f6a9452040 Mon Sep 17 00:00:00 2001 From: Tycho Andersen <tycho.ander...@canonical.com> Date: Fri, 17 Jun 2016 17:51:17 +0000 Subject: [PATCH] apparmor: create an apparmor namespace for each container Signed-off-by: Tycho Andersen <tycho.ander...@canonical.com> --- lxd/apparmor.go | 101 ++++++++++++++++++++++++++++++++++++++++++++------- lxd/container_lxc.go | 26 +++++++++++-- lxd/daemon.go | 43 ++++++++++++++++++++++ test/suites/basic.sh | 13 +++++-- 4 files changed, 163 insertions(+), 20 deletions(-) diff --git a/lxd/apparmor.go b/lxd/apparmor.go index b4d73f3..1adeadd 100644 --- a/lxd/apparmor.go +++ b/lxd/apparmor.go @@ -94,7 +94,7 @@ const AA_PROFILE_BASE = ` mount fstype=sysfs -> /sys/, mount options=(rw, nosuid, nodev, noexec, remount) -> /sys/, deny /sys/firmware/efi/efivars/** rwklx, - deny /sys/kernel/security/** rwklx, + # note, /sys/kernel/security/** handled below mount options=(move) /sys/fs/cgroup/cgmanager/ -> /sys/fs/cgroup/cgmanager.lower/, mount options=(ro, nosuid, nodev, noexec, remount, strictatime) -> /sys/fs/cgroup/, @@ -186,7 +186,7 @@ const AA_PROFILE_BASE = ` deny /proc/sys/n[^e]*{,/**} wklx, deny /proc/sys/ne[^t]*{,/**} wklx, deny /proc/sys/net?*{,/**} wklx, - deny /sys/[^fdc]*{,/**} wklx, + deny /sys/[^fdck]*{,/**} wklx, deny /sys/c[^l]*{,/**} wklx, deny /sys/cl[^a]*{,/**} wklx, deny /sys/cla[^s]*{,/**} wklx, @@ -250,14 +250,28 @@ const AA_PROFILE_NESTING = ` mount options=bind /var/lib/lxd/shmounts/** -> /var/lib/lxd/**, ` -func AAProfileFull(c container) string { - lxddir := shared.VarPath("") - if len(c.Name())+len(lxddir)+7 >= 253 { +func mkApparmorName(name string) string { + if len(name)+7 >= 253 { hash := sha256.New() - io.WriteString(hash, lxddir) - lxddir = fmt.Sprintf("%x", hash.Sum(nil)) + io.WriteString(hash, name) + return fmt.Sprintf("%x", hash.Sum(nil)) } + return name +} + +func AANamespace(c container) string { + /* / is not allowed in apparmor namespace names; let's also trim the + * 
leading / so it doesn't look like "-var-lib-lxd" + */ + lxddir := strings.Replace(strings.Trim(shared.VarPath(""), "/"), "/", "-", -1) + lxddir = mkApparmorName(lxddir) + return fmt.Sprintf("lxd-%s_<%s>", c.Name(), lxddir) +} + +func AAProfileFull(c container) string { + lxddir := shared.VarPath("") + lxddir = mkApparmorName(lxddir) return fmt.Sprintf("lxd-%s_<%s>", c.Name(), lxddir) } @@ -289,11 +303,50 @@ func getAAProfileContent(c container) string { profile += " mount fstype=cgroup -> /sys/fs/cgroup/**,\n" } - // Apply nesting bits + if aaStacking { + profile += "\n ### Feature: apparmor stacking\n" + + if c.IsPrivileged() { + profile += "\n ### Configuration: apparmor loading disabled in privileged containers\n" + profile += " deny /sys/k*{,/**} rwklx,\n" + } else { + profile += ` ### Configuration: apparmor loading in unprivileged containers + deny /sys/k[^e]*{,/**} wklx, + deny /sys/ke[^r]*{,/**} wklx, + deny /sys/ker[^n]*{,/**} wklx, + deny /sys/kern[^e]*{,/**} wklx, + deny /sys/kerne[^l]*{,/**} wklx, + deny /sys/kernel/[^s]*{,/**} wklx, + deny /sys/kernel/s[^e]*{,/**} wklx, + deny /sys/kernel/se[^c]*{,/**} wklx, + deny /sys/kernel/sec[^u]*{,/**} wklx, + deny /sys/kernel/secu[^r]*{,/**} wklx, + deny /sys/kernel/secur[^i]*{,/**} wklx, + deny /sys/kernel/securi[^t]*{,/**} wklx, + deny /sys/kernel/securit[^y]*{,/**} wklx, + deny /sys/kernel/security/[^a]*{,/**} wklx, + deny /sys/kernel/security/a[^p]*{,/**} wklx, + deny /sys/kernel/security/ap[^p]*{,/**} wklx, + deny /sys/kernel/security/app[^a]*{,/**} wklx, + deny /sys/kernel/security/appa[^r]*{,/**} wklx, + deny /sys/kernel/security/appar[^m]*{,/**} wklx, + deny /sys/kernel/security/apparm[^o]*{,/**} wklx, + deny /sys/kernel/security/apparmo[^r]*{,/**} wklx, + deny /sys/kernel/security/apparmor?*{,/**} wklx, + deny /sys/kernel/security?*{,/**} wklx, + deny /sys/kernel?*{,/**} wklx, +` + profile += fmt.Sprintf(" change_profile -> \":%s://*\",\n", AANamespace(c)) + } + } + if c.IsNesting() { + // Apply nesting 
bits profile += "\n ### Configuration: nesting\n" profile += strings.TrimLeft(AA_PROFILE_NESTING, "\n") - profile += fmt.Sprintf(" change_profile -> \"%s\",\n", AAProfileFull(c)) + if !aaStacking || c.IsPrivileged() { + profile += fmt.Sprintf(" change_profile -> \"%s\",\n", AAProfileFull(c)) + } } // Append raw.apparmor @@ -317,12 +370,13 @@ func runApparmor(command string, c container) error { return nil } - cmd := exec.Command("apparmor_parser", []string{ + args := []string{ fmt.Sprintf("-%sWL", command), path.Join(aaPath, "cache"), path.Join(aaPath, "profiles", AAProfileShort(c)), - }...) + } + cmd := exec.Command("apparmor_parser", args...) output, err := cmd.CombinedOutput() if err != nil { shared.LogError("Running apparmor", @@ -332,6 +386,14 @@ func runApparmor(command string, c container) error { return err } +func mkApparmorNamespace(namespace string) error { + if !aaStacking { + return nil + } + + return os.Mkdir(path.Join("/sys/kernel/security/apparmor/policy/namespaces", namespace), 0755) +} + // Ensure that the container's policy is loaded into the kernel so the // container can boot. func AALoadProfile(c container) error { @@ -339,6 +401,10 @@ func AALoadProfile(c container) error { return nil } + if err := mkApparmorNamespace(AANamespace(c)); err != nil { + return err + } + /* In order to avoid forcing a profile parse (potentially slow) on * every container start, let's use apparmor's binary policy cache, * which checks mtime of the files to figure out if the policy needs to @@ -375,13 +441,20 @@ func AALoadProfile(c container) error { return runApparmor(APPARMOR_CMD_LOAD, c) } -// Ensure that the container's policy is unloaded to free kernel memory. This -// does not delete the policy from disk or cache. -func AAUnloadProfile(c container) error { +// Ensure that the container's policy namespace is unloaded to free kernel +// memory. This does not delete the policy from disk or cache. 
+func AADestroy(c container) error { if !aaAdmin { return nil } + if aaStacking { + p := path.Join("/sys/kernel/security/apparmor/policy/namespaces", AANamespace(c)) + if err := os.Remove(p); err != nil { + shared.LogError("error removing apparmor namespace", log.Ctx{"err": err, "ns": p}) + } + } + return runApparmor(APPARMOR_CMD_UNLOAD, c) } diff --git a/lxd/container_lxc.go b/lxd/container_lxc.go index 99282dc..9159511 100644 --- a/lxd/container_lxc.go +++ b/lxd/container_lxc.go @@ -388,7 +388,12 @@ func (c *containerLXC) initLXC() error { } // Base config - err = lxcSetConfigItem(cc, "lxc.cap.drop", "mac_admin mac_override sys_time sys_module sys_rawio") + toDrop := "sys_time sys_module sys_rawio" + if !aaStacking || c.IsPrivileged() { + toDrop = toDrop + " mac_admin mac_override" + } + + err = lxcSetConfigItem(cc, "lxc.cap.drop", toDrop) if err != nil { return err } @@ -587,7 +592,20 @@ func (c *containerLXC) initLXC() error { } } else { // If not currently confined, use the container's profile - err := lxcSetConfigItem(cc, "lxc.aa_profile", AAProfileFull(c)) + profile := AAProfileFull(c) + + /* In the nesting case, we want to enable the inside + * LXD to load its profile. Unprivileged containers can + * load profiles, but privileged containers cannot, so + * let's not use a namespace so they can fall back to + * the old way of nesting, i.e. using the parent's + * profile. 
+ */ + if aaStacking && (!c.IsNesting() || !c.IsPrivileged()) { + profile = fmt.Sprintf("%s//&:%s:", profile, AANamespace(c)) + } + + err := lxcSetConfigItem(cc, "lxc.aa_profile", profile) if err != nil { return err } @@ -1702,7 +1720,9 @@ func (c *containerLXC) OnStop(target string) error { } // Unload the apparmor profile - AAUnloadProfile(c) + if err := AADestroy(c); err != nil { + shared.LogError("failed to destroy apparmor namespace", log.Ctx{"container": c.Name(), "err": err}) + } // FIXME: The go routine can go away once we can rely on LXC_TARGET go func(c *containerLXC, target string, op *lxcContainerOperation) { diff --git a/lxd/daemon.go b/lxd/daemon.go index 984d2e0..586c432 100644 --- a/lxd/daemon.go +++ b/lxd/daemon.go @@ -40,6 +40,7 @@ import ( var aaAdmin = true var aaAvailable = true var aaConfined = false +var aaStacking = false // CGroup var cgBlkioController = false @@ -626,6 +627,48 @@ func (d *Daemon) Init() error { } } + if aaAvailable { + canStack := func() bool { + contentBytes, err := ioutil.ReadFile("/sys/kernel/security/apparmor/features/domain/stack") + if err != nil { + return false + } + + if string(contentBytes) != "yes\n" { + return false + } + + contentBytes, err = ioutil.ReadFile("/sys/kernel/security/apparmor/features/domain/version") + if err != nil { + return false + } + + content := string(contentBytes) + + parts := strings.Split(strings.TrimSpace(content), ".") + if len(parts) != 2 { + shared.LogWarn("unknown apparmor domain version", log.Ctx{"version": content}) + return false + } + + major, err := strconv.Atoi(parts[0]) + if err != nil { + shared.LogWarn("unknown apparmor domain version", log.Ctx{"version": content}) + return false + } + + minor, err := strconv.Atoi(parts[1]) + if err != nil { + shared.LogWarn("unknown apparmor domain version", log.Ctx{"version": content}) + return false + } + + return major >= 1 && minor >= 1 + } + + aaStacking = canStack() + } + /* Detect CGroup support */ cgBlkioController = 
shared.PathExists("/sys/fs/cgroup/blkio/") if !cgBlkioController { diff --git a/test/suites/basic.sh b/test/suites/basic.sh index 509260a..5bcfdc9 100644 --- a/test/suites/basic.sh +++ b/test/suites/basic.sh @@ -313,9 +313,16 @@ test_basic_usage() { # check that an apparmor profile is created for this container, that it is # unloaded on stop, and that it is deleted when the container is deleted lxc launch testimage lxd-apparmor-test - aa-status | grep "lxd-lxd-apparmor-test_<${LXD_DIR}>" - lxc stop lxd-apparmor-test --force - ! aa-status | grep -q "lxd-lxd-apparmor-test_<${LXD_DIR}>" + if [ -f /sys/kernel/security/apparmor/features/domain/version ]; then + aa_namespace="lxd-lxd-apparmor-test_<$(echo "${LXD_DIR}" | sed -e 's/\//-/g' -e 's/^.//')>" + aa-status | grep ":${aa_namespace}://unconfined" + lxc stop lxd-apparmor-test --force + ! aa-status | grep -q ":${aa_namespace}:" + else + aa-status | grep "lxd-lxd-apparmor-test_<${LXD_DIR}>" + lxc stop lxd-apparmor-test --force + ! aa-status | grep -q "lxd-lxd-apparmor-test_<${LXD_DIR}>" + fi lxc delete lxd-apparmor-test [ ! -f "${LXD_DIR}/security/apparmor/profiles/lxd-lxd-apparmor-test" ]
_______________________________________________ lxc-devel mailing list lxc-devel@lists.linuxcontainers.org http://lists.linuxcontainers.org/listinfo/lxc-devel