Quoting Daniel P. Berrange (berra...@redhat.com):
> This patch updates the LXC driver to make use of libcap-ng for managing
> process capabilities. Previously Ryota Ozaki had provided code to remove
> the CAP_BOOT  capabilities inside the container, preventing host reboots.
> In addition to that one, I believe we should be removing ability to
> load kernel modules, change the system clock and changing audit/MAC.
> So this patch also clears the following:
> 
>      CAP_SYS_MODULE, /* No kernel module loading */
>      CAP_SYS_TIME, /* No changing the clock */
>      CAP_AUDIT_CONTROL, /* No messing with auditing */
>      CAP_AUDIT_WRITE, /* No messing with auditing */
>      CAP_MAC_ADMIN, /* No messing with LSM */
>      CAP_MAC_OVERRIDE, /* No messing with LSM */

Thanks, Daniel, this does look good.  I wonder whether there is a more
appropriate list to email caps-related patches (including libcap-ng
itself) to.  Not only does the code itself warrant discussion (for
instance, should capng_lock() also set CAP_NOSUID_FIXUP?), but such
discussions, in one place, about converting several programs to dropping
capabilities would help others to do the same with this code.

I can't think of anything other than the LSM list, so I'm cc:ing it
here.

> We use libcap-ng's capng_updatev/apply functions to remove these from 
> the permitted, inheritable, effective and bounding sets. Then we use
> capng_lock to set NOROOT and NOROOT_LOCKED in the process securebits
> to prevent them ever being re-acquired.
> 
> The other thing I realized is that the 'libvirt_lxc' controller process
> does not need to keep any capabilities at all once it has spawned the 
> container process, since all its doing is forwarding I/O between 2 open
> file descripts. So I also clear all capabilities from that. We should
> probably make it chuid/gid to a non-root user in future too. 
> 
>  lxc_container.c  |   66 
> +++++++++++++++++++++++++++++++++++++------------------
>  lxc_controller.c |   28 +++++++++++++++++++++++
>  2 files changed, 73 insertions(+), 21 deletions(-)
> 
> 
> Regards,
> Daniel
> 
> diff -r 7e766489c4a2 src/lxc_container.c
> --- a/src/lxc_container.c     Tue Jun 23 11:13:45 2009 +0100
> +++ b/src/lxc_container.c     Tue Jun 23 11:54:10 2009 +0100
> @@ -41,8 +41,9 @@
>  /* For MS_MOVE */
>  #include <linux/fs.h>
> 
> -#include <sys/prctl.h>
> -#include <linux/capability.h>
> +#if HAVE_CAPNG
> +#include <cap-ng.h>
> +#endif
> 
>  #include "virterror_internal.h"
>  #include "logging.h"
> @@ -642,27 +643,50 @@ static int lxcContainerSetupMounts(virDo
>          return lxcContainerSetupExtraMounts(vmDef);
>  }
> 
> -static int lxcContainerDropCapabilities(virDomainDefPtr vmDef 
> ATTRIBUTE_UNUSED)
> +
> +/*
> + * This is running as the 'init' process insid the container.
> + * It removes some capabilities that could be dangerous to
> + * host system, since they are not currently "containerized"
> + */
> +static int lxcContainerDropCapabilities(void)
>  {
> -#ifdef PR_CAPBSET_DROP
> -    int i;
> -    const struct {
> -        int id;
> -        const char *name;
> -    } caps[] = {
> -#define ID_STRING(name) name, #name
> -        { ID_STRING(CAP_SYS_BOOT) },
> -    };
> +#if HAVE_CAPNG
> +    int ret;
> 
> -    for (i = 0 ; i < ARRAY_CARDINALITY(caps) ; i++) {
> -        if (prctl(PR_CAPBSET_DROP, caps[i].id, 0, 0, 0)) {
> -            lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
> -                     _("failed to drop %s"), caps[i].name);
> -            return -1;
> -        }
> +    capng_get_caps_process();
> +
> +    if ((ret = capng_updatev(CAPNG_DROP,
> +                             CAPNG_EFFECTIVE | CAPNG_PERMITTED |
> +                             CAPNG_INHERITABLE | CAPNG_BOUNDING_SET,
> +                             CAP_SYS_BOOT, /* No use of reboot */
> +                             CAP_SYS_MODULE, /* No kernel module loading */
> +                             CAP_SYS_TIME, /* No changing the clock */
> +                             CAP_AUDIT_CONTROL, /* No messing with auditing 
> */
> +                             CAP_AUDIT_WRITE, /* No messing with auditing */
> +                             CAP_MAC_ADMIN, /* No messing with LSM */
> +                             CAP_MAC_OVERRIDE, /* No messing with LSM */
> +                             -1 /* sentinal */)) < 0) {
> +        lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
> +                 _("failed to remove capabilities %d"), ret);
> +        return -1;
>      }
> -#else /* ! PR_CAPBSET_DROP */
> -    VIR_WARN0(_("failed to drop capabilities PR_CAPBSET_DROP undefined"));
> +
> +    if ((ret = capng_apply(CAPNG_SELECT_BOTH)) < 0) {
> +        lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
> +                 _("failed to apply capabilities: %d"), ret);
> +        return -1;
> +    }

The only problem offhand with this idiom is that you need CAP_SETPCAP to
set securebits and drop caps from bounding set, but I think a lot of
applications could stand to drop CAP_SETPCAP otherwise.  So I don't know
if it would help to do the capng_lock() before capng_apply().

(To be clear, not bc you need to do so right here, but because others
may well look at your code as example code.)

> +    /* Need to prevent them regaining any caps on exec */
> +    if ((ret = capng_lock()) < 0) {
> +        lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
> +                 _("failed to lock capabilities: %d"), ret);
> +        return -1;
> +    }
> +
> +#else
> +    VIR_WARN0(_("libcap-ng support not compiled in, unable to clear 
> capabilities"));
>  #endif
>      return 0;
>  }
> @@ -735,7 +759,7 @@ static int lxcContainerChild( void *data
>          return -1;
> 
>      /* drop a set of root capabilities */
> -    if (lxcContainerDropCapabilities(vmDef) < 0)
> +    if (lxcContainerDropCapabilities() < 0)
>          return -1;
> 
>      /* this function will only return if an error occured */
> diff -r 7e766489c4a2 src/lxc_controller.c
> --- a/src/lxc_controller.c    Tue Jun 23 11:13:45 2009 +0100
> +++ b/src/lxc_controller.c    Tue Jun 23 11:54:10 2009 +0100
> @@ -35,6 +35,10 @@
>  #include <getopt.h>
>  #include <sys/mount.h>
> 
> +#if HAVE_CAPNG
> +#include <cap-ng.h>
> +#endif
> +
>  #include "virterror_internal.h"
>  #include "logging.h"
>  #include "util.h"
> @@ -210,6 +214,25 @@ cleanup:
>      return rc;
>  }
> 
> +
> +static int lxcControllerClearCapabilities(void)
> +{
> +#if HAVE_CAPNG
> +    int ret;
> +
> +    capng_clear(CAPNG_SELECT_BOTH);
> +
> +    if ((ret = capng_apply(CAPNG_SELECT_BOTH)) < 0) {
> +        lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
> +                 _("failed to apply capabilities: %d"), ret);
> +        return -1;
> +    }
> +#else
> +    VIR_WARN0(_("libcap-ng support not compiled in, unable to clear 
> capabilities"));
> +#endif
> +    return 0;
> +}
> +
>  typedef struct _lxcTtyForwardFd_t {
>      int fd;
>      int active;
> @@ -562,6 +585,11 @@ lxcControllerRun(virDomainDefPtr def,
>      if (lxcContainerSendContinue(control[0]) < 0)
>          goto cleanup;
> 
> +    /* Now the container is running, there's no need for us to keep
> +       any elevated capabilities */
> +    if (lxcControllerClearCapabilities() < 0)
> +        goto cleanup;
> +
>      rc = lxcControllerMain(monitor, client, appPty, containerPty);
> 
>  cleanup:
> 
> 
> -- 
> |: Red Hat, Engineering, London   -o-   http://people.redhat.com/berrange/ :|
> |: http://libvirt.org  -o-  http://virt-manager.org  -o-  http://ovirt.org :|
> |: http://autobuild.org       -o-         http://search.cpan.org/~danberr/ :|
> |: GnuPG: 7D3B9505  -o-  F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|
> 
> --
> Libvir-list mailing list
> Libvir-list@redhat.com
> https://www.redhat.com/mailman/listinfo/libvir-list

--
Libvir-list mailing list
Libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list

Reply via email to