Ohai,

On Sat, May 13, 2017 at 09:44:42AM +0100, Evgeni Golov wrote:
> I wonder if you would be OK with accepting the latest LXC stable release
> into Stretch at this point in time.
> Admittedly, the diff between the version in testing and 2.0.8 is quite
> big (64 files changed, 1652 insertions(+), 996 deletions(-)), but it
> allows us to drop both cherry-picked patches we carry today and gives an
> IMHO better base for later updates during the Stretch life cycle.

Niels asked on IRC if the diff can be filtered a bit. It can, but even then it still does not look magically awesome:

 54 files changed, 1548 insertions(+), 955 deletions(-)

Generated with:

 git diff debian/1%2.0.7-2.. |filterdiff -x a/debian/patches/\* -x a/configure -x a/src/tests/\* -x a/README -x a/doc/\* -x \*/Makefile.in

Bear in mind that the above still contains the upstream versions of the two dropped patches:

 lxc-2.0-CVE-2017-5985-Ensure-target-netns-is-caller-owned.patch
   src/lxc/lxc_user_nic.c | 119 ++++++++++++++++++++++++++++++++++++-------------
 0010-lxc-debian-root-password.patch
   templates/lxc-debian.in | 3 ---

And I have no way to filter those out.

New debdiff attached.

Also, since it was asked: lxc and lxcfs are not coupled, so we can update them independently.

diff --git a/config/init/common/lxc-containers.in b/config/init/common/lxc-containers.in index ebce36e..35b9084 100644 --- a/config/init/common/lxc-containers.in +++ b/config/init/common/lxc-containers.in @@ -51,7 +51,9 @@ fi # to start wait_for_bridge() { - local BRNAME try flags + [ "x$USE_LXC_BRIDGE" = "xtrue" ] || { return 0; } + + local BRNAME try flags br [ -f "$sysconfdir"/lxc/default.conf ] || { return 0; } BRNAME=`grep '^[ ]*lxc.network.link' "$sysconfdir"/lxc/default.conf | sed 's/^.*=[ ]*//'` @@ -60,11 +62,12 @@ wait_for_bridge() fi for try in `seq 1 30`; do - if [ -r /sys/class/net/$BRNAME/flags ]; then - read flags < /sys/class/net/$BRNAME/flags - [ $((flags & 0x1)) -eq 1 ] && { return 0; } - fi - sleep 1 + for br in ${BRNAME}; do + [ -r /sys/class/net/${br}/flags ] || { sleep 1; continue 2; } + read flags < /sys/class/net/${br}/flags + [ $((flags & 0x1)) -eq 1 ] || { sleep 1; continue 2; } + done + return 0 done } diff --git a/config/init/common/lxc-net.in b/config/init/common/lxc-net.in index 4797f20..f770950 100644 --- a/config/init/common/lxc-net.in +++ b/config/init/common/lxc-net.in @@ -66,6 +66,7 @@ start() { if [ "$FAILED" = "1" ]; then echo "Failed to setup lxc-net." 
>&2 stop force + exit 1 fi } diff --git a/config/templates/userns.conf.in b/config/templates/userns.conf.in index 5dc19c7..78383eb 100644 --- a/config/templates/userns.conf.in +++ b/config/templates/userns.conf.in @@ -6,7 +6,6 @@ lxc.cgroup.devices.allow = lxc.devttydir = # Extra bind-mounts for userns -lxc.mount.entry = /dev/console dev/console none bind,create=file 0 0 lxc.mount.entry = /dev/full dev/full none bind,create=file 0 0 lxc.mount.entry = /dev/null dev/null none bind,create=file 0 0 lxc.mount.entry = /dev/random dev/random none bind,create=file 0 0 diff --git a/configure.ac b/configure.ac index 42ece7a..bd2d82f 100644 --- a/configure.ac +++ b/configure.ac @@ -4,7 +4,7 @@ m4_define([lxc_devel], 0) m4_define([lxc_version_major], 2) m4_define([lxc_version_minor], 0) -m4_define([lxc_version_micro], 7) +m4_define([lxc_version_micro], 8) m4_define([lxc_version_beta], []) m4_define([lxc_abi_major], 1) @@ -113,13 +113,13 @@ case "$with_init_script" in fedora|altlinux|opensuse*) init_script=systemd ;; - redhat|centos|oracle|oracleserver|sparclinux|plamo) + redhat|oracle|oracleserver|sparclinux|plamo) init_script=sysvinit ;; - debian|raspbian) - init_script=upstart,systemd + centos) + init_script=sysvinit,systemd ;; - ubuntu) + debian|raspbian|ubuntu) init_script=upstart,systemd ;; *) @@ -366,8 +366,11 @@ fi AM_CONDITIONAL([ENABLE_CAP], [test "x$enable_capabilities" = "xyes"]) AM_COND_IF([ENABLE_CAP], - [AC_CHECK_LIB(cap,cap_set_proc,[true],[AC_MSG_ERROR([You are missing libcap support.])]) - AC_SUBST([CAP_LIBS], [-lcap])]) + [AC_CHECK_HEADER([sys/capability.h],[],[AC_MSG_ERROR([You must install the libcap development package in order to compile lxc])]) + AC_CHECK_LIB(cap,cap_set_proc,[],[AC_MSG_ERROR([You must install the libcap development package in order to compile lxc])]) + # Test whether we support getting file capabilities via cap_get_file(). 
+ AC_CHECK_LIB(cap,cap_get_file, AC_DEFINE(LIBCAP_SUPPORTS_FILE_CAPABILITIES,1,[Have cap_get_file]),[],[]) + AC_SUBST([CAP_LIBS], [-lcap])]) # HAVE_SCMP_FILTER_CTX=1 will tell us we have libseccomp api >= 1.0.0 OLD_CFLAGS="$CFLAGS" @@ -630,7 +633,7 @@ AM_CONDITIONAL([IS_BIONIC], [test "x$is_bionic" = "xyes"]) AC_CHECK_DECLS([PR_CAPBSET_DROP], [], [], [#include <sys/prctl.h>]) # Check for some headers -AC_CHECK_HEADERS([sys/signalfd.h pty.h ifaddrs.h sys/capability.h sys/memfd.h sys/personality.h utmpx.h sys/timerfd.h]) +AC_CHECK_HEADERS([sys/signalfd.h pty.h ifaddrs.h sys/memfd.h sys/personality.h utmpx.h sys/timerfd.h]) # lookup major()/minor()/makedev() AC_HEADER_MAJOR diff --git a/hooks/clonehostname b/hooks/clonehostname index ed2765c..8eec7a6 100755 --- a/hooks/clonehostname +++ b/hooks/clonehostname @@ -19,9 +19,9 @@ # Note that /etc/hostname is updated by lxc itself for file in \ - $LXC_ROOTFS_PATH/etc/sysconfig/network \ - $LXC_ROOTFS_PATH/etc/sysconfig/network-scripts/ifcfg-* \ - $LXC_ROOTFS_PATH/etc/hosts ; + $LXC_ROOTFS_MOUNT/etc/sysconfig/network \ + $LXC_ROOTFS_MOUNT/etc/sysconfig/network-scripts/ifcfg-* \ + $LXC_ROOTFS_MOUNT/etc/hosts ; do if [ -f $file ]; then sed -i "s|$LXC_SRC_NAME|$LXC_NAME|" $file diff --git a/lxc.spec b/lxc.spec index 4d4ef07..993c46c 100644 --- a/lxc.spec +++ b/lxc.spec @@ -60,7 +60,7 @@ BuildRequires: systemd %endif Name: lxc -Version: 2.0.7 +Version: 2.0.8 Release: %{?beta_rel:0.1.%{beta_rel}}%{?!beta_rel:%{norm_rel}}%{?dist} URL: http://linuxcontainers.org Source: http://linuxcontainers.org/downloads/%{name}-%{version}%{?beta_dot}.tar.gz diff --git a/src/config.h.in b/src/config.h.in index a83788a..1228131 100644 --- a/src/config.h.in +++ b/src/config.h.in @@ -56,6 +56,9 @@ /* Define to 1 if you have the `apparmor' library (-lapparmor). */ #undef HAVE_LIBAPPARMOR +/* Define to 1 if you have the `cap' library (-lcap). */ +#undef HAVE_LIBCAP + /* Define to 1 if you have the `gnutls' library (-lgnutls). 
*/ #undef HAVE_LIBGNUTLS @@ -128,9 +131,6 @@ /* Define to 1 if you have the <string.h> header file. */ #undef HAVE_STRING_H -/* Define to 1 if you have the <sys/capability.h> header file. */ -#undef HAVE_SYS_CAPABILITY_H - /* Define to 1 if you have the <sys/memfd.h> header file. */ #undef HAVE_SYS_MEMFD_H @@ -167,6 +167,9 @@ /* bionic libc */ #undef IS_BIONIC +/* Have cap_get_file */ +#undef LIBCAP_SUPPORTS_FILE_CAPABILITIES + /* Define to the sub-directory where libtool stores uninstalled libraries. */ #undef LT_OBJDIR diff --git a/src/lxc/Makefile.am b/src/lxc/Makefile.am index 6bcb6da..d7c05d6 100644 --- a/src/lxc/Makefile.am +++ b/src/lxc/Makefile.am @@ -290,7 +290,7 @@ init_lxc_static_SOURCES += ../include/getline.c endif endif -init_lxc_static_LDFLAGS = -static +init_lxc_static_LDFLAGS = -all-static init_lxc_static_LDADD = @CAP_LIBS@ init_lxc_static_CFLAGS = $(AM_CFLAGS) -DNO_LXC_CONF endif diff --git a/src/lxc/af_unix.c b/src/lxc/af_unix.c index 46d8e50..ac83994 100644 --- a/src/lxc/af_unix.c +++ b/src/lxc/af_unix.c @@ -55,8 +55,9 @@ int lxc_abstract_unix_open(const char *path, int type, int flags) addr.sun_family = AF_UNIX; - len = strlen(&path[1]) + 1; - if (len >= sizeof(addr.sun_path) - 1) { + len = strlen(&path[1]); + /* do not enforce \0-termination */ + if (len >= sizeof(addr.sun_path)) { close(fd); errno = ENAMETOOLONG; return -1; @@ -64,7 +65,7 @@ int lxc_abstract_unix_open(const char *path, int type, int flags) /* addr.sun_path[0] has already been set to 0 by memset() */ strncpy(&addr.sun_path[1], &path[1], strlen(&path[1])); - if (bind(fd, (struct sockaddr *)&addr, offsetof(struct sockaddr_un, sun_path) + len)) { + if (bind(fd, (struct sockaddr *)&addr, offsetof(struct sockaddr_un, sun_path) + len + 1)) { int tmp = errno; close(fd); errno = tmp; @@ -109,8 +110,9 @@ int lxc_abstract_unix_connect(const char *path) addr.sun_family = AF_UNIX; - len = strlen(&path[1]) + 1; - if (len >= sizeof(addr.sun_path) - 1) { + len = strlen(&path[1]); + /* do not 
enforce \0-termination */ + if (len >= sizeof(addr.sun_path)) { close(fd); errno = ENAMETOOLONG; return -1; @@ -118,7 +120,7 @@ int lxc_abstract_unix_connect(const char *path) /* addr.sun_path[0] has already been set to 0 by memset() */ strncpy(&addr.sun_path[1], &path[1], strlen(&path[1])); - if (connect(fd, (struct sockaddr *)&addr, offsetof(struct sockaddr_un, sun_path) + len)) { + if (connect(fd, (struct sockaddr *)&addr, offsetof(struct sockaddr_un, sun_path) + len + 1)) { int tmp = errno; /* special case to connect to older containers */ if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) == 0) @@ -136,8 +138,8 @@ int lxc_abstract_unix_send_fd(int fd, int sendfd, void *data, size_t size) struct msghdr msg = { 0 }; struct iovec iov; struct cmsghdr *cmsg; - char cmsgbuf[CMSG_SPACE(sizeof(int))]; - char buf[1]; + char cmsgbuf[CMSG_SPACE(sizeof(int))] = {0}; + char buf[1] = {0}; int *val; msg.msg_control = cmsgbuf; @@ -166,9 +168,9 @@ int lxc_abstract_unix_recv_fd(int fd, int *recvfd, void *data, size_t size) struct msghdr msg = { 0 }; struct iovec iov; struct cmsghdr *cmsg; - char cmsgbuf[CMSG_SPACE(sizeof(int))]; - char buf[1]; int ret, *val; + char cmsgbuf[CMSG_SPACE(sizeof(int))] = {0}; + char buf[1] = {0}; msg.msg_name = NULL; msg.msg_namelen = 0; @@ -210,8 +212,8 @@ int lxc_abstract_unix_send_credential(int fd, void *data, size_t size) .uid = getuid(), .gid = getgid(), }; - char cmsgbuf[CMSG_SPACE(sizeof(cred))]; - char buf[1]; + char cmsgbuf[CMSG_SPACE(sizeof(cred))] = {0}; + char buf[1] = {0}; msg.msg_control = cmsgbuf; msg.msg_controllen = sizeof(cmsgbuf); @@ -239,9 +241,9 @@ int lxc_abstract_unix_rcv_credential(int fd, void *data, size_t size) struct iovec iov; struct cmsghdr *cmsg; struct ucred cred; - char cmsgbuf[CMSG_SPACE(sizeof(cred))]; - char buf[1]; int ret; + char cmsgbuf[CMSG_SPACE(sizeof(cred))] = {0}; + char buf[1] = {0}; msg.msg_name = NULL; msg.msg_namelen = 0; diff --git a/src/lxc/af_unix.h b/src/lxc/af_unix.h index 3f5d01f..d25a211 
100644 --- a/src/lxc/af_unix.h +++ b/src/lxc/af_unix.h @@ -24,8 +24,10 @@ #ifndef __LXC_AF_UNIX_H #define __LXC_AF_UNIX_H +/* does not enforce \0-termination */ extern int lxc_abstract_unix_open(const char *path, int type, int flags); extern int lxc_abstract_unix_close(int fd); +/* does not enforce \0-termination */ extern int lxc_abstract_unix_connect(const char *path); extern int lxc_abstract_unix_send_fd(int fd, int sendfd, void *data, size_t size); extern int lxc_abstract_unix_recv_fd(int fd, int *recvfd, void *data, size_t size); diff --git a/src/lxc/bdev/lxcloop.c b/src/lxc/bdev/lxcloop.c index b322002..a4633e4 100644 --- a/src/lxc/bdev/lxcloop.c +++ b/src/lxc/bdev/lxcloop.c @@ -35,19 +35,9 @@ #include "lxcloop.h" #include "utils.h" -#ifndef LO_FLAGS_AUTOCLEAR -#define LO_FLAGS_AUTOCLEAR 4 -#endif - -#ifndef LOOP_CTL_GET_FREE -#define LOOP_CTL_GET_FREE 0x4C82 -#endif - lxc_log_define(lxcloop, lxc); static int do_loop_create(const char *path, uint64_t size, const char *fstype); -static int find_free_loopdev_no_control(int *retfd, char *namep); -static int find_free_loopdev(int *retfd, char *namep); /* * No idea what the original blockdev will be called, but the copy will be @@ -174,47 +164,26 @@ int loop_detect(const char *path) int loop_mount(struct bdev *bdev) { - int lfd, ffd = -1, ret = -1; - struct loop_info64 lo; - char loname[100]; + int ret, loopfd; + char loname[MAXPATHLEN]; if (strcmp(bdev->type, "loop")) return -22; if (!bdev->src || !bdev->dest) return -22; - if (find_free_loopdev(&lfd, loname) < 0) - return -22; - - ffd = open(bdev->src + 5, O_RDWR); - if (ffd < 0) { - SYSERROR("Error opening backing file %s", bdev->src); - goto out; - } - if (ioctl(lfd, LOOP_SET_FD, ffd) < 0) { - SYSERROR("Error attaching backing file to loop dev"); - goto out; - } - memset(&lo, 0, sizeof(lo)); - lo.lo_flags = LO_FLAGS_AUTOCLEAR; - if (ioctl(lfd, LOOP_SET_STATUS64, &lo) < 0) { - SYSERROR("Error setting autoclear on loop dev"); - goto out; - } + loopfd = 
lxc_prepare_loop_dev(bdev->src + 5, loname, LO_FLAGS_AUTOCLEAR); + if (loopfd < 0) + return -1; + DEBUG("prepared loop device \"%s\"", loname); ret = mount_unknown_fs(loname, bdev->dest, bdev->mntopts); if (ret < 0) - ERROR("Error mounting %s", bdev->src); + ERROR("failed to mount rootfs \"%s\" onto \"%s\" via loop device \"%s\"", bdev->src, bdev->dest, loname); else - bdev->lofd = lfd; + bdev->lofd = loopfd; + DEBUG("mounted rootfs \"%s\" onto \"%s\" via loop device \"%s\"", bdev->src, bdev->dest, loname); -out: - if (ffd > -1) - close(ffd); - if (ret < 0) { - close(lfd); - bdev->lofd = -1; - } return ret; } @@ -266,63 +235,3 @@ static int do_loop_create(const char *path, uint64_t size, const char *fstype) return 0; } - -static int find_free_loopdev_no_control(int *retfd, char *namep) -{ - struct dirent *direntp; - struct loop_info64 lo; - DIR *dir; - int fd = -1; - - dir = opendir("/dev"); - if (!dir) { - SYSERROR("Error opening /dev"); - return -1; - } - while ((direntp = readdir(dir))) { - - if (!direntp) - break; - if (strncmp(direntp->d_name, "loop", 4) != 0) - continue; - fd = openat(dirfd(dir), direntp->d_name, O_RDWR); - if (fd < 0) - continue; - if (ioctl(fd, LOOP_GET_STATUS64, &lo) == 0 || errno != ENXIO) { - close(fd); - fd = -1; - continue; - } - // We can use this fd - snprintf(namep, 100, "/dev/%s", direntp->d_name); - break; - } - closedir(dir); - if (fd == -1) { - ERROR("No loop device found"); - return -1; - } - - *retfd = fd; - return 0; -} - -static int find_free_loopdev(int *retfd, char *namep) -{ - int rc, fd = -1; - int ctl = open("/dev/loop-control", O_RDWR); - if (ctl < 0) - return find_free_loopdev_no_control(retfd, namep); - rc = ioctl(ctl, LOOP_CTL_GET_FREE); - if (rc >= 0) { - snprintf(namep, 100, "/dev/loop%d", rc); - fd = open(namep, O_RDWR); - } - close(ctl); - if (fd == -1) { - ERROR("No loop device found"); - return -1; - } - *retfd = fd; - return 0; -} diff --git a/src/lxc/bdev/lxclvm.c b/src/lxc/bdev/lxclvm.c index 
75de17f..bcd8be8 100644 --- a/src/lxc/bdev/lxclvm.c +++ b/src/lxc/bdev/lxclvm.c @@ -29,6 +29,7 @@ #include <stdlib.h> #include <string.h> #include <unistd.h> +#include <sys/sysmacros.h> #include <sys/wait.h> #include "bdev.h" @@ -41,9 +42,6 @@ #ifdef MAJOR_IN_MKDEV # include <sys/mkdev.h> #endif -#ifdef MAJOR_IN_SYSMACROS -# include <sys/sysmacros.h> -#endif lxc_log_define(lxclvm, lxc); diff --git a/src/lxc/caps.c b/src/lxc/caps.c index 73b5516..195707f 100644 --- a/src/lxc/caps.c +++ b/src/lxc/caps.c @@ -36,7 +36,7 @@ lxc_log_define(lxc_caps, lxc); -#if HAVE_SYS_CAPABILITY_H +#if HAVE_LIBCAP #ifndef PR_CAPBSET_READ #define PR_CAPBSET_READ 23 @@ -209,27 +209,61 @@ int lxc_caps_last_cap(void) return last_cap; } -bool lxc_cap_is_set(cap_value_t cap, cap_flag_t flag) +static bool lxc_cap_is_set(cap_t caps, cap_value_t cap, cap_flag_t flag) { int ret; - cap_t caps; cap_flag_value_t flagval; - caps = cap_get_proc(); + ret = cap_get_flag(caps, cap, flag, &flagval); + if (ret < 0) { + ERROR("Failed to perform cap_get_flag(): %s.", strerror(errno)); + return false; + } + + return flagval == CAP_SET; +} + +bool lxc_file_cap_is_set(const char *path, cap_value_t cap, cap_flag_t flag) +{ + #if LIBCAP_SUPPORTS_FILE_CAPABILITIES + bool cap_is_set; + cap_t caps; + + caps = cap_get_file(path); if (!caps) { - ERROR("Failed to perform cap_get_proc(): %s.", strerror(errno)); + /* This is undocumented in the manpage but the source code show + * that cap_get_file() may return NULL when successful for the + * case where it didn't detect any file capabilities. In this + * case errno will be set to ENODATA. 
+ */ + if (errno != ENODATA) + ERROR("Failed to perform cap_get_file(): %s.\n", strerror(errno)); return false; } - ret = cap_get_flag(caps, cap, flag, &flagval); - if (ret < 0) { - ERROR("Failed to perform cap_get_flag(): %s.", strerror(errno)); - cap_free(caps); + cap_is_set = lxc_cap_is_set(caps, cap, flag); + cap_free(caps); + return cap_is_set; + #else + errno = ENODATA; + return false; + #endif +} + +bool lxc_proc_cap_is_set(cap_value_t cap, cap_flag_t flag) +{ + bool cap_is_set; + cap_t caps; + + caps = cap_get_proc(); + if (!caps) { + ERROR("Failed to perform cap_get_proc(): %s.\n", strerror(errno)); return false; } + cap_is_set = lxc_cap_is_set(caps, cap, flag); cap_free(caps); - return flagval == CAP_SET; + return cap_is_set; } #endif diff --git a/src/lxc/caps.h b/src/lxc/caps.h index 390dbdd..2a8c282 100644 --- a/src/lxc/caps.h +++ b/src/lxc/caps.h @@ -27,7 +27,7 @@ #ifndef __LXC_CAPS_H #define __LXC_CAPS_H -#if HAVE_SYS_CAPABILITY_H +#if HAVE_LIBCAP #include <sys/capability.h> extern int lxc_caps_down(void); @@ -36,7 +36,8 @@ extern int lxc_caps_init(void); extern int lxc_caps_last_cap(void); -extern bool lxc_cap_is_set(cap_value_t cap, cap_flag_t flag); +extern bool lxc_proc_cap_is_set(cap_value_t cap, cap_flag_t flag); +extern bool lxc_file_cap_is_set(const char *path, cap_value_t cap, cap_flag_t flag); #else static inline int lxc_caps_down(void) { return 0; @@ -54,8 +55,12 @@ static inline int lxc_caps_last_cap(void) { typedef int cap_value_t; typedef int cap_flag_t; -static inline bool lxc_cap_is_set(cap_value_t cap, cap_flag_t flag) { - return true; +static inline bool lxc_proc_cap_is_set(cap_value_t cap, cap_flag_t flag) { + return false; +} + +static inline bool lxc_file_cap_is_set(const char *path, cap_value_t cap, cap_flag_t flag) { + return false; } #endif diff --git a/src/lxc/cgroups/cgfs.c b/src/lxc/cgroups/cgfs.c index 8499200..3bfa523 100644 --- a/src/lxc/cgroups/cgfs.c +++ b/src/lxc/cgroups/cgfs.c @@ -1880,7 +1880,7 @@ static int 
create_or_remove_cgroup(bool do_remove, } else r = rmdir(buf); } else - r = mkdir(buf, 0777); + r = mkdir_p(buf, 0777); saved_errno = errno; free(buf); errno = saved_errno; diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c index 2b772e2..ebd548b 100644 --- a/src/lxc/cgroups/cgfsng.c +++ b/src/lxc/cgroups/cgfsng.c @@ -101,6 +101,12 @@ struct hierarchy **hierarchies; */ char *cgroup_use; +/* + * @lxc_cgfsng_debug - whether to print debug info to stdout for the cgfsng + * driver + */ +static bool lxc_cgfsng_debug; + static void free_string_list(char **clist) { if (clist) { @@ -986,45 +992,44 @@ static void get_existing_subsystems(char ***klist, char ***nlist) static void trim(char *s) { size_t len = strlen(s); - while (s[len-1] == '\n') + while ((len > 1) && (s[len - 1] == '\n')) s[--len] = '\0'; } -static void print_init_debuginfo(struct cgfsng_handler_data *d) +static void lxc_cgfsng_print_handler_data(const struct cgfsng_handler_data *d) +{ + printf("Cgroup information:\n"); + printf(" container name: %s\n", d->name ? d->name : "(null)"); + printf(" lxc.cgroup.use: %s\n", cgroup_use ? cgroup_use : "(null)"); + printf(" lxc.cgroup.pattern: %s\n", d->cgroup_pattern ? d->cgroup_pattern : "(null)"); + printf(" cgroup: %s\n", d->container_cgroup ? d->container_cgroup : "(null)"); +} + +static void lxc_cgfsng_print_hierarchies() { struct hierarchy **it; int i; - if (!getenv("LXC_DEBUG_CGFSNG")) - return; - - DEBUG("Cgroup information:"); - DEBUG(" container name: %s", d->name ? d->name : "(null)"); - DEBUG(" lxc.cgroup.use: %s", cgroup_use ? cgroup_use : "(null)"); - DEBUG(" lxc.cgroup.pattern: %s", d->cgroup_pattern ? d->cgroup_pattern : "(null)"); - DEBUG(" cgroup: %s", d->container_cgroup ? 
d->container_cgroup : "(null)"); if (!hierarchies) { - DEBUG(" No hierarchies found."); + printf(" No hierarchies found."); return; } - DEBUG(" Hierarchies:"); + printf(" Hierarchies:\n"); for (i = 0, it = hierarchies; it && *it; it++, i++) { char **cit; int j; - DEBUG(" %d: base_cgroup %s", i, (*it)->base_cgroup ? (*it)->base_cgroup : "(null)"); - DEBUG(" mountpoint %s", (*it)->mountpoint ? (*it)->mountpoint : "(null)"); - DEBUG(" controllers:"); + printf(" %d: base_cgroup %s\n", i, (*it)->base_cgroup ? (*it)->base_cgroup : "(null)"); + printf(" mountpoint %s\n", (*it)->mountpoint ? (*it)->mountpoint : "(null)"); + printf(" controllers:\n"); for (j = 0, cit = (*it)->controllers; cit && *cit; cit++, j++) - DEBUG(" %d: %s", j, *cit); + printf(" %d: %s\n", j, *cit); } } -static void print_basecg_debuginfo(char *basecginfo, char **klist, char **nlist) +static void lxc_cgfsng_print_basecg_debuginfo(char *basecginfo, char **klist, char **nlist) { int k; char **it; - if (!getenv("LXC_DEBUG_CGFSNG")) - return; printf("basecginfo is:\n"); printf("%s\n", basecginfo); @@ -1035,6 +1040,12 @@ static void print_basecg_debuginfo(char *basecginfo, char **klist, char **nlist) printf("named subsystem %d: %s\n", k, *it); } +static void lxc_cgfsng_print_debuginfo(const struct cgfsng_handler_data *d) +{ + lxc_cgfsng_print_handler_data(d); + lxc_cgfsng_print_hierarchies(); +} + /* * At startup, parse_hierarchies finds all the info we need about * cgroup mountpoints and current cgroups, and stores it in @d. 
@@ -1064,7 +1075,8 @@ static bool parse_hierarchies(void) get_existing_subsystems(&klist, &nlist); - print_basecg_debuginfo(basecginfo, klist, nlist); + if (lxc_cgfsng_debug) + lxc_cgfsng_print_basecg_debuginfo(basecginfo, klist, nlist); /* we support simple cgroup mounts and lxcfs mounts */ while (getline(&line, &len, f) != -1) { @@ -1116,6 +1128,11 @@ static bool parse_hierarchies(void) fclose(f); free(line); + if (lxc_cgfsng_debug) { + printf("writeable subsystems:\n"); + lxc_cgfsng_print_hierarchies(); + } + /* verify that all controllers in cgroup.use and all crucial * controllers are accounted for */ @@ -1156,7 +1173,8 @@ static void *cgfsng_init(const char *name) } d->cgroup_pattern = must_copy_string(cgroup_pattern); - print_init_debuginfo(d); + if (lxc_cgfsng_debug) + lxc_cgfsng_print_debuginfo(d); return d; @@ -1294,8 +1312,12 @@ static void cgfsng_destroy(void *hdata, struct lxc_conf *conf) struct cgroup_ops *cgfsng_ops_init(void) { + if (getenv("LXC_DEBUG_CGFSNG")) + lxc_cgfsng_debug = true; + if (!collect_hierarchy_info()) return NULL; + return &cgfsng_ops; } diff --git a/src/lxc/commands.c b/src/lxc/commands.c index b17879b..27c8c08 100644 --- a/src/lxc/commands.c +++ b/src/lxc/commands.c @@ -74,14 +74,19 @@ lxc_log_define(lxc_commands, lxc); -static int fill_sock_name(char *path, int len, const char *name, +static int fill_sock_name(char *path, int len, const char *lxcname, const char *lxcpath, const char *hashed_sock_name) { + const char *name; char *tmppath; size_t tmplen; uint64_t hash; int ret; + name = lxcname; + if (!name) + name = ""; + if (hashed_sock_name != NULL) { ret = snprintf(path, len, "lxc/%s/command", hashed_sock_name); if (ret < 0 || ret >= len) { @@ -193,8 +198,11 @@ static int lxc_cmd_rsp_recv(int sock, struct lxc_cmd_rr *cmd) rsp->data = rspdata; } - if (rsp->datalen == 0) + if (rsp->datalen == 0) { + DEBUG("command %s response data length is 0", + lxc_cmd_str(cmd->req.cmd)); return ret; + } if (rsp->datalen > LXC_CMD_DATA_MAX) { 
ERROR("Command %s response data %d too long.", lxc_cmd_str(cmd->req.cmd), rsp->datalen); @@ -274,7 +282,7 @@ static int lxc_cmd(const char *name, struct lxc_cmd_rr *cmd, int *stopped, int sock, ret = -1; char path[sizeof(((struct sockaddr_un *)0)->sun_path)] = { 0 }; char *offset = &path[1]; - int len; + size_t len; int stay_connected = cmd->req.cmd == LXC_CMD_CONSOLE; *stopped = 0; @@ -982,7 +990,7 @@ int lxc_cmd_init(const char *name, struct lxc_handler *handler, * Although null termination isn't required by the API, we do it anyway * because we print the sockname out sometimes. */ - len = sizeof(path)-2; + len = sizeof(path) - 2; if (fill_sock_name(offset, len, name, lxcpath, NULL)) return -1; diff --git a/src/lxc/conf.c b/src/lxc/conf.c index a93124b..923a4d9 100644 --- a/src/lxc/conf.c +++ b/src/lxc/conf.c @@ -47,6 +47,7 @@ #include <sys/prctl.h> #include <sys/stat.h> #include <sys/socket.h> +#include <sys/sysmacros.h> #include <sys/syscall.h> #include <sys/types.h> #include <sys/utsname.h> @@ -56,9 +57,6 @@ #ifdef MAJOR_IN_MKDEV # include <sys/mkdev.h> #endif -#ifdef MAJOR_IN_SYSMACROS -# include <sys/sysmacros.h> -#endif #ifdef HAVE_STATVFS #include <sys/statvfs.h> @@ -91,7 +89,7 @@ #include "utils.h" #include "lsm/lsm.h" -#if HAVE_SYS_CAPABILITY_H +#if HAVE_LIBCAP #include <sys/capability.h> #endif @@ -107,7 +105,7 @@ lxc_log_define(lxc_conf, lxc); -#if HAVE_SYS_CAPABILITY_H +#if HAVE_LIBCAP #ifndef CAP_SETFCAP #define CAP_SETFCAP 31 #endif @@ -172,6 +170,10 @@ static int sethostname(const char * name, size_t len) #define MS_PRIVATE (1<<18) #endif +#ifndef MS_LAZYTIME +#define MS_LAZYTIME (1<<25) +#endif + /* memfd_create() */ #ifndef MFD_CLOEXEC #define MFD_CLOEXEC 0x0001U @@ -288,35 +290,36 @@ static instantiate_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = { }; static struct mount_opt mount_opt[] = { + { "async", 1, MS_SYNCHRONOUS }, + { "atime", 1, MS_NOATIME }, + { "bind", 0, MS_BIND }, { "defaults", 0, 0 }, - { "ro", 0, MS_RDONLY }, - { "rw", 1, 
MS_RDONLY }, - { "suid", 1, MS_NOSUID }, - { "nosuid", 0, MS_NOSUID }, { "dev", 1, MS_NODEV }, - { "nodev", 0, MS_NODEV }, - { "exec", 1, MS_NOEXEC }, - { "noexec", 0, MS_NOEXEC }, - { "sync", 0, MS_SYNCHRONOUS }, - { "async", 1, MS_SYNCHRONOUS }, + { "diratime", 1, MS_NODIRATIME }, { "dirsync", 0, MS_DIRSYNC }, - { "remount", 0, MS_REMOUNT }, + { "exec", 1, MS_NOEXEC }, + { "lazytime", 0, MS_LAZYTIME }, { "mand", 0, MS_MANDLOCK }, - { "nomand", 1, MS_MANDLOCK }, - { "atime", 1, MS_NOATIME }, { "noatime", 0, MS_NOATIME }, - { "diratime", 1, MS_NODIRATIME }, + { "nodev", 0, MS_NODEV }, { "nodiratime", 0, MS_NODIRATIME }, - { "bind", 0, MS_BIND }, + { "noexec", 0, MS_NOEXEC }, + { "nomand", 1, MS_MANDLOCK }, + { "norelatime", 1, MS_RELATIME }, + { "nostrictatime", 1, MS_STRICTATIME }, + { "nosuid", 0, MS_NOSUID }, { "rbind", 0, MS_BIND|MS_REC }, { "relatime", 0, MS_RELATIME }, - { "norelatime", 1, MS_RELATIME }, + { "remount", 0, MS_REMOUNT }, + { "ro", 0, MS_RDONLY }, + { "rw", 1, MS_RDONLY }, { "strictatime", 0, MS_STRICTATIME }, - { "nostrictatime", 1, MS_STRICTATIME }, + { "suid", 1, MS_NOSUID }, + { "sync", 0, MS_SYNCHRONOUS }, { NULL, 0, 0 }, }; -#if HAVE_SYS_CAPABILITY_H +#if HAVE_LIBCAP static struct caps_opt caps_opt[] = { { "chown", CAP_CHOWN }, { "dac_override", CAP_DAC_OVERRIDE }, @@ -516,7 +519,7 @@ static int run_script(const char *name, const char *section, const char *script, } static int mount_rootfs_dir(const char *rootfs, const char *target, - const char *options) + const char *options) { unsigned long mntflags; char *mntdata; @@ -533,99 +536,21 @@ static int mount_rootfs_dir(const char *rootfs, const char *target, return ret; } -static int setup_lodev(const char *rootfs, int fd, struct loop_info64 *loinfo) -{ - int rfd; - int ret = -1; - - rfd = open(rootfs, O_RDWR); - if (rfd < 0) { - SYSERROR("failed to open '%s'", rootfs); - return -1; - } - - memset(loinfo, 0, sizeof(*loinfo)); - - loinfo->lo_flags = LO_FLAGS_AUTOCLEAR; - - if (ioctl(fd, 
LOOP_SET_FD, rfd)) { - SYSERROR("failed to LOOP_SET_FD"); - goto out; - } - - if (ioctl(fd, LOOP_SET_STATUS64, loinfo)) { - SYSERROR("failed to LOOP_SET_STATUS64"); - goto out; - } - - ret = 0; -out: - close(rfd); - - return ret; -} - -static int mount_rootfs_file(const char *rootfs, const char *target, - const char *options) +static int lxc_mount_rootfs_file(const char *rootfs, const char *target, + const char *options) { - struct dirent *direntp; - struct loop_info64 loinfo; - int ret = -1, fd = -1, rc; - DIR *dir; + int ret, loopfd; char path[MAXPATHLEN]; - dir = opendir("/dev"); - if (!dir) { - SYSERROR("failed to open '/dev'"); + loopfd = lxc_prepare_loop_dev(rootfs, path, LO_FLAGS_AUTOCLEAR); + if (loopfd < 0) return -1; - } + DEBUG("prepared loop device \"%s\"", path); - while ((direntp = readdir(dir))) { + ret = mount_unknown_fs(path, target, options); + close(loopfd); - if (!direntp) - break; - - if (!strcmp(direntp->d_name, ".")) - continue; - - if (!strcmp(direntp->d_name, "..")) - continue; - - if (strncmp(direntp->d_name, "loop", 4)) - continue; - - rc = snprintf(path, MAXPATHLEN, "/dev/%s", direntp->d_name); - if (rc < 0 || rc >= MAXPATHLEN) - continue; - - fd = open(path, O_RDWR); - if (fd < 0) - continue; - - if (ioctl(fd, LOOP_GET_STATUS64, &loinfo) == 0) { - close(fd); - continue; - } - - if (errno != ENXIO) { - WARN("unexpected error for ioctl on '%s': %m", - direntp->d_name); - close(fd); - continue; - } - - DEBUG("found '%s' free lodev", path); - - ret = setup_lodev(rootfs, fd, &loinfo); - if (!ret) - ret = mount_unknown_fs(path, target, options); - close(fd); - - break; - } - - if (closedir(dir)) - WARN("failed to close directory"); + DEBUG("mounted rootfs \"%s\" on loop device \"%s\" via loop device \"%s\"", rootfs, target, path); return ret; } @@ -858,33 +783,32 @@ static int mount_rootfs(const char *rootfs, const char *target, const char *opti } rtfs_type[] = { { S_IFDIR, mount_rootfs_dir }, { S_IFBLK, mount_rootfs_block }, - { S_IFREG, 
mount_rootfs_file }, + { S_IFREG, lxc_mount_rootfs_file }, }; if (!realpath(rootfs, absrootfs)) { - SYSERROR("failed to get real path for '%s'", rootfs); + SYSERROR("Failed to get real path for \"%s\".", rootfs); return -1; } if (access(absrootfs, F_OK)) { - SYSERROR("'%s' is not accessible", absrootfs); + SYSERROR("The rootfs \"%s\" is not accessible.", absrootfs); return -1; } if (stat(absrootfs, &s)) { - SYSERROR("failed to stat '%s'", absrootfs); + SYSERROR("Failed to stat the rootfs \"%s\".", absrootfs); return -1; } for (i = 0; i < sizeof(rtfs_type)/sizeof(rtfs_type[0]); i++) { - if (!__S_ISTYPE(s.st_mode, rtfs_type[i].type)) continue; return rtfs_type[i].cb(absrootfs, target, options); } - ERROR("unsupported rootfs type for '%s'", absrootfs); + ERROR("Unsupported rootfs type for rootfs \"%s\".", absrootfs); return -1; } @@ -1186,45 +1110,47 @@ static const struct lxc_devs lxc_devs[] = { { "urandom", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 9 }, { "random", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 8 }, { "tty", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 5, 0 }, - { "console", S_IFCHR | S_IRUSR | S_IWUSR, 5, 1 }, }; -static int fill_autodev(const struct lxc_rootfs *rootfs, bool mount_console) +static int lxc_fill_autodev(const struct lxc_rootfs *rootfs) { int ret; char path[MAXPATHLEN]; int i; mode_t cmask; - INFO("Creating initial consoles under container /dev"); - ret = snprintf(path, MAXPATHLEN, "%s/dev", rootfs->path ? 
rootfs->mount : ""); if (ret < 0 || ret >= MAXPATHLEN) { ERROR("Error calculating container /dev location"); return -1; } - if (!dir_exists(path)) // ignore, just don't try to fill in + /* ignore, just don't try to fill in */ + if (!dir_exists(path)) return 0; - INFO("Populating container /dev"); + INFO("populating container /dev"); cmask = umask(S_IXUSR | S_IXGRP | S_IXOTH); for (i = 0; i < sizeof(lxc_devs) / sizeof(lxc_devs[0]); i++) { const struct lxc_devs *d = &lxc_devs[i]; - if (!strcmp(d->name, "console") && !mount_console) - continue; - ret = snprintf(path, MAXPATHLEN, "%s/dev/%s", rootfs->path ? rootfs->mount : "", d->name); if (ret < 0 || ret >= MAXPATHLEN) return -1; + ret = mknod(path, d->mode, makedev(d->maj, d->min)); - if (ret && errno != EEXIST) { + if (ret < 0) { char hostpath[MAXPATHLEN]; FILE *pathfile; - // Unprivileged containers cannot create devices, so - // bind mount the device from the host + if (errno == EEXIST) { + DEBUG("\"%s\" device already existed", path); + continue; + } + + /* Unprivileged containers cannot create devices, so + * bind mount the device from the host. + */ ret = snprintf(hostpath, MAXPATHLEN, "/dev/%s", d->name); if (ret < 0 || ret >= MAXPATHLEN) return -1; @@ -1234,54 +1160,62 @@ static int fill_autodev(const struct lxc_rootfs *rootfs, bool mount_console) return -1; } fclose(pathfile); - if (safe_mount(hostpath, path, 0, MS_BIND, NULL, - rootfs->path ? rootfs->mount : NULL) != 0) { - SYSERROR("Failed bind mounting device %s from host into container", - d->name); + if (safe_mount(hostpath, path, 0, MS_BIND, NULL, rootfs->path ? 
rootfs->mount : NULL) != 0) { + SYSERROR("Failed bind mounting device %s from host into container", d->name); return -1; } + DEBUG("bind mounted \"%s\" onto \"%s\"", hostpath, path); + } else { + DEBUG("created device node \"%s\"", path); } } umask(cmask); - INFO("Populated container /dev"); + INFO("populated container /dev"); return 0; } static int setup_rootfs(struct lxc_conf *conf) { - const struct lxc_rootfs *rootfs = &conf->rootfs; + struct bdev *bdev; + const struct lxc_rootfs *rootfs; + rootfs = &conf->rootfs; if (!rootfs->path) { - if (mount("", "/", NULL, MS_SLAVE|MS_REC, 0)) { - SYSERROR("Failed to make / rslave"); + if (mount("", "/", NULL, MS_SLAVE | MS_REC, 0)) { + SYSERROR("Failed to make / rslave."); return -1; } return 0; } if (access(rootfs->mount, F_OK)) { - SYSERROR("failed to access to '%s', check it is present", + SYSERROR("Failed to access to \"%s\". Check it is present.", rootfs->mount); return -1; } - // First try mounting rootfs using a bdev - struct bdev *bdev = bdev_init(conf, rootfs->path, rootfs->mount, rootfs->options); - if (bdev && bdev->ops->mount(bdev) == 0) { + /* First try mounting rootfs using a bdev. */ + bdev = bdev_init(conf, rootfs->path, rootfs->mount, rootfs->options); + if (bdev && !bdev->ops->mount(bdev)) { bdev_put(bdev); - DEBUG("mounted '%s' on '%s'", rootfs->path, rootfs->mount); + DEBUG("Mounted rootfs \"%s\" onto \"%s\" with options \"%s\".", + rootfs->path, rootfs->mount, + rootfs->options ? rootfs->options : "(null)"); return 0; } if (bdev) bdev_put(bdev); if (mount_rootfs(rootfs->path, rootfs->mount, rootfs->options)) { - ERROR("failed to mount rootfs"); + ERROR("Failed to mount rootfs \"%s\" onto \"%s\" with options \"%s\".", + rootfs->path, rootfs->mount, + rootfs->options ? rootfs->options : "(null)"); return -1; } - DEBUG("mounted '%s' on '%s'", rootfs->path, rootfs->mount); - + DEBUG("Mounted rootfs \"%s\" onto \"%s\" with options \"%s\".", + rootfs->path, rootfs->mount, + rootfs->options ? 
rootfs->options : "(null)"); return 0; } @@ -1294,23 +1228,23 @@ int prepare_ramfs_root(char *root) char *p2; if (realpath(root, nroot) == NULL) - return -1; + return -errno; if (chdir("/") == -1) - return -1; + return -errno; /* * We could use here MS_MOVE, but in userns this mount is * locked and can't be moved. */ - if (mount(root, "/", NULL, MS_REC | MS_BIND, NULL)) { + if (mount(root, "/", NULL, MS_REC | MS_BIND, NULL) < 0) { SYSERROR("Failed to move %s into /", root); - return -1; + return -errno; } - if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL)) { + if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL) < 0) { SYSERROR("Failed to make . rprivate"); - return -1; + return -errno; } /* @@ -1376,64 +1310,112 @@ int prepare_ramfs_root(char *root) static int setup_pivot_root(const struct lxc_rootfs *rootfs) { - if (!rootfs->path) + if (!rootfs->path) { + DEBUG("container does not have a rootfs, so not doing pivot root"); return 0; + } if (detect_ramfs_rootfs()) { - if (prepare_ramfs_root(rootfs->mount)) + DEBUG("detected that container is on ramfs"); + if (prepare_ramfs_root(rootfs->mount)) { + ERROR("failed to prepare minimal ramfs root"); return -1; - } else if (setup_rootfs_pivot_root(rootfs->mount)) { - ERROR("failed to setup pivot root"); + } + + DEBUG("prepared ramfs root for container"); + return 0; + } + + if (setup_rootfs_pivot_root(rootfs->mount) < 0) { + ERROR("failed to pivot root"); return -1; } + DEBUG("finished pivot root"); return 0; } -static int setup_pts(int pts) +static int lxc_setup_devpts(int num_pts) { - char target[PATH_MAX]; + int ret; + const char *devpts_mntopts = "newinstance,ptmxmode=0666,mode=0620,gid=5"; - if (!pts) + if (!num_pts) { + DEBUG("no new devpts instance will be mounted since no pts " + "devices are requested"); return 0; - - if (!access("/dev/pts/ptmx", F_OK) && umount("/dev/pts")) { - SYSERROR("failed to umount 'dev/pts'"); - return -1; } - if (mkdir("/dev/pts", 0755)) { - if ( errno != EEXIST ) { - 
SYSERROR("failed to create '/dev/pts'"); - return -1; + /* Unmount old devpts instance. */ + ret = access("/dev/pts/ptmx", F_OK); + if (!ret) { + ret = umount("/dev/pts"); + if (ret < 0) { + SYSERROR("failed to unmount old devpts instance"); + return -1; } + DEBUG("unmounted old /dev/pts instance"); } - if (mount("devpts", "/dev/pts", "devpts", MS_MGC_VAL, - "newinstance,ptmxmode=0666,mode=0620,gid=5")) { - SYSERROR("failed to mount a new instance of '/dev/pts'"); + /* Create mountpoint for devpts instance. */ + ret = mkdir("/dev/pts", 0755); + if (ret < 0 && errno != EEXIST) { + SYSERROR("failed to create the \"/dev/pts\" directory"); return -1; } - if (access("/dev/ptmx", F_OK)) { - if (!symlink("/dev/pts/ptmx", "/dev/ptmx")) - goto out; - SYSERROR("failed to symlink '/dev/pts/ptmx'->'/dev/ptmx'"); + /* Mount new devpts instance. */ + ret = mount("devpts", "/dev/pts", "devpts", MS_MGC_VAL, devpts_mntopts); + if (ret < 0) { + SYSERROR("failed to mount new devpts instance"); return -1; } - if (realpath("/dev/ptmx", target) && !strcmp(target, "/dev/pts/ptmx")) - goto out; + /* Remove any pre-existing /dev/ptmx file. */ + ret = access("/dev/ptmx", F_OK); + if (!ret) { + ret = remove("/dev/ptmx"); + if (ret < 0) { + SYSERROR("failed to remove existing \"/dev/ptmx\""); + return -1; + } + DEBUG("removed existing \"/dev/ptmx\""); + } - /* fallback here, /dev/pts/ptmx exists just mount bind */ - if (mount("/dev/pts/ptmx", "/dev/ptmx", "none", MS_BIND, 0)) { - SYSERROR("mount failed '/dev/pts/ptmx'->'/dev/ptmx'"); + /* Create dummy /dev/ptmx file as bind mountpoint for /dev/pts/ptmx. 
*/ + ret = open("/dev/ptmx", O_CREAT, 0666); + if (ret < 0) { + SYSERROR("failed to create dummy \"/dev/ptmx\" file as bind mount target"); return -1; } + close(ret); + DEBUG("created dummy \"/dev/ptmx\" file as bind mount target"); - INFO("created new pts instance"); + /* Fallback option: create symlink /dev/ptmx -> /dev/pts/ptmx */ + ret = mount("/dev/pts/ptmx", "/dev/ptmx", NULL, MS_BIND, NULL); + if (!ret) { + DEBUG("bind mounted \"/dev/pts/ptmx\" to \"/dev/ptmx\""); + return 0; + } else { + /* Fallthrough and try to create a symlink. */ + ERROR("failed to bind mount \"/dev/pts/ptmx\" to \"/dev/ptmx\""); + } + + /* Remove the dummy /dev/ptmx file we created above. */ + ret = remove("/dev/ptmx"); + if (ret < 0) { + SYSERROR("failed to remove existing \"/dev/ptmx\""); + return -1; + } + + /* Fallback option: Create symlink /dev/ptmx -> /dev/pts/ptmx. */ + ret = symlink("/dev/pts/ptmx", "/dev/ptmx"); + if (ret < 0) { + SYSERROR("failed to create symlink \"/dev/ptmx\" -> \"/dev/pts/ptmx\""); + return -1; + } + DEBUG("created symlink \"/dev/ptmx\" -> \"/dev/pts/ptmx\""); -out: return 0; } @@ -1454,127 +1436,204 @@ static int setup_personality(int persona) return 0; } -static int setup_dev_console(const struct lxc_rootfs *rootfs, - const struct lxc_console *console) +static int lxc_setup_dev_console(const struct lxc_rootfs *rootfs, + const struct lxc_console *console) { char path[MAXPATHLEN]; int ret, fd; + if (console->path && !strcmp(console->path, "none")) + return 0; + ret = snprintf(path, sizeof(path), "%s/dev/console", rootfs->mount); - if (ret >= sizeof(path)) { - ERROR("console path too long"); + if (ret < 0 || (size_t)ret >= sizeof(path)) return -1; + + /* When we are asked to setup a console we remove any previous + * /dev/console bind-mounts. 
+ */ + if (file_exists(path)) { + ret = lxc_unstack_mountpoint(path, false); + if (ret < 0) { + ERROR("failed to unmount \"%s\": %s", path, strerror(errno)); + return -ret; + } else { + DEBUG("cleared all (%d) mounts from \"%s\"", ret, path); + } + ret = unlink(path); + if (ret < 0) { + SYSERROR("error unlinking %s", path); + return -errno; + } } + /* For unprivileged containers autodev or automounts will already have + * taken care of creating /dev/console. + */ fd = open(path, O_CREAT | O_EXCL, S_IXUSR | S_IXGRP | S_IXOTH); if (fd < 0) { if (errno != EEXIST) { SYSERROR("failed to create console"); - return -1; + return -errno; } } else { close(fd); } - if (console->master < 0) { - INFO("no console"); - return 0; - } - if (chmod(console->name, S_IXUSR | S_IXGRP | S_IXOTH)) { - SYSERROR("failed to set mode '0%o' to '%s'", - S_IXUSR | S_IXGRP | S_IXOTH, console->name); - return -1; + SYSERROR("failed to set mode '0%o' to '%s'", S_IXUSR | S_IXGRP | S_IXOTH, console->name); + return -errno; } - if (safe_mount(console->name, path, "none", MS_BIND, 0, rootfs->mount)) { + if (safe_mount(console->name, path, "none", MS_BIND, 0, rootfs->mount) < 0) { ERROR("failed to mount '%s' on '%s'", console->name, path); return -1; } - INFO("console has been setup"); + DEBUG("mounted pts device \"%s\" onto \"%s\"", console->name, path); return 0; } -static int setup_ttydir_console(const struct lxc_rootfs *rootfs, - const struct lxc_console *console, - char *ttydir) +static int lxc_setup_ttydir_console(const struct lxc_rootfs *rootfs, + const struct lxc_console *console, + char *ttydir) { - char path[MAXPATHLEN], lxcpath[MAXPATHLEN]; int ret; + char path[MAXPATHLEN], lxcpath[MAXPATHLEN]; /* create rootfs/dev/<ttydir> directory */ - ret = snprintf(path, sizeof(path), "%s/dev/%s", rootfs->mount, - ttydir); - if (ret >= sizeof(path)) + ret = snprintf(path, sizeof(path), "%s/dev/%s", rootfs->mount, ttydir); + if (ret < 0 || (size_t)ret >= sizeof(path)) return -1; + ret = mkdir(path, 0755); 
if (ret && errno != EEXIST) { SYSERROR("failed with errno %d to create %s", errno, path); - return -1; - } - INFO("created %s", path); - - ret = snprintf(lxcpath, sizeof(lxcpath), "%s/dev/%s/console", - rootfs->mount, ttydir); - if (ret >= sizeof(lxcpath)) { - ERROR("console path too long"); - return -1; + return -errno; } + DEBUG("created directory for console and tty devices at \%s\"", path); - snprintf(path, sizeof(path), "%s/dev/console", rootfs->mount); - ret = unlink(path); - if (ret && errno != ENOENT) { - SYSERROR("error unlinking %s", path); + ret = snprintf(lxcpath, sizeof(lxcpath), "%s/dev/%s/console", rootfs->mount, ttydir); + if (ret < 0 || (size_t)ret >= sizeof(lxcpath)) return -1; - } ret = creat(lxcpath, 0660); - if (ret==-1 && errno != EEXIST) { + if (ret == -1 && errno != EEXIST) { SYSERROR("error %d creating %s", errno, lxcpath); - return -1; + return -errno; } if (ret >= 0) close(ret); - if (console->master < 0) { - INFO("no console"); - return 0; - } - - if (safe_mount(console->name, lxcpath, "none", MS_BIND, 0, rootfs->mount)) { - ERROR("failed to mount '%s' on '%s'", console->name, lxcpath); + ret = snprintf(path, sizeof(path), "%s/dev/console", rootfs->mount); + if (ret < 0 || (size_t)ret >= sizeof(lxcpath)) return -1; + + /* When we are asked to setup a console we remove any previous + * /dev/console bind-mounts. + */ + if (console->path && !strcmp(console->path, "none")) { + struct stat st; + ret = stat(path, &st); + if (ret < 0) { + if (errno == ENOENT) + return 0; + SYSERROR("failed stat() \"%s\"", path); + return -errno; + } + + /* /dev/console must be character device with major number 5 and + * minor number 1. If not, give benefit of the doubt and assume + * the user has mounted something else right there on purpose. 
+ */ + if (((st.st_mode & S_IFMT) != S_IFCHR) || major(st.st_rdev) != 5 || minor(st.st_rdev) != 1) + return 0; + + /* In case the user requested a bind-mount for /dev/console and + * requests a ttydir we move the mount to the + * /dev/<ttydir/console. + * Note, we only move the uppermost mount and clear all other + * mounts underneath for safety. + * If it is a character device created via mknod() we simply + * rename it. + */ + ret = safe_mount(path, lxcpath, "none", MS_MOVE, NULL, rootfs->mount); + if (ret < 0) { + if (errno != EINVAL) { + ERROR("failed to MS_MOVE \"%s\" to \"%s\": %s", path, lxcpath, strerror(errno)); + return -errno; + } + /* path was not a mountpoint */ + ret = rename(path, lxcpath); + if (ret < 0) { + ERROR("failed to rename \"%s\" to \"%s\": %s", path, lxcpath, strerror(errno)); + return -errno; + } + DEBUG("renamed \"%s\" to \"%s\"", path, lxcpath); + } else { + DEBUG("moved mount \"%s\" to \"%s\"", path, lxcpath); + } + + /* Clear all remaining bind-mounts. */ + ret = lxc_unstack_mountpoint(path, false); + if (ret < 0) { + ERROR("failed to unmount \"%s\": %s", path, strerror(errno)); + return -ret; + } else { + DEBUG("cleared all (%d) mounts from \"%s\"", ret, path); + } + } else { + if (file_exists(path)) { + ret = lxc_unstack_mountpoint(path, false); + if (ret < 0) { + ERROR("failed to unmount \"%s\": %s", path, strerror(errno)); + return -ret; + } else { + DEBUG("cleared all (%d) mounts from \"%s\"", ret, path); + } + } + + if (safe_mount(console->name, lxcpath, "none", MS_BIND, 0, rootfs->mount) < 0) { + ERROR("failed to mount '%s' on '%s'", console->name, lxcpath); + return -1; + } + DEBUG("mounted \"%s\" onto \"%s\"", console->name, lxcpath); } - /* create symlink from rootfs/dev/console to 'lxc/console' */ + /* create symlink from rootfs /dev/console to '<ttydir>/console' */ ret = snprintf(lxcpath, sizeof(lxcpath), "%s/console", ttydir); - if (ret >= sizeof(lxcpath)) { - ERROR("lxc/console path too long"); + if (ret < 0 || 
(size_t)ret >= sizeof(lxcpath)) return -1; + + ret = unlink(path); + if (ret && errno != ENOENT) { + SYSERROR("error unlinking %s", path); + return -errno; } + ret = symlink(lxcpath, path); - if (ret) { - SYSERROR("failed to create symlink for console"); + if (ret < 0) { + SYSERROR("failed to create symlink for console from \"%s\" to \"%s\"", lxcpath, path); return -1; } - INFO("console has been setup on %s", lxcpath); - + DEBUG("console has been setup under \"%s\" and symlinked to \"%s\"", lxcpath, path); return 0; } -static int setup_console(const struct lxc_rootfs *rootfs, - const struct lxc_console *console, - char *ttydir) +static int lxc_setup_console(const struct lxc_rootfs *rootfs, + const struct lxc_console *console, char *ttydir) { - /* We don't have a rootfs, /dev/console will be shared */ - if (!rootfs->path) + /* We don't have a rootfs, /dev/console will be shared. */ + if (!rootfs->path) { + DEBUG("/dev/console will be shared with the host"); return 0; + } + if (!ttydir) - return setup_dev_console(rootfs, console); + return lxc_setup_dev_console(rootfs, console); - return setup_ttydir_console(rootfs, console, ttydir); + return lxc_setup_ttydir_console(rootfs, console, ttydir); } static int setup_kmsg(const struct lxc_rootfs *rootfs, @@ -3019,7 +3078,7 @@ bool lxc_delete_network(struct lxc_handler *handler) /* Explicitly delete host veth device to prevent lingering * devices. We had issues in LXD around this. 
*/ - if (netdev->type == LXC_NET_VETH) { + if (netdev->type == LXC_NET_VETH && !am_unpriv()) { char *hostveth; if (netdev->priv.veth_attr.pair) { hostveth = netdev->priv.veth_attr.pair; @@ -3028,8 +3087,6 @@ bool lxc_delete_network(struct lxc_handler *handler) WARN("Failed to remove interface \"%s\" from host: %s.", hostveth, strerror(-ret)); } else { INFO("Removed interface \"%s\" from host.", hostveth); - free(netdev->priv.veth_attr.pair); - netdev->priv.veth_attr.pair = NULL; } } else if (strlen(netdev->priv.veth_attr.veth1) > 0) { hostveth = netdev->priv.veth_attr.veth1; @@ -3058,20 +3115,21 @@ static int unpriv_assign_nic(const char *lxcpath, char *lxcname, int bytes, pipefd[2]; char *token, *saveptr = NULL; char buffer[MAX_BUFFER_SIZE]; - char netdev_link[IFNAMSIZ+1]; + char netdev_link[IFNAMSIZ + 1]; if (netdev->type != LXC_NET_VETH) { ERROR("nic type %d not support for unprivileged use", - netdev->type); + netdev->type); return -1; } - if(pipe(pipefd) < 0) { + if (pipe(pipefd) < 0) { SYSERROR("pipe failed"); return -1; } - if ((child = fork()) < 0) { + child = fork(); + if (child < 0) { SYSERROR("fork"); close(pipefd[0]); close(pipefd[1]); @@ -3079,35 +3137,45 @@ static int unpriv_assign_nic(const char *lxcpath, char *lxcname, } if (child == 0) { // child - /* close the read-end of the pipe */ - close(pipefd[0]); - /* redirect the stdout to write-end of the pipe */ - dup2(pipefd[1], STDOUT_FILENO); - /* close the write-end of the pipe */ - close(pipefd[1]); + /* Call lxc-user-nic pid type bridge. */ + int ret; + char pidstr[LXC_NUMSTRLEN64]; - // Call lxc-user-nic pid type bridge - char pidstr[20]; - if (netdev->link) { + close(pipefd[0]); /* Close the read-end of the pipe. */ + + /* Redirect stdout to write-end of the pipe. */ + ret = dup2(pipefd[1], STDOUT_FILENO); + close(pipefd[1]); /* Close the write-end of the pipe. 
*/ + if (ret < 0) { + SYSERROR("Failed to dup2() to redirect stdout to pipe file descriptor."); + exit(EXIT_FAILURE); + } + + if (netdev->link) strncpy(netdev_link, netdev->link, IFNAMSIZ); - } else { + else strncpy(netdev_link, "none", IFNAMSIZ); - } - snprintf(pidstr, 19, "%lu", (unsigned long) pid); - pidstr[19] = '\0'; + + ret = snprintf(pidstr, LXC_NUMSTRLEN64, "%d", pid); + if (ret < 0 || ret >= LXC_NUMSTRLEN64) + exit(EXIT_FAILURE); + pidstr[LXC_NUMSTRLEN64 - 1] = '\0'; + + INFO("Execing lxc-user-nic %s %s %s veth %s %s", lxcpath, + lxcname, pidstr, netdev_link, netdev->name); execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, lxcpath, lxcname, - pidstr, "veth", netdev_link, netdev->name, NULL); - SYSERROR("execvp lxc-user-nic"); - exit(1); + pidstr, "veth", netdev_link, netdev->name, NULL); + + SYSERROR("Failed to exec lxc-user-nic."); + exit(EXIT_FAILURE); } /* close the write-end of the pipe */ close(pipefd[1]); bytes = read(pipefd[0], &buffer, MAX_BUFFER_SIZE); - if (bytes < 0) { - SYSERROR("read failed"); - } + if (bytes < 0) + SYSERROR("Failed to read from pipe file descriptor."); buffer[bytes - 1] = '\0'; if (wait_for_pid(child) != 0) { @@ -3122,21 +3190,23 @@ static int unpriv_assign_nic(const char *lxcpath, char *lxcname, token = strtok_r(buffer, ":", &saveptr); if (!token) return -1; - netdev->name = malloc(IFNAMSIZ+1); + + netdev->name = malloc(IFNAMSIZ + 1); if (!netdev->name) { - ERROR("Out of memory"); + SYSERROR("Failed to allocate memory."); return -1; } - memset(netdev->name, 0, IFNAMSIZ+1); + memset(netdev->name, 0, IFNAMSIZ + 1); strncpy(netdev->name, token, IFNAMSIZ); /* fill netdev->veth_attr.pair field */ token = strtok_r(NULL, ":", &saveptr); if (!token) return -1; + netdev->priv.veth_attr.pair = strdup(token); if (!netdev->priv.veth_attr.pair) { - ERROR("Out of memory"); + ERROR("Failed to allocate memory."); return -1; } @@ -3214,75 +3284,145 @@ static int write_id_mapping(enum idtype idtype, pid_t pid, const char *buf, return ret < 0 ? 
ret : closeret; } +/* Check whether a binary exist and has either CAP_SETUID, CAP_SETGID or both. */ +static int idmaptool_on_path_and_privileged(const char *binary, cap_value_t cap) +{ + char *path; + int ret; + struct stat st; + int fret = 0; + + path = on_path(binary, NULL); + if (!path) + return -ENOENT; + + ret = stat(path, &st); + if (ret < 0) { + fret = -errno; + goto cleanup; + } + + /* Check if the binary is setuid. */ + if (st.st_mode & S_ISUID) { + DEBUG("The binary \"%s\" does have the setuid bit set.", path); + fret = 1; + goto cleanup; + } + + #if HAVE_LIBCAP && LIBCAP_SUPPORTS_FILE_CAPABILITIES + /* Check if it has the CAP_SETUID capability. */ + if ((cap & CAP_SETUID) && + lxc_file_cap_is_set(path, CAP_SETUID, CAP_EFFECTIVE) && + lxc_file_cap_is_set(path, CAP_SETUID, CAP_PERMITTED)) { + DEBUG("The binary \"%s\" has CAP_SETUID in its CAP_EFFECTIVE " + "and CAP_PERMITTED sets.", path); + fret = 1; + goto cleanup; + } + + /* Check if it has the CAP_SETGID capability. */ + if ((cap & CAP_SETGID) && + lxc_file_cap_is_set(path, CAP_SETGID, CAP_EFFECTIVE) && + lxc_file_cap_is_set(path, CAP_SETGID, CAP_PERMITTED)) { + DEBUG("The binary \"%s\" has CAP_SETGID in its CAP_EFFECTIVE " + "and CAP_PERMITTED sets.", path); + fret = 1; + goto cleanup; + } + #else + /* If we cannot check for file capabilities we need to give the benefit + * of the doubt. Otherwise we might fail even though all the necessary + * file capabilities are set. + */ + DEBUG("Cannot check for file capabilites as full capability support is " + "missing. 
Manual intervention needed."); + fret = 1; + #endif + +cleanup: + free(path); + return fret; +} + int lxc_map_ids(struct lxc_list *idmap, pid_t pid) { - struct lxc_list *iterator; struct id_map *map; - int ret = 0, use_shadow = 0; + struct lxc_list *iterator; enum idtype type; - char *buf = NULL, *pos, *cmdpath = NULL; + char *pos; + int euid; + int ret = 0, use_shadow = 0; + int uidmap = 0, gidmap = 0; + char *buf = NULL; - /* - * If newuidmap exists, that is, if shadow is handing out subuid - * ranges, then insist that root also reserve ranges in subuid. This + euid = geteuid(); + + /* If new{g,u}idmap exists, that is, if shadow is handing out subuid + * ranges, then insist that root also reserve ranges in subuid. This * will protected it by preventing another user from being handed the * range by shadow. */ - cmdpath = on_path("newuidmap", NULL); - if (cmdpath) { - use_shadow = 1; - free(cmdpath); - } - - if (!use_shadow && geteuid()) { - ERROR("Missing newuidmap/newgidmap"); + uidmap = idmaptool_on_path_and_privileged("newuidmap", CAP_SETUID); + gidmap = idmaptool_on_path_and_privileged("newgidmap", CAP_SETGID); + if (uidmap > 0 && gidmap > 0) { + DEBUG("Functional newuidmap and newgidmap binary found."); + use_shadow = true; + } else if (uidmap == -ENOENT && gidmap == -ENOENT && !euid) { + DEBUG("No newuidmap and newgidmap binary found. Trying to " + "write directly with euid 0."); + use_shadow = false; + } else { + DEBUG("Either one or both of the newuidmap and newgidmap " + "binaries do not exist or are missing necessary " + "privilege."); return -1; } - for(type = ID_TYPE_UID; type <= ID_TYPE_GID; type++) { + for (type = ID_TYPE_UID; type <= ID_TYPE_GID; type++) { int left, fill; - int had_entry = 0; + bool had_entry = false; if (!buf) { - buf = pos = malloc(4096); + buf = pos = malloc(LXC_IDMAPLEN); if (!buf) return -ENOMEM; } pos = buf; if (use_shadow) - pos += sprintf(buf, "new%cidmap %d", - type == ID_TYPE_UID ? 
'u' : 'g', - pid); + pos += sprintf(buf, "new%cidmap %d", type == ID_TYPE_UID ? 'u' : 'g', pid); lxc_list_for_each(iterator, idmap) { - /* The kernel only takes <= 4k for writes to /proc/<nr>/[ug]id_map */ + /* The kernel only takes <= 4k for writes to + * /proc/<nr>/[ug]id_map + */ map = iterator->elem; if (map->idtype != type) continue; - had_entry = 1; - left = 4096 - (pos - buf); + had_entry = true; + + left = LXC_IDMAPLEN - (pos - buf); fill = snprintf(pos, left, "%s%lu %lu %lu%s", - use_shadow ? " " : "", - map->nsid, map->hostid, map->range, + use_shadow ? " " : "", map->nsid, + map->hostid, map->range, use_shadow ? "" : "\n"); if (fill <= 0 || fill >= left) - SYSERROR("snprintf failed, too many mappings"); + SYSERROR("Too many {g,u}id mappings defined."); + pos += fill; } if (!had_entry) continue; if (!use_shadow) { - ret = write_id_mapping(type, pid, buf, pos-buf); + ret = write_id_mapping(type, pid, buf, pos - buf); } else { - left = 4096 - (pos - buf); + left = LXC_IDMAPLEN - (pos - buf); fill = snprintf(pos, left, "\n"); if (fill <= 0 || fill >= left) - SYSERROR("snprintf failed, too many mappings"); + SYSERROR("Too many {g,u}id mappings defined."); pos += fill; ret = system(buf); } - if (ret) break; } @@ -3617,20 +3757,21 @@ int ttys_shift_ids(struct lxc_conf *c) return 0; } -/* NOTE: not to be called from inside the container namespace! */ -int tmp_proc_mount(struct lxc_conf *lxc_conf) +/* NOTE: Must not be called from inside the container namespace! */ +int lxc_create_tmp_proc_mount(struct lxc_conf *conf) { int mounted; - mounted = mount_proc_if_needed(lxc_conf->rootfs.path ? lxc_conf->rootfs.mount : ""); + mounted = lxc_mount_proc_if_needed(conf->rootfs.path ? 
conf->rootfs.mount : ""); if (mounted == -1) { - SYSERROR("failed to mount /proc in the container."); + SYSERROR("failed to mount /proc in the container"); /* continue only if there is no rootfs */ - if (lxc_conf->rootfs.path) + if (conf->rootfs.path) return -1; } else if (mounted == 1) { - lxc_conf->tmp_umount_proc = 1; + conf->tmp_umount_proc = 1; } + return 0; } @@ -3892,19 +4033,17 @@ int lxc_setup(struct lxc_handler *handler) } if (lxc_conf->autodev > 0) { - bool mount_console = lxc_conf->console.path && !strcmp(lxc_conf->console.path, "none"); - if (run_lxc_hooks(name, "autodev", lxc_conf, lxcpath, NULL)) { ERROR("failed to run autodev hooks for container '%s'.", name); return -1; } - if (fill_autodev(&lxc_conf->rootfs, mount_console)) { + if (lxc_fill_autodev(&lxc_conf->rootfs)) { ERROR("failed to populate /dev in the container"); return -1; } } - if (!lxc_conf->is_execute && setup_console(&lxc_conf->rootfs, &lxc_conf->console, lxc_conf->ttydir)) { + if (!lxc_conf->is_execute && lxc_setup_console(&lxc_conf->rootfs, &lxc_conf->console, lxc_conf->ttydir)) { ERROR("failed to setup the console for '%s'", name); return -1; } @@ -3920,7 +4059,7 @@ int lxc_setup(struct lxc_handler *handler) } /* mount /proc if it's not already there */ - if (tmp_proc_mount(lxc_conf) < 0) { + if (lxc_create_tmp_proc_mount(lxc_conf) < 0) { ERROR("failed to LSM mount proc for '%s'", name); return -1; } @@ -3930,7 +4069,7 @@ int lxc_setup(struct lxc_handler *handler) return -1; } - if (setup_pts(lxc_conf->pts)) { + if (lxc_setup_devpts(lxc_conf->pts)) { ERROR("failed to setup the new pts instance"); return -1; } @@ -4151,10 +4290,14 @@ int lxc_clear_cgroups(struct lxc_conf *c, const char *key) { struct lxc_list *it,*next; bool all = false; - const char *k = key + 11; + const char *k = NULL; if (strcmp(key, "lxc.cgroup") == 0) all = true; + else if (strncmp(key, "lxc.cgroup.", sizeof("lxc.cgroup.")-1) == 0) + k = key + sizeof("lxc.cgroup.")-1; + else + return -1; 
lxc_list_for_each_safe(it, &c->cgroup, next) { struct lxc_cgroup *cg = it->elem; @@ -4216,11 +4359,15 @@ int lxc_clear_hooks(struct lxc_conf *c, const char *key) { struct lxc_list *it,*next; bool all = false, done = false; - const char *k = key + 9; + const char *k = NULL; int i; if (strcmp(key, "lxc.hook") == 0) all = true; + else if (strncmp(key, "lxc.hook.", sizeof("lxc.hook.")-1) == 0) + k = key + sizeof("lxc.hook.")-1; + else + return -1; for (i=0; i<NUM_LXC_HOOKS; i++) { if (all || strcmp(k, lxchook_names[i]) == 0) { @@ -4548,7 +4695,7 @@ void suggest_default_idmap(void) } fclose(f); - f = fopen(subuidfile, "r"); + f = fopen(subgidfile, "r"); if (!f) { ERROR("Your system is not configured with subgids"); free(gname); diff --git a/src/lxc/confile.c b/src/lxc/confile.c index 2abde72..9b22c6d 100644 --- a/src/lxc/confile.c +++ b/src/lxc/confile.c @@ -790,6 +790,9 @@ static int config_network_ipv4(const char *key, const char *value, struct lxc_list *list; char *cursor, *slash, *addr = NULL, *bcast = NULL, *prefix = NULL; + if (!value || !strlen(value)) + return lxc_clear_config_item(lxc_conf, key); + netdev = network_netdev(key, value, &lxc_conf->network); if (!netdev) return -1; @@ -917,6 +920,9 @@ static int config_network_ipv6(const char *key, const char *value, char *slash,*valdup; char *netmask; + if (!value || !strlen(value)) + return lxc_clear_config_item(lxc_conf, key); + netdev = network_netdev(key, value, &lxc_conf->network); if (!netdev) return -1; @@ -2873,21 +2879,21 @@ next: } \ } -static void new_hwaddr(char *hwaddr) +static bool new_hwaddr(char *hwaddr) { - FILE *f; - f = fopen("/dev/urandom", "r"); - if (f) { - unsigned int seed; - int ret = fread(&seed, sizeof(seed), 1, f); - if (ret != 1) - seed = time(NULL); - fclose(f); - srand(seed); - } else - srand(time(NULL)); - snprintf(hwaddr, 18, "00:16:3e:%02x:%02x:%02x", - rand() % 255, rand() % 255, rand() % 255); + int ret; + + /* COMMENT(brauner): Initialize random number generator. 
*/ + (void)randseed(true); + + ret = snprintf(hwaddr, 18, "00:16:3e:%02x:%02x:%02x", rand() % 255, + rand() % 255, rand() % 255); + if (ret < 0 || ret >= 18) { + SYSERROR("Failed to call snprintf()."); + return false; + } + + return true; } /* @@ -2909,27 +2915,33 @@ bool network_new_hwaddrs(struct lxc_conf *conf) if (!conf->unexpanded_config) return true; + while (*lstart) { char newhwaddr[18], oldhwaddr[17]; + lend = strchr(lstart, '\n'); if (!lend) lend = lstart + strlen(lstart); else lend++; + if (strncmp(lstart, key, strlen(key)) != 0) { lstart = lend; continue; } + p = strchr(lstart+strlen(key), '='); if (!p) { lstart = lend; continue; } + p++; while (isblank(*p)) p++; if (!*p) return true; + p2 = p; while (*p2 && !isblank(*p2) && *p2 != '\n') p2++; @@ -2938,8 +2950,12 @@ bool network_new_hwaddrs(struct lxc_conf *conf) lstart = lend; continue; } + memcpy(oldhwaddr, p, 17); - new_hwaddr(newhwaddr); + + if (!new_hwaddr(newhwaddr)) + return false; + memcpy(p, newhwaddr, 17); lxc_list_for_each(it, &conf->network) { struct lxc_netdev *n = it->elem; @@ -2949,6 +2965,7 @@ bool network_new_hwaddrs(struct lxc_conf *conf) lstart = lend; } + return true; } diff --git a/src/lxc/console.c b/src/lxc/console.c index 908ead0..3baaed4 100644 --- a/src/lxc/console.c +++ b/src/lxc/console.c @@ -257,6 +257,14 @@ int lxc_setup_tios(int fd, struct termios *oldtios) return -1; } + /* ensure we don't end up in an endless loop: + * The kernel might fire SIGTTOU while an + * ioctl() in tcsetattr() is executed. When the ioctl() + * is resumed and retries, the signal handler interrupts it again. + */ + signal (SIGTTIN, SIG_IGN); + signal (SIGTTOU, SIG_IGN); + newtios = *oldtios; /* We use the same settings that ssh does. 
*/ @@ -265,7 +273,7 @@ int lxc_setup_tios(int fd, struct termios *oldtios) #ifdef IUCLC newtios.c_iflag &= ~IUCLC; #endif - newtios.c_lflag &= ~(ISIG | ICANON | ECHO | ECHOE | ECHOK | ECHONL); + newtios.c_lflag &= ~(TOSTOP | ISIG | ICANON | ECHO | ECHOE | ECHOK | ECHONL); #ifdef IEXTEN newtios.c_lflag &= ~IEXTEN; #endif @@ -407,16 +415,17 @@ void lxc_console_free(struct lxc_conf *conf, int fd) } } -static void lxc_console_peer_default(struct lxc_console *console) +static int lxc_console_peer_default(struct lxc_console *console) { struct lxc_tty_state *ts; const char *path = console->path; + int fd; + int ret = 0; - /* if no console was given, try current controlling terminal, there - * won't be one if we were started as a daemon (-d) + /* If no console was given, try current controlling terminal, there + * won't be one if we were started as a daemon (-d). */ if (!path && !access("/dev/tty", F_OK)) { - int fd; fd = open("/dev/tty", O_RDWR); if (fd >= 0) { close(fd); @@ -424,25 +433,29 @@ static void lxc_console_peer_default(struct lxc_console *console) } } - if (!path) - goto out; - - DEBUG("opening %s for console peer", path); - console->peer = lxc_unpriv(open(path, O_CLOEXEC | O_RDWR | O_CREAT | - O_APPEND, 0600)); - if (console->peer < 0) + if (!path) { + errno = ENOTTY; + DEBUG("process does not have a controlling terminal"); goto out; + } - DEBUG("using '%s' as console", path); + console->peer = lxc_unpriv(open(path, O_CLOEXEC | O_RDWR | O_CREAT | O_APPEND, 0600)); + if (console->peer < 0) { + ERROR("failed to open \"%s\"", path); + return -ENOTTY; + } + DEBUG("using \"%s\" as peer tty device", path); - if (!isatty(console->peer)) - goto err1; + if (!isatty(console->peer)) { + ERROR("file descriptor for file \"%s\" does not refer to a tty device", path); + goto on_error1; + } ts = lxc_console_sigwinch_init(console->peer, console->master); console->tty_state = ts; if (!ts) { - WARN("Unable to install SIGWINCH"); - goto err1; + WARN("unable to install SIGWINCH 
handler"); + goto on_error1; } lxc_console_winsz(console->peer, console->master); @@ -450,23 +463,27 @@ static void lxc_console_peer_default(struct lxc_console *console) console->tios = malloc(sizeof(*console->tios)); if (!console->tios) { SYSERROR("failed to allocate memory"); - goto err1; + ret = -ENOMEM; + goto on_error1; } if (lxc_setup_tios(console->peer, console->tios) < 0) - goto err2; - - return; + goto on_error2; + else + goto out; -err2: +on_error2: free(console->tios); console->tios = NULL; -err1: + ret = -ENOTTY; + +on_error1: close(console->peer); console->peer = -1; + ret = -ENOTTY; + out: - DEBUG("no console peer"); - return; + return ret; } void lxc_console_delete(struct lxc_console *console) @@ -495,21 +512,24 @@ int lxc_console_create(struct lxc_conf *conf) int ret; if (conf->is_execute) { - INFO("no console for lxc-execute."); + INFO("not allocating a console device for lxc-execute."); return 0; } - if (!conf->rootfs.path) + if (!conf->rootfs.path) { + INFO("container does not have a rootfs, console device will be shared with the host"); return 0; + } - if (console->path && !strcmp(console->path, "none")) + if (console->path && !strcmp(console->path, "none")) { + INFO("no console requested"); return 0; + } process_lock(); - ret = openpty(&console->master, &console->slave, - console->name, NULL, NULL); + ret = openpty(&console->master, &console->slave, console->name, NULL, NULL); process_unlock(); - if (ret) { + if (ret < 0) { SYSERROR("failed to allocate a pty"); return -1; } @@ -524,17 +544,19 @@ int lxc_console_create(struct lxc_conf *conf) goto err; } - lxc_console_peer_default(console); + ret = lxc_console_peer_default(console); + if (ret < 0) { + ERROR("failed to allocate peer tty device"); + goto err; + } if (console->log_path) { - console->log_fd = lxc_unpriv(open(console->log_path, - O_CLOEXEC | O_RDWR | - O_CREAT | O_APPEND, 0600)); + console->log_fd = lxc_unpriv(open(console->log_path, O_CLOEXEC | O_RDWR | O_CREAT | O_APPEND, 0600)); if 
(console->log_fd < 0) { - SYSERROR("failed to open '%s'", console->log_path); + SYSERROR("failed to open console log file \"%s\"", console->log_path); goto err; } - DEBUG("using '%s' as console log", console->log_path); + DEBUG("using \"%s\" as console log file", console->log_path); } return 0; diff --git a/src/lxc/criu.c b/src/lxc/criu.c index 8a0702f..d757bef 100644 --- a/src/lxc/criu.c +++ b/src/lxc/criu.c @@ -334,8 +334,18 @@ static void exec_criu(struct criu_opts *opts) goto err; while (getmntent_r(mnts, &mntent, buf, sizeof(buf))) { - char *fmt, *key, *val; + char *fmt, *key, *val, *mntdata; char arg[2 * PATH_MAX + 2]; + unsigned long flags; + + if (parse_mntopts(mntent.mnt_opts, &flags, &mntdata) < 0) + goto err; + + free(mntdata); + + /* only add --ext-mount-map for actual bind mounts */ + if (!(flags & MS_BIND)) + continue; if (strcmp(opts->action, "dump") == 0) { fmt = "/%s:%s"; diff --git a/src/lxc/log.c b/src/lxc/log.c index 678bec7..c9b54dc 100644 --- a/src/lxc/log.c +++ b/src/lxc/log.c @@ -106,7 +106,7 @@ int lxc_unix_epoch_to_utc(char *buf, size_t bufsize, const struct timespec *time yoe = (doe - doe / 1460 + doe / 36524 - doe / 146096) / 365; /* Given year-of-era, and era, one can now compute the year. */ - year = (yoe) + era * 400; + year = yoe + era * 400; /* Also the day-of-year, again with the year beginning on Mar. 1, can be * computed from the day-of-era and year-of-era. @@ -126,6 +126,11 @@ int lxc_unix_epoch_to_utc(char *buf, size_t bufsize, const struct timespec *time */ month = mp + (mp < 10 ? 3 : -9); + /* The algorithm assumes that a year begins on 1 March, so add 1 before + * that. */ + if (month < 3) + year++; + /* Transform days in the epoch to seconds. 
*/ d_in_s = epoch_to_days * 86400; diff --git a/src/lxc/lxc_user_nic.c b/src/lxc/lxc_user_nic.c index 409a53a..c93b4cc 100644 --- a/src/lxc/lxc_user_nic.c +++ b/src/lxc/lxc_user_nic.c @@ -18,37 +18,44 @@ */ #define _GNU_SOURCE /* See feature_test_macros(7) */ +#include <alloca.h> +#include <ctype.h> +#include <errno.h> +#include <fcntl.h> +#include <grp.h> +#include <pwd.h> +#include <sched.h> +#include <stdbool.h> #include <stdio.h> #include <stdlib.h> -#include <stdbool.h> -#include <sys/types.h> -#include <pwd.h> -#include <grp.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/file.h> -#include <alloca.h> #include <string.h> -#include <sched.h> -#include <sys/mman.h> -#include <sys/socket.h> -#include <errno.h> -#include <ctype.h> -#include <sys/stat.h> -#include <sys/ioctl.h> -#include <linux/netlink.h> +#include <unistd.h> #include <arpa/inet.h> -#include <net/if.h> -#include <net/if_arp.h> -#include <netinet/in.h> #include <linux/netlink.h> #include <linux/rtnetlink.h> #include <linux/sockios.h> +#include <net/if.h> +#include <net/if_arp.h> +#include <netinet/in.h> +#include <sys/file.h> +#include <sys/ioctl.h> +#include <sys/mman.h> #include <sys/param.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/types.h> #include "config.h" -#include "utils.h" #include "network.h" +#include "utils.h" + +#define usernic_debug_stream(stream, format, ...) \ + do { \ + fprintf(stream, "%s: %d: %s: " format, __FILE__, __LINE__, \ + __func__, __VA_ARGS__); \ + } while (false) + +#define usernic_error(format, ...) 
usernic_debug_stream(stderr, format, __VA_ARGS__) static void usage(char *me, bool fail) { @@ -66,9 +73,8 @@ static int open_and_lock(char *path) fd = open(path, O_RDWR|O_CREAT, S_IWUSR | S_IRUSR); if (fd < 0) { - fprintf(stderr, "Failed to open %s: %s\n", - path, strerror(errno)); - return(fd); + usernic_error("Failed to open %s: %s.\n", path, strerror(errno)); + return -1; } lk.l_type = F_WRLCK; @@ -76,8 +82,7 @@ static int open_and_lock(char *path) lk.l_start = 0; lk.l_len = 0; if (fcntl(fd, F_SETLKW, &lk) < 0) { - fprintf(stderr, "Failed to lock %s: %s\n", - path, strerror(errno)); + usernic_error("Failed to lock %s: %s.\n", path, strerror(errno)); close(fd); return -1; } @@ -88,10 +93,11 @@ static int open_and_lock(char *path) static char *get_username(void) { - struct passwd *pwd = getpwuid(getuid()); + struct passwd *pwd; - if (pwd == NULL) { - perror("getpwuid"); + pwd = getpwuid(getuid()); + if (!pwd) { + usernic_error("Failed to call get username: %s.\n", strerror(errno)); return NULL; } @@ -101,10 +107,13 @@ static char *get_username(void) static void free_groupnames(char **groupnames) { int i; + if (!groupnames) return; + for (i = 0; groupnames[i]; i++) free(groupnames[i]); + free(groupnames); } @@ -117,53 +126,56 @@ static char **get_groupnames(void) struct group *gr; ngroups = getgroups(0, NULL); - - if (ngroups == -1) { - fprintf(stderr, "Failed to get number of groups user belongs to: %s\n", strerror(errno)); + if (ngroups < 0) { + usernic_error( + "Failed to get number of groups the user belongs to: %s.\n", + strerror(errno)); return NULL; } if (ngroups == 0) return NULL; - group_ids = (gid_t *)malloc(sizeof(gid_t)*ngroups); - - if (group_ids == NULL) { - fprintf(stderr, "Out of memory while getting groups the user belongs to\n"); + group_ids = malloc(sizeof(gid_t) * ngroups); + if (!group_ids) { + usernic_error("Failed to allocate memory while getting groups " + "the user belongs to: %s.\n", + strerror(errno)); return NULL; } ret = 
getgroups(ngroups, group_ids); - if (ret < 0) { free(group_ids); - fprintf(stderr, "Failed to get process groups: %s\n", strerror(errno)); + usernic_error("Failed to get process groups: %s.\n", + strerror(errno)); return NULL; } - groupnames = (char **)malloc(sizeof(char *)*(ngroups+1)); - - if (groupnames == NULL) { + groupnames = malloc(sizeof(char *) * (ngroups + 1)); + if (!groupnames) { free(group_ids); - fprintf(stderr, "Out of memory while getting group names\n"); + usernic_error("Failed to allocate memory while getting group " + "names: %s.\n", + strerror(errno)); return NULL; } - memset(groupnames, 0, sizeof(char *)*(ngroups+1)); + memset(groupnames, 0, sizeof(char *) * (ngroups + 1)); - for (i=0; i<ngroups; i++ ) { + for (i = 0; i < ngroups; i++) { gr = getgrgid(group_ids[i]); - - if (gr == NULL) { - fprintf(stderr, "Failed to get group name\n"); + if (!gr) { + usernic_error("Failed to get group name: %s.\n", + strerror(errno)); free(group_ids); free_groupnames(groupnames); return NULL; } groupnames[i] = strdup(gr->gr_name); - - if (groupnames[i] == NULL) { - fprintf(stderr, "Failed to copy group name: %s", gr->gr_name); + if (!groupnames[i]) { + usernic_error("Failed to copy group name \"%s\".", + gr->gr_name); free(group_ids); free_groupnames(groupnames); return NULL; @@ -177,8 +189,8 @@ static char **get_groupnames(void) static bool name_is_in_groupnames(char *name, char **groupnames) { - while (groupnames != NULL) { - if (strcmp(name, *groupnames) == 0) + while (groupnames) { + if (!strcmp(name, *groupnames)) return true; groupnames++; } @@ -195,23 +207,20 @@ static struct alloted_s *append_alloted(struct alloted_s **head, char *name, int { struct alloted_s *cur, *al; - if (head == NULL || name == NULL) { + if (!head || !name) { // sanity check. 
parameters should not be null - fprintf(stderr, "NULL parameters to append_alloted not allowed\n"); + usernic_error("%s\n", "Unexpected NULL argument."); return NULL; } - al = (struct alloted_s *)malloc(sizeof(struct alloted_s)); - - if (al == NULL) { - // unable to allocate memory to new struct - fprintf(stderr, "Out of memory in append_alloted\n"); + al = malloc(sizeof(struct alloted_s)); + if (!al) { + usernic_error("Failed to allocate memory: %s.\n", strerror(errno)); return NULL; } al->name = strdup(name); - - if (al->name == NULL) { + if (!al->name) { free(al); return NULL; } @@ -219,16 +228,16 @@ static struct alloted_s *append_alloted(struct alloted_s **head, char *name, int al->allowed = n; al->next = NULL; - if (*head == NULL) { + if (!*head) { *head = al; return al; } cur = *head; - while (cur->next != NULL) + while (cur->next) cur = cur->next; - cur->next = al; + return al; } @@ -236,13 +245,11 @@ static void free_alloted(struct alloted_s **head) { struct alloted_s *cur; - if (head == NULL) { + if (!head) return; - } cur = *head; - - while (cur != NULL) { + while (cur) { cur = cur->next; free((*head)->name); free(*head); @@ -261,49 +268,55 @@ static void free_alloted(struct alloted_s **head) */ static int get_alloted(char *me, char *intype, char *link, struct alloted_s **alloted) { - FILE *fin = fopen(LXC_USERNIC_CONF, "r"); - char *line = NULL; + int n, ret; char name[100], type[100], br[100]; - size_t len = 0; - int n, ret, count = 0; char **groups; + FILE *fin; + + int count = 0; + size_t len = 0; + char *line = NULL; + fin = fopen(LXC_USERNIC_CONF, "r"); if (!fin) { - fprintf(stderr, "Failed to open %s: %s\n", LXC_USERNIC_CONF, - strerror(errno)); + usernic_error("Failed to open \"%s\": %s.\n", LXC_USERNIC_CONF, strerror(errno)); return -1; } groups = get_groupnames(); while ((getline(&line, &len, fin)) != -1) { ret = sscanf(line, "%99[^ \t] %99[^ \t] %99[^ \t] %d", name, type, br, &n); - if (ret != 4) continue; if (strlen(name) == 0) continue; - if 
(strcmp(name, me) != 0) - { + if (strcmp(name, me)) { if (name[0] != '@') continue; - if (!name_is_in_groupnames(name+1, groups)) + + if (!name_is_in_groupnames(name + 1, groups)) continue; } - if (strcmp(type, intype) != 0) + + if (strcmp(type, intype)) continue; - if (strcmp(link, br) != 0) + + if (strcmp(link, br)) continue; - /* found the user or group with the appropriate settings, therefore finish the search. - * what to do if there are more than one applicable lines? not specified in the docs. - * since getline is implemented with realloc, we don't need to free line until exiting func. + /* Found the user or group with the appropriate settings, + * therefore finish the search. What to do if there are more + * than one applicable lines? not specified in the docs. Since + * getline is implemented with realloc, we don't need to free + * line until exiting func. * - * if append_alloted returns NULL, e.g. due to a malloc error, we set count to 0 and break the loop, - * allowing cleanup and then exiting from main() + * If append_alloted returns NULL, e.g. due to a malloc error, + * we set count to 0 and break the loop, allowing cleanup and + * then exiting from main(). */ - if (append_alloted(alloted, name, n) == NULL) { + if (!append_alloted(alloted, name, n)) { count = 0; break; } @@ -314,20 +327,20 @@ static int get_alloted(char *me, char *intype, char *link, struct alloted_s **al fclose(fin); free(line); - // now return the total number of nics that this user can create + /* Now return the total number of nics that this user can create. 
*/ return count; } static char *get_eol(char *s, char *e) { - while (s<e && *s && *s != '\n') + while ((s < e) && *s && (*s != '\n')) s++; return s; } static char *get_eow(char *s, char *e) { - while (s<e && *s && !isblank(*s) && *s != '\n') + while ((s < e) && *s && !isblank(*s) && (*s != '\n')) s++; return s; } @@ -336,24 +349,34 @@ static char *find_line(char *p, char *e, char *u, char *t, char *l) { char *p1, *p2, *ret; - while (p<e && (p1 = get_eol(p, e)) < e) { + while ((p < e) && (p1 = get_eol(p, e)) < e) { ret = p; if (*p == '#') goto next; - while (p<e && isblank(*p)) p++; + + while ((p < e) && isblank(*p)) + p++; + p2 = get_eow(p, e); - if (!p2 || p2-p != strlen(u) || strncmp(p, u, strlen(u)) != 0) + if (!p2 || ((size_t)(p2 - p)) != strlen(u) || strncmp(p, u, strlen(u))) goto next; - p = p2+1; - while (p<e && isblank(*p)) p++; + + p = p2 + 1; + while ((p < e) && isblank(*p)) + p++; + p2 = get_eow(p, e); - if (!p2 || p2-p != strlen(t) || strncmp(p, t, strlen(t)) != 0) + if (!p2 || ((size_t)(p2 - p)) != strlen(t) || strncmp(p, t, strlen(t))) goto next; - p = p2+1; - while (p<e && isblank(*p)) p++; + + p = p2 + 1; + while ((p < e) && isblank(*p)) + p++; + p2 = get_eow(p, e); - if (!p2 || p2-p != strlen(l) || strncmp(p, l, strlen(l)) != 0) + if (!p2 || ((size_t)(p2 - p)) != strlen(l) || strncmp(p, l, strlen(l))) goto next; + return ret; next: p = p1 + 1; @@ -368,14 +391,17 @@ static bool nic_exists(char *nic) int ret; struct stat sb; - if (strcmp(nic, "none") == 0) + if (!strcmp(nic, "none")) return true; + ret = snprintf(path, MAXPATHLEN, "/sys/class/net/%s", nic); - if (ret < 0 || ret >= MAXPATHLEN) // should never happen! 
+ if (ret < 0 || ret >= MAXPATHLEN) return false; + ret = stat(path, &sb); - if (ret != 0) + if (ret < 0) return false; + return true; } @@ -385,68 +411,81 @@ static int instantiate_veth(char *n1, char **n2) err = snprintf(*n2, IFNAMSIZ, "%sp", n1); if (err < 0 || err >= IFNAMSIZ) { - fprintf(stderr, "nic name too long\n"); + usernic_error("%s\n", "Could not create nic name."); return -1; } err = lxc_veth_create(n1, *n2); if (err) { - fprintf(stderr, "failed to create %s-%s : %s\n", n1, *n2, - strerror(-err)); + usernic_error("Failed to create %s-%s : %s.\n", n1, *n2, strerror(-err)); return -1; } - /* changing the high byte of the mac address to 0xfe, the bridge interface - * will always keep the host's mac address and not take the mac address - * of a container */ + /* Changing the high byte of the mac address to 0xfe, the bridge + * interface will always keep the host's mac address and not take the + * mac address of a container. */ err = setup_private_host_hw_addr(n1); - if (err) { - fprintf(stderr, "failed to change mac address of host interface '%s' : %s\n", - n1, strerror(-err)); - } + if (err) + usernic_error("Failed to change mac address of host interface " + "%s : %s.\n", + n1, strerror(-err)); return netdev_set_flag(n1, IFF_UP); } static int get_mtu(char *name) { - int idx = if_nametoindex(name); + int idx; + + idx = if_nametoindex(name); return netdev_get_mtu(idx); } static bool create_nic(char *nic, char *br, int pid, char **cnic) { char *veth1buf, *veth2buf; + int mtu, ret; + veth1buf = alloca(IFNAMSIZ); veth2buf = alloca(IFNAMSIZ); - int ret, mtu; + if (!veth1buf || !veth2buf) { + usernic_error("Failed allocate memory: %s.\n", strerror(errno)); + return false; + } ret = snprintf(veth1buf, IFNAMSIZ, "%s", nic); if (ret < 0 || ret >= IFNAMSIZ) { - fprintf(stderr, "host nic name too long\n"); + usernic_error("%s", "Could not create nic name.\n"); return false; } /* create the nics */ if (instantiate_veth(veth1buf, &veth2buf) < 0) { - fprintf(stderr, 
"Error creating veth tunnel\n"); + usernic_error("%s", "Error creating veth tunnel.\n"); return false; } - if (strcmp(br, "none") != 0) { + if (strcmp(br, "none")) { /* copy the bridge's mtu to both ends */ mtu = get_mtu(br); - if (mtu != -1) { - if (lxc_netdev_set_mtu(veth1buf, mtu) < 0 || - lxc_netdev_set_mtu(veth2buf, mtu) < 0) { - fprintf(stderr, "Failed setting mtu\n"); + if (mtu > 0) { + ret = lxc_netdev_set_mtu(veth1buf, mtu); + if (ret < 0) { + usernic_error("Failed to set mtu to %d on %s.\n", mtu, veth1buf); + goto out_del; + } + + ret = lxc_netdev_set_mtu(veth2buf, mtu); + if (ret < 0) { + usernic_error("Failed to set mtu to %d on %s.\n", mtu, veth2buf); goto out_del; } } /* attach veth1 to bridge */ - if (lxc_bridge_attach(lxcpath, lxcname, br, veth1buf) < 0) { - fprintf(stderr, "Error attaching %s to %s\n", veth1buf, br); + ret = lxc_bridge_attach(lxcpath, lxcname, br, veth1buf); + if (ret < 0) { + usernic_error("Error attaching %s to %s.\n", veth1buf, br); goto out_del; } } @@ -454,10 +493,16 @@ static bool create_nic(char *nic, char *br, int pid, char **cnic) /* pass veth2 to target netns */ ret = lxc_netdev_move_by_name(veth2buf, pid, NULL); if (ret < 0) { - fprintf(stderr, "Error moving %s to netns %d\n", veth2buf, pid); + usernic_error("Error moving %s to network namespace of %d.\n", veth2buf, pid); goto out_del; } + *cnic = strdup(veth2buf); + if (!*cnic) { + usernic_error("Failed to copy string \"%s\".\n", veth2buf); + return false; + } + return true; out_del: @@ -467,29 +512,34 @@ out_del: /* * Get a new nic. 
- * *dest will container the name (vethXXXXXX) which is attached + * *dest will contain the name (vethXXXXXX) which is attached * on the host to the lxc bridge */ static bool get_new_nicname(char **dest, char *br, int pid, char **cnic) { + int ret; char template[IFNAMSIZ]; - snprintf(template, sizeof(template), "vethXXXXXX"); - *dest = lxc_mkifname(template); - if (!create_nic(*dest, br, pid, cnic)) { + ret = snprintf(template, sizeof(template), "vethXXXXXX"); + if (ret < 0 || (size_t)ret >= sizeof(template)) return false; - } + + *dest = lxc_mkifname(template); + if (!create_nic(*dest, br, pid, cnic)) + return false; + return true; } static bool get_nic_from_line(char *p, char **nic) { - char user[100], type[100], br[100]; int ret; + char user[100], type[100], br[100]; ret = sscanf(p, "%99[^ \t\n] %99[^ \t\n] %99[^ \t\n] %99[^ \t\n]", user, type, br, *nic); if (ret != 4) return false; + return true; } @@ -501,35 +551,42 @@ struct entry_line { static bool cull_entries(int fd, char *me, char *t, char *br) { - struct stat sb; - char *buf, *p, *e, *nic; + int i, n = 0; off_t len; + char *buf, *p, *e, *nic; + struct stat sb; struct entry_line *entry_lines = NULL; - int i, n = 0; nic = alloca(100); + if (!nic) + return false; if (fstat(fd, &sb) < 0) { - fprintf(stderr, "Failed to fstat: %s\n", strerror(errno)); + usernic_error("Failed to fstat: %s.\n", strerror(errno)); return false; } + len = sb.st_size; if (len == 0) return true; + buf = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); if (buf == MAP_FAILED) { - fprintf(stderr, "Failed to create mapping: %s\n", strerror(errno)); + usernic_error("Failed to establish shared memory mapping: %s.\n", strerror(errno)); return false; } p = buf; e = buf + len; - while ((p = find_line(p, e, me, t, br)) != NULL) { - struct entry_line *newe = realloc(entry_lines, sizeof(*entry_lines)*(n+1)); + while ((p = find_line(p, e, me, t, br))) { + struct entry_line *newe; + + newe = realloc(entry_lines, sizeof(*entry_lines) * (n 
+ 1)); if (!newe) { free(entry_lines); return false; } + entry_lines = newe; entry_lines[n].start = p; entry_lines[n].len = get_eol(p, e) - entry_lines[n].start; @@ -537,35 +594,43 @@ static bool cull_entries(int fd, char *me, char *t, char *br) n++; if (!get_nic_from_line(p, &nic)) continue; + if (nic && !nic_exists(nic)) - entry_lines[n-1].keep = false; - p += entry_lines[n-1].len + 1; + entry_lines[n - 1].keep = false; + + p += entry_lines[n - 1].len + 1; if (p >= e) break; - } + } + p = buf; - for (i=0; i<n; i++) { + for (i = 0; i < n; i++) { if (!entry_lines[i].keep) continue; + memcpy(p, entry_lines[i].start, entry_lines[i].len); p += entry_lines[i].len; *p = '\n'; p++; } free(entry_lines); + munmap(buf, sb.st_size); - if (ftruncate(fd, p-buf)) - fprintf(stderr, "Failed to set new file size\n"); + if (ftruncate(fd, p - buf)) + usernic_error("Failed to set new file size: %s.\n", strerror(errno)); + return true; } static int count_entries(char *buf, off_t len, char *me, char *t, char *br) { - char *e = &buf[len]; + char *e; int count = 0; - while ((buf = find_line(buf, e, me, t, br)) != NULL) { + + e = &buf[len]; + while ((buf = find_line(buf, e, me, t, br))) { count++; - buf = get_eol(buf, e)+1; + buf = get_eol(buf, e) + 1; if (buf >= e) break; } @@ -577,16 +642,19 @@ static int count_entries(char *buf, off_t len, char *me, char *t, char *br) * The dbfile has lines of the format: * user type bridge nicname */ -static bool get_nic_if_avail(int fd, struct alloted_s *names, int pid, char *intype, char *br, int allowed, char **nicname, char **cnic) +static bool get_nic_if_avail(int fd, struct alloted_s *names, int pid, + char *intype, char *br, int allowed, + char **nicname, char **cnic) { + int ret; off_t len, slen; + char *newline, *owner; struct stat sb; - char *buf = NULL, *newline; - int ret, count = 0; - char *owner; struct alloted_s *n; + int count = 0; + char *buf = NULL; - for (n=names; n!=NULL; n=n->next) + for (n = names; n != NULL; n = n->next) 
cull_entries(fd, n->name, intype, br); if (allowed == 0) @@ -595,19 +663,20 @@ static bool get_nic_if_avail(int fd, struct alloted_s *names, int pid, char *int owner = names->name; if (fstat(fd, &sb) < 0) { - fprintf(stderr, "Failed to fstat: %s\n", strerror(errno)); + usernic_error("Failed to fstat: %s.\n", strerror(errno)); return false; } + len = sb.st_size; - if (len != 0) { + if (len > 0) { buf = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); if (buf == MAP_FAILED) { - fprintf(stderr, "Failed to create mapping\n"); + usernic_error("Failed to establish shared memory mapping: %s.\n", strerror(errno)); return false; } owner = NULL; - for (n=names; n!=NULL; n=n->next) { + for (n = names; n != NULL; n = n->next) { count = count_entries(buf, len, n->name, intype, br); if (count >= n->allowed) @@ -623,115 +692,177 @@ static bool get_nic_if_avail(int fd, struct alloted_s *names, int pid, char *int if (!get_new_nicname(nicname, br, pid, cnic)) return false; + /* owner ' ' intype ' ' br ' ' *nicname + '\n' + '\0' */ slen = strlen(owner) + strlen(intype) + strlen(br) + strlen(*nicname) + 5; newline = alloca(slen); + if (!newline) { + usernic_error("Failed allocate memory: %s.\n", strerror(errno)); + return false; + } + ret = snprintf(newline, slen, "%s %s %s %s\n", owner, intype, br, *nicname); if (ret < 0 || ret >= slen) { if (lxc_netdev_delete_by_name(*nicname) != 0) - fprintf(stderr, "Error unlinking %s!\n", *nicname); + usernic_error("Error unlinking %s.\n", *nicname); return false; } if (len) munmap(buf, len); + if (ftruncate(fd, len + slen)) - fprintf(stderr, "Failed to set new file size\n"); + usernic_error("Failed to set new file size: %s.\n", strerror(errno)); + buf = mmap(NULL, len + slen, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); if (buf == MAP_FAILED) { - fprintf(stderr, "Failed to create mapping after extending: %s\n", strerror(errno)); + usernic_error("Failed to establish shared memory mapping: %s.\n", strerror(errno)); if 
(lxc_netdev_delete_by_name(*nicname) != 0) - fprintf(stderr, "Error unlinking %s!\n", *nicname); + usernic_error("Error unlinking %s.\n", *nicname); return false; } - strcpy(buf+len, newline); - munmap(buf, len+slen); + + strcpy(buf + len, newline); + munmap(buf, len + slen); + return true; } static bool create_db_dir(char *fnam) { - char *p = alloca(strlen(fnam)+1); + char *p; + p = alloca(strlen(fnam) + 1); strcpy(p, fnam); fnam = p; p = p + 1; + again: - while (*p && *p != '/') p++; + while (*p && *p != '/') + p++; if (!*p) return true; + *p = '\0'; if (mkdir(fnam, 0755) && errno != EEXIST) { - fprintf(stderr, "failed to create %s\n", fnam); + usernic_error("Failed to create %s: %s.\n", fnam, strerror(errno)); *p = '/'; return false; } *(p++) = '/'; + goto again; } #define VETH_DEF_NAME "eth%d" - static int rename_in_ns(int pid, char *oldname, char **newnamep) { - int fd = -1, ofd = -1, ret, ifindex = -1; + uid_t ruid, suid, euid; + int fret = -1; + int fd = -1, ifindex = -1, ofd = -1, ret; bool grab_newname = false; ofd = lxc_preserve_ns(getpid(), "net"); if (ofd < 0) { - fprintf(stderr, "Failed opening network namespace path for '%d'.", getpid()); - return -1; + usernic_error("Failed opening network namespace path for '%d'.", getpid()); + return fret; } fd = lxc_preserve_ns(pid, "net"); if (fd < 0) { - fprintf(stderr, "Failed opening network namespace path for '%d'.", pid); - return -1; + usernic_error("Failed opening network namespace path for '%d'.", pid); + goto do_partial_cleanup; + } + + ret = getresuid(&ruid, &euid, &suid); + if (ret < 0) { + usernic_error("Failed to retrieve real, effective, and saved " + "user IDs: %s\n", + strerror(errno)); + goto do_partial_cleanup; + } + + ret = setns(fd, CLONE_NEWNET); + close(fd); + fd = -1; + if (ret < 0) { + usernic_error("Failed to setns() to the network namespace of " + "the container with PID %d: %s.\n", + pid, strerror(errno)); + goto do_partial_cleanup; } - if (setns(fd, 0) < 0) { - fprintf(stderr, "setns 
to container network namespace\n"); - goto out_err; + ret = setresuid(ruid, ruid, 0); + if (ret < 0) { + usernic_error("Failed to drop privilege by setting effective " + "user id and real user id to %d, and saved user " + "ID to 0: %s.\n", + ruid, strerror(errno)); + // COMMENT(brauner): It's ok to jump to do_full_cleanup here + // since setresuid() will succeed when trying to set real, + // effective, and saved to values they currently have. + goto do_full_cleanup; } - close(fd); fd = -1; + if (!*newnamep) { grab_newname = true; *newnamep = VETH_DEF_NAME; - if (!(ifindex = if_nametoindex(oldname))) { - fprintf(stderr, "failed to get netdev index\n"); - goto out_err; + + ifindex = if_nametoindex(oldname); + if (!ifindex) { + usernic_error("Failed to get netdev index: %s.\n", strerror(errno)); + goto do_full_cleanup; } } - if ((ret = lxc_netdev_rename_by_name(oldname, *newnamep)) < 0) { - fprintf(stderr, "Error %d renaming netdev %s to %s in container\n", ret, oldname, *newnamep); - goto out_err; + + ret = lxc_netdev_rename_by_name(oldname, *newnamep); + if (ret < 0) { + usernic_error("Error %d renaming netdev %s to %s in container.\n", ret, oldname, *newnamep); + goto do_full_cleanup; } + if (grab_newname) { - char ifname[IFNAMSIZ], *namep = ifname; + char ifname[IFNAMSIZ]; + char *namep = ifname; + if (!if_indextoname(ifindex, namep)) { - fprintf(stderr, "Failed to get new netdev name\n"); - goto out_err; + usernic_error("Failed to get new netdev name: %s.\n", strerror(errno)); + goto do_full_cleanup; } + *newnamep = strdup(namep); if (!*newnamep) - goto out_err; + goto do_full_cleanup; } - if (setns(ofd, 0) < 0) { - fprintf(stderr, "Error returning to original netns\n"); - close(ofd); - return -1; + + fret = 0; + +do_full_cleanup: + ret = setresuid(ruid, euid, suid); + if (ret < 0) { + usernic_error("Failed to restore privilege by setting effective " + "user id to %d, real user id to %d, and saved user " + "ID to %d: %s.\n", + ruid, euid, suid, strerror(errno)); 
+ fret = -1; + // COMMENT(brauner): setns() should fail if setresuid() doesn't + // succeed but there's no harm in falling through; keeps the + // code cleaner. } - close(ofd); - return 0; + ret = setns(ofd, CLONE_NEWNET); + if (ret < 0) { + usernic_error("Failed to setns() to original network namespace " + "of PID %d: %s.\n", + ofd, strerror(errno)); + fret = -1; + } -out_err: - if (ofd >= 0) - close(ofd); - if (setns(ofd, 0) < 0) - fprintf(stderr, "Error returning to original network namespace\n"); +do_partial_cleanup: if (fd >= 0) close(fd); - return -1; + close(ofd); + + return fret; } /* @@ -747,61 +878,78 @@ static bool may_access_netns(int pid) bool may_access = false; ret = getresuid(&ruid, &euid, &suid); - if (ret) { - fprintf(stderr, "Failed to get my uids: %s\n", strerror(errno)); + if (ret < 0) { + usernic_error("Failed to retrieve real, effective, and saved " + "user IDs: %s\n", + strerror(errno)); return false; } + ret = setresuid(ruid, ruid, euid); - if (ret) { - fprintf(stderr, "Failed to set temp uids to (%d,%d,%d): %s\n", - (int)ruid, (int)ruid, (int)euid, strerror(errno)); + if (ret < 0) { + usernic_error("Failed to drop privilege by setting effective " + "user id and real user id to %d, and saved user " + "ID to %d: %s.\n", + ruid, euid, strerror(errno)); return false; } + ret = snprintf(s, 200, "/proc/%d/ns/net", pid); - if (ret < 0 || ret >= 200) // can't happen + if (ret < 0 || ret >= 200) return false; + ret = access(s, R_OK); - if (ret) { - fprintf(stderr, "Uid %d may not access %s: %s\n", - (int)ruid, s, strerror(errno)); + may_access = true; + if (ret < 0) { + may_access = false; + usernic_error("Uid %d may not access %s: %s\n", (int)ruid, s, strerror(errno)); } - may_access = ret == 0; + ret = setresuid(ruid, euid, suid); - if (ret) { - fprintf(stderr, "Failed to restore uids to (%d,%d,%d): %s\n", - (int)ruid, (int)euid, (int)suid, strerror(errno)); + if (ret < 0) { + usernic_error("Failed to restore user id to %d, real user id " + "to 
%d, and saved user ID to %d: %s.\n", + ruid, euid, suid, strerror(errno)); may_access = false; } + return may_access; } int main(int argc, char *argv[]) { int n, fd; - bool gotone = false; char *me; - char *nicname = alloca(40); - char *cnic = NULL; // created nic name in container is returned here. - char *vethname = NULL; + char *nicname; int pid; + char *cnic = NULL; /* Created nic name in container is returned here. */ + char *vethname = NULL; + bool gotone = false; struct alloted_s *alloted = NULL; + nicname = alloca(40); + if (!nicname) { + usernic_error("Failed allocate memory: %s.\n", strerror(errno)); + exit(EXIT_FAILURE); + } + /* set a sane env, because we are setuid-root */ if (clearenv() < 0) { - fprintf(stderr, "Failed to clear environment"); - exit(1); + usernic_error("%s", "Failed to clear environment.\n"); + exit(EXIT_FAILURE); } if (setenv("PATH", "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", 1) < 0) { - fprintf(stderr, "Failed to set PATH, exiting\n"); - exit(1); + usernic_error("%s", "Failed to set PATH, exiting.\n"); + exit(EXIT_FAILURE); } if ((me = get_username()) == NULL) { - fprintf(stderr, "Failed to get username\n"); - exit(1); + usernic_error("%s", "Failed to get username.\n"); + exit(EXIT_FAILURE); } if (argc < 6) usage(argv[0], true); + if (argc >= 7) vethname = argv[6]; @@ -809,26 +957,25 @@ int main(int argc, char *argv[]) lxcname = argv[2]; errno = 0; - pid = (int) strtol(argv[3], NULL, 10); + pid = strtol(argv[3], NULL, 10); if (errno) { - fprintf(stderr, "Could not read pid: %s\n", argv[1]); - exit(1); + usernic_error("Could not read pid: %s.\n", argv[1]); + exit(EXIT_FAILURE); } if (!create_db_dir(LXC_USERNIC_DB)) { - fprintf(stderr, "Failed to create directory for db file\n"); - exit(1); + usernic_error("%s", "Failed to create directory for db file.\n"); + exit(EXIT_FAILURE); } if ((fd = open_and_lock(LXC_USERNIC_DB)) < 0) { - fprintf(stderr, "Failed to lock %s\n", LXC_USERNIC_DB); - exit(1); + 
usernic_error("Failed to lock %s.\n", LXC_USERNIC_DB); + exit(EXIT_FAILURE); } if (!may_access_netns(pid)) { - fprintf(stderr, "User %s may not modify netns for pid %d\n", - me, pid); - exit(1); + usernic_error("User %s may not modify netns for pid %d.\n", me, pid); + exit(EXIT_FAILURE); } n = get_alloted(me, argv[4], argv[5], &alloted); @@ -838,17 +985,21 @@ int main(int argc, char *argv[]) close(fd); free_alloted(&alloted); if (!gotone) { - fprintf(stderr, "Quota reached\n"); - exit(1); + usernic_error("%s", "Quota reached.\n"); + exit(EXIT_FAILURE); } - // Now rename the link + /* Now rename the link. */ if (rename_in_ns(pid, cnic, &vethname) < 0) { - fprintf(stderr, "Failed to rename the link\n"); - exit(1); + usernic_error("%s", "Failed to rename the link.\n"); + if (lxc_netdev_delete_by_name(cnic) < 0) + usernic_error("Failed to delete link \"%s\" the link. Manual cleanup needed.\n", cnic); + exit(EXIT_FAILURE); } - // write the name of the interface pair to the stdout - like eth0:veth9MT2L4 + /* Write the name of the interface pair to the stdout - like + * eth0:veth9MT2L4. 
+ */ fprintf(stdout, "%s:%s\n", vethname, nicname); - exit(0); + exit(EXIT_SUCCESS); } diff --git a/src/lxc/lxccontainer.c b/src/lxc/lxccontainer.c index 0dbbf2c..dc1d955 100644 --- a/src/lxc/lxccontainer.c +++ b/src/lxc/lxccontainer.c @@ -31,6 +31,7 @@ #include <stdio.h> #include <unistd.h> #include <arpa/inet.h> +#include <sys/sysmacros.h> #include <sys/mman.h> #include <sys/mount.h> #include <sys/syscall.h> @@ -64,9 +65,6 @@ #ifdef MAJOR_IN_MKDEV # include <sys/mkdev.h> #endif -#ifdef MAJOR_IN_SYSMACROS -# include <sys/sysmacros.h> -#endif #if HAVE_IFADDRS_H #include <ifaddrs.h> @@ -4372,7 +4370,10 @@ int list_active_containers(const char *lxcpath, char ***nret, *p2 = '\0'; if (is_hashed) { - if (strncmp(lxcpath, lxc_cmd_get_lxcpath(p), lxcpath_len) != 0) + char *recvpath = lxc_cmd_get_lxcpath(p); + if (!recvpath) + continue; + if (strncmp(lxcpath, recvpath, lxcpath_len) != 0) continue; p = lxc_cmd_get_name(p); } diff --git a/src/lxc/monitor.c b/src/lxc/monitor.c index d9b3e21..1758402 100644 --- a/src/lxc/monitor.c +++ b/src/lxc/monitor.c @@ -153,36 +153,52 @@ int lxc_monitor_close(int fd) return close(fd); } +/* Enforces \0-termination for the abstract unix socket. This is not required + * but allows us to print it out. + * + * Older version of liblxc only allowed for 105 bytes to be used for the + * abstract unix domain socket name because the code for our abstract unix + * socket handling performed invalid checks. Since we \0-terminate we could now + * have a maximum of 106 chars. But to not break backwards compatibility we keep + * the limit at 105. + */ int lxc_monitor_sock_name(const char *lxcpath, struct sockaddr_un *addr) { size_t len; int ret; - char *sockname; char *path; uint64_t hash; /* addr.sun_path is only 108 bytes, so we hash the full name and * then append as much of the name as we can fit. 
*/ - sockname = &addr->sun_path[1]; memset(addr, 0, sizeof(*addr)); addr->sun_family = AF_UNIX; + /* strlen("lxc/") + strlen("/monitor-sock") + 1 = 18 */ len = strlen(lxcpath) + 18; path = alloca(len); ret = snprintf(path, len, "lxc/%s/monitor-sock", lxcpath); if (ret < 0 || (size_t)ret >= len) { - ERROR("Failed to create path for monitor."); + ERROR("failed to create name for monitor socket"); return -1; } + /* Note: snprintf() will \0-terminate addr->sun_path on the 106th byte + * and so the abstract socket name has 105 "meaningful" characters. This + * is absolutely intentional. For further info read the comment for this + * function above! + */ len = sizeof(addr->sun_path) - 1; hash = fnv_64a_buf(path, ret, FNV1A_64_INIT); - ret = snprintf(sockname, len, "lxc/%016" PRIx64 "/%s", hash, lxcpath); - if (ret < 0) + ret = snprintf(addr->sun_path, len, "@lxc/%016" PRIx64 "/%s", hash, lxcpath); + if (ret < 0) { + ERROR("failed to create hashed name for monitor socket"); return -1; + } - sockname[sizeof(addr->sun_path)-3] = '\0'; - INFO("Using monitor socket name \"%s\".", sockname); + /* replace @ with \0 */ + addr->sun_path[0] = '\0'; + INFO("using monitor socket name \"%s\" (length of socket name %zu must be <= %zu)", &addr->sun_path[1], strlen(&addr->sun_path[1]), sizeof(addr->sun_path) - 3); return 0; } @@ -193,7 +209,8 @@ int lxc_monitor_open(const char *lxcpath) int fd; size_t retry; size_t len; - int ret = 0, backoff_ms[] = {10, 50, 100}; + int ret = -1; + int backoff_ms[] = {10, 50, 100}; if (lxc_monitor_sock_name(lxcpath, &addr) < 0) return -1; @@ -201,28 +218,32 @@ int lxc_monitor_open(const char *lxcpath) fd = socket(PF_UNIX, SOCK_STREAM, 0); if (fd < 0) { ERROR("Failed to create socket: %s.", strerror(errno)); - return -1; + return -errno; } - len = strlen(&addr.sun_path[1]) + 1; + len = strlen(&addr.sun_path[1]); + DEBUG("opening monitor socket %s with len %zu", &addr.sun_path[1], len); if (len >= sizeof(addr.sun_path) - 1) { - ret = -1; errno = 
ENAMETOOLONG; + ret = -errno; + ERROR("name of monitor socket too long (%zu bytes): %s", len, strerror(errno)); goto on_error; } for (retry = 0; retry < sizeof(backoff_ms) / sizeof(backoff_ms[0]); retry++) { - ret = connect(fd, (struct sockaddr *)&addr, offsetof(struct sockaddr_un, sun_path) + len); - if (ret == 0 || errno != ECONNREFUSED) + fd = lxc_abstract_unix_connect(addr.sun_path); + if (fd < 0 || errno != ECONNREFUSED) break; - ERROR("Failed to connect to monitor socket. Retrying in %d ms.", backoff_ms[retry]); + ERROR("Failed to connect to monitor socket. Retrying in %d ms: %s", backoff_ms[retry], strerror(errno)); usleep(backoff_ms[retry] * 1000); } - if (ret < 0) { + if (fd < 0) { + ret = -errno; ERROR("Failed to connect to monitor socket: %s.", strerror(errno)); goto on_error; } + ret = 0; return fd; @@ -340,7 +361,7 @@ int lxc_monitord_spawn(const char *lxcpath) close(pipefd[0]); - DEBUG("Sucessfully synced with child process."); + DEBUG("Successfully synced with child process."); exit(EXIT_SUCCESS); } @@ -366,7 +387,7 @@ int lxc_monitord_spawn(const char *lxcpath) DEBUG("Using pipe file descriptor %d for monitord.", pipefd[1]); execvp(args[0], args); - ERROR("Failed to exec lxc-monitord."); + SYSERROR("failed to exec lxc-monitord"); exit(EXIT_FAILURE); } diff --git a/src/lxc/seccomp.c b/src/lxc/seccomp.c index 83b1cb4..3b4d7ed 100644 --- a/src/lxc/seccomp.c +++ b/src/lxc/seccomp.c @@ -119,6 +119,7 @@ enum lxc_hostarch_t { lxc_seccomp_arch_all = 0, lxc_seccomp_arch_native, lxc_seccomp_arch_i386, + lxc_seccomp_arch_x32, lxc_seccomp_arch_amd64, lxc_seccomp_arch_arm, lxc_seccomp_arch_arm64, @@ -152,6 +153,7 @@ int get_hostarch(void) } if (strcmp(uts.machine, "i686") == 0) return lxc_seccomp_arch_i386; + // no x32 kernels else if (strcmp(uts.machine, "x86_64") == 0) return lxc_seccomp_arch_amd64; else if (strncmp(uts.machine, "armv7", 5) == 0) @@ -181,6 +183,7 @@ scmp_filter_ctx get_new_ctx(enum lxc_hostarch_t n_arch, uint32_t default_policy_ switch(n_arch) 
{ case lxc_seccomp_arch_i386: arch = SCMP_ARCH_X86; break; + case lxc_seccomp_arch_x32: arch = SCMP_ARCH_X32; break; case lxc_seccomp_arch_amd64: arch = SCMP_ARCH_X86_64; break; case lxc_seccomp_arch_arm: arch = SCMP_ARCH_ARM; break; #ifdef SCMP_ARCH_AARCH64 @@ -218,6 +221,11 @@ scmp_filter_ctx get_new_ctx(enum lxc_hostarch_t n_arch, uint32_t default_policy_ seccomp_release(ctx); return NULL; } +#ifdef SCMP_FLTATR_ATL_TSKIP + if (seccomp_attr_set(ctx, SCMP_FLTATR_ATL_TSKIP, 1)) { + WARN("Failed to turn on seccomp nop-skip, continuing"); + } +#endif ret = seccomp_arch_add(ctx, arch); if (ret != 0) { ERROR("Seccomp error %d (%s) adding arch: %d", ret, @@ -336,7 +344,10 @@ static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf) compat_arch[0] = SCMP_ARCH_X86; compat_ctx[0] = get_new_ctx(lxc_seccomp_arch_i386, default_policy_action); - if (!compat_ctx[0]) + compat_arch[1] = SCMP_ARCH_X32; + compat_ctx[1] = get_new_ctx(lxc_seccomp_arch_x32, + default_policy_action); + if (!compat_ctx[0] || !compat_ctx[1]) goto bad; #ifdef SCMP_ARCH_PPC } else if (native_arch == lxc_seccomp_arch_ppc64) { @@ -390,6 +401,11 @@ static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf) ERROR("Failed to turn off n-new-privs."); return -1; } +#ifdef SCMP_FLTATR_ATL_TSKIP + if (seccomp_attr_set(conf->seccomp_ctx, SCMP_FLTATR_ATL_TSKIP, 1)) { + WARN("Failed to turn on seccomp nop-skip, continuing"); + } +#endif } while (fgets(line, 1024, f)) { @@ -410,6 +426,13 @@ static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf) continue; } cur_rule_arch = lxc_seccomp_arch_i386; + } else if (strcmp(line, "[x32]") == 0 || + strcmp(line, "[X32]") == 0) { + if (native_arch != lxc_seccomp_arch_amd64) { + cur_rule_arch = lxc_seccomp_arch_unknown; + continue; + } + cur_rule_arch = lxc_seccomp_arch_x32; } else if (strcmp(line, "[X86_64]") == 0 || strcmp(line, "[x86_64]") == 0) { if (native_arch != lxc_seccomp_arch_amd64) { @@ -704,7 +727,7 @@ int 
lxc_read_seccomp_config(struct lxc_conf *conf) return -1; } -/* turn of no-new-privs. We don't want it in lxc, and it breaks +/* turn off no-new-privs. We don't want it in lxc, and it breaks * with apparmor */ #if HAVE_SCMP_FILTER_CTX check_seccomp_attr_set = seccomp_attr_set(conf->seccomp_ctx, SCMP_FLTATR_CTL_NNP, 0); @@ -715,6 +738,11 @@ int lxc_read_seccomp_config(struct lxc_conf *conf) ERROR("Failed to turn off n-new-privs."); return -1; } +#ifdef SCMP_FLTATR_ATL_TSKIP + if (seccomp_attr_set(conf->seccomp_ctx, SCMP_FLTATR_ATL_TSKIP, 1)) { + WARN("Failed to turn on seccomp nop-skip, continuing"); + } +#endif f = fopen(conf->seccomp, "r"); if (!f) { diff --git a/src/lxc/start.c b/src/lxc/start.c index c2c14a7..bca7f8e 100644 --- a/src/lxc/start.c +++ b/src/lxc/start.c @@ -46,7 +46,7 @@ #include <sys/un.h> #include <sys/wait.h> -#if HAVE_SYS_CAPABILITY_H +#if HAVE_LIBCAP #include <sys/capability.h> #endif @@ -319,7 +319,7 @@ static int signal_handler(int fd, uint32_t events, void *data, * by a process different from the container init. */ if (siginfo.ssi_pid != *pid) { - WARN("Invalid pid for SIGCHLD. Received pid %d, expected pid %d.", siginfo.ssi_pid, *pid); + NOTICE("Received SIGCHLD from pid %d instead of container init %d.", siginfo.ssi_pid, *pid); return init_died ? 1 : 0; } @@ -361,7 +361,7 @@ int lxc_poll(const char *name, struct lxc_handler *handler) } if (handler->conf->need_utmp_watch) { - #if HAVE_SYS_CAPABILITY_H + #if HAVE_LIBCAP if (lxc_utmp_mainloop_add(&descr, handler)) { ERROR("Failed to add utmp handler to LXC mainloop."); goto out_mainloop_open; @@ -773,7 +773,7 @@ static int do_start(void *data) goto out_warn_father; } - #if HAVE_SYS_CAPABILITY_H + #if HAVE_LIBCAP if (handler->conf->need_utmp_watch) { if (prctl(PR_CAPBSET_DROP, CAP_SYS_BOOT, 0, 0, 0)) { SYSERROR("Failed to remove the CAP_SYS_BOOT capability."); @@ -873,7 +873,11 @@ static int do_start(void *data) * further above. 
Only drop groups if we can, so ensure that we * have necessary privilege. */ - have_cap_setgid = lxc_cap_is_set(CAP_SETGID, CAP_EFFECTIVE); + #if HAVE_LIBCAP + have_cap_setgid = lxc_proc_cap_is_set(CAP_SETGID, CAP_EFFECTIVE); + #else + have_cap_setgid = false; + #endif if (lxc_list_empty(&handler->conf->id_map) && have_cap_setgid) { if (lxc_setgroups(0, NULL) < 0) goto out_warn_father; @@ -1042,6 +1046,13 @@ void resolve_clone_flags(struct lxc_handler *handler) INFO("Inheriting a UTS namespace."); } +/* lxc_spawn() performs crucial setup tasks and clone()s the new process which + * exec()s the requested container binary. + * Note that lxc_spawn() runs in the parent namespaces. Any operations performed + * right here should be double checked if they'd pose a security risk. (For + * example, any {u}mount() operations performed here will be reflected on the + * host!) + */ static int lxc_spawn(struct lxc_handler *handler) { int failed_before_rename = 0; @@ -1255,9 +1266,6 @@ static int lxc_spawn(struct lxc_handler *handler) if (lxc_sync_barrier_child(handler, LXC_SYNC_POST_CGROUP)) return -1; - if (detect_shared_rootfs()) - umount2(handler->conf->rootfs.mount, MNT_DETACH); - if (handler->ops->post_start(handler, handler->data)) goto out_abort; @@ -1308,7 +1316,7 @@ int __lxc_start(const char *name, struct lxc_conf *conf, handler->netnsfd = -1; if (must_drop_cap_sys_boot(handler->conf)) { - #if HAVE_SYS_CAPABILITY_H + #if HAVE_LIBCAP DEBUG("Dropping CAP_SYS_BOOT capability."); #else DEBUG("Not dropping CAP_SYS_BOOT capability as capabilities aren't supported."); diff --git a/src/lxc/tools/lxc-checkconfig.in b/src/lxc/tools/lxc-checkconfig.in index 61627e0..4182191 100644 --- a/src/lxc/tools/lxc-checkconfig.in +++ b/src/lxc/tools/lxc-checkconfig.in @@ -88,6 +88,24 @@ echo -n "Utsname namespace: " && is_enabled CONFIG_UTS_NS echo -n "Ipc namespace: " && is_enabled CONFIG_IPC_NS yes echo -n "Pid namespace: " && is_enabled CONFIG_PID_NS yes echo -n "User namespace: " && 
is_enabled CONFIG_USER_NS +if is_set CONFIG_USER_NS; then + if type newuidmap > /dev/null 2>&1; then + f=`type -P newuidmap` + if [ ! -u "${f}" ]; then + echo "Warning: newuidmap is not setuid-root" + fi + else + echo "newuidmap is not installed" + fi + if type newgidmap > /dev/null 2>&1; then + f=`type -P newgidmap` + if [ ! -u "${f}" ]; then + echo "Warning: newgidmap is not setuid-root" + fi + else + echo "newgidmap is not installed" + fi +fi echo -n "Network namespace: " && is_enabled CONFIG_NET_NS if ([ $KVER_MAJOR -lt 4 ]) || ([ $KVER_MAJOR -eq 4 ] && [ $KVER_MINOR -lt 7 ]); then echo -n "Multiple /dev/pts instances: " && is_enabled DEVPTS_MULTIPLE_INSTANCES diff --git a/src/lxc/tools/lxc-start-ephemeral.in b/src/lxc/tools/lxc-start-ephemeral.in index 7e0c8ea..90d5f6f 100644 --- a/src/lxc/tools/lxc-start-ephemeral.in +++ b/src/lxc/tools/lxc-start-ephemeral.in @@ -28,6 +28,7 @@ import argparse import gettext import lxc +import locale import os import sys import subprocess @@ -363,9 +364,14 @@ if os.path.exists("/proc/self/ns/pid"): if args.user: username = args.user - line = subprocess.check_output( - ["getent", "passwd", username], - universal_newlines=True).rstrip("\n") + # This should really just use universal_newlines=True, but we do + # the decoding by hand instead for compatibility with Python + # 3.2; that used locale.getpreferredencoding() internally rather + # than locale.getpreferredencoding(False), and the former breaks + # here because we can't reload codecs at this point unless the + # container has the same version of Python installed. 
+ line = subprocess.check_output(["getent", "passwd", username]) + line = line.decode(locale.getpreferredencoding(False)).rstrip("\n") _, _, pw_uid, pw_gid, _, pw_dir, _ = line.split(":", 6) pw_uid = int(pw_uid) pw_gid = int(pw_gid) diff --git a/src/lxc/tools/lxc_attach.c b/src/lxc/tools/lxc_attach.c index ca66201..c5e319f 100644 --- a/src/lxc/tools/lxc_attach.c +++ b/src/lxc/tools/lxc_attach.c @@ -142,7 +142,7 @@ static int my_parser(struct lxc_arguments* args, int c, char* arg) * * then we memmove() * - * dest: del + 1 == ONT|PID + * dest: del + 1 == OUNT|PID * src: del + 3 == NT|PID */ while ((del = strstr(arg, "MOUNT"))) diff --git a/src/lxc/tools/lxc_execute.c b/src/lxc/tools/lxc_execute.c index fae2dca..f26105a 100644 --- a/src/lxc/tools/lxc_execute.c +++ b/src/lxc/tools/lxc_execute.c @@ -166,5 +166,5 @@ int main(int argc, char *argv[]) if (ret < 0) exit(EXIT_FAILURE); - exit(EXIT_SUCCESS); + exit(ret); } diff --git a/src/lxc/tools/lxc_info.c b/src/lxc/tools/lxc_info.c index 2888537..c977f29 100644 --- a/src/lxc/tools/lxc_info.c +++ b/src/lxc/tools/lxc_info.c @@ -204,7 +204,7 @@ static void print_net_stats(struct lxc_container *c) static void print_stats(struct lxc_container *c) { int i, ret; - char buf[256]; + char buf[4096]; ret = c->get_cgroup_item(c, "cpuacct.usage", buf, sizeof(buf)); if (ret > 0 && ret < sizeof(buf)) { diff --git a/src/lxc/tools/lxc_ls.c b/src/lxc/tools/lxc_ls.c index 363d3d2..63053b1 100644 --- a/src/lxc/tools/lxc_ls.c +++ b/src/lxc/tools/lxc_ls.c @@ -356,7 +356,7 @@ static int ls_get(struct ls **m, size_t *size, const struct lxc_arguments *args, } /* Do not do more work than is necessary right from the start. 
*/ - if (args->ls_active || (args->ls_active && args->ls_frozen)) + if (args->ls_active || args->ls_frozen) num = list_active_containers(path, &containers, NULL); else num = list_all_containers(path, &containers, NULL); diff --git a/src/lxc/tools/lxc_top.c b/src/lxc/tools/lxc_top.c index d8e7247..797ff3c 100644 --- a/src/lxc/tools/lxc_top.c +++ b/src/lxc/tools/lxc_top.c @@ -513,5 +513,5 @@ int main(int argc, char *argv[]) err1: lxc_mainloop_close(&descr); out: - exit(EXIT_FAILURE); + exit(ret); } diff --git a/src/lxc/tools/lxc_unshare.c b/src/lxc/tools/lxc_unshare.c index 82c8244..a0f943f 100644 --- a/src/lxc/tools/lxc_unshare.c +++ b/src/lxc/tools/lxc_unshare.c @@ -225,7 +225,7 @@ int main(int argc, char *argv[]) * * then we memmove() * - * dest: del + 1 == ONT|PID + * dest: del + 1 == OUNT|PID * src: del + 3 == NT|PID */ while ((del = strstr(namespaces, "MOUNT"))) diff --git a/src/lxc/utils.c b/src/lxc/utils.c index 0227c32..778d4da 100644 --- a/src/lxc/utils.c +++ b/src/lxc/utils.c @@ -1014,7 +1014,7 @@ int randseed(bool srand_it) /* srand pre-seed function based on /dev/urandom */ - unsigned int seed=time(NULL)+getpid(); + unsigned int seed = time(NULL) + getpid(); FILE *f; f = fopen("/dev/urandom", "r"); @@ -1199,7 +1199,7 @@ bool detect_ramfs_rootfs(void) return false; } -char *on_path(char *cmd, const char *rootfs) { +char *on_path(const char *cmd, const char *rootfs) { char *path = NULL; char *entry = NULL; char *saveptr = NULL; @@ -1405,11 +1405,8 @@ char *get_template_path(const char *t) } /* - * Sets the process title to the specified title. Note: - * 1. this function requires root to succeed - * 2. it clears /proc/self/environ - * 3. it may not succed (e.g. if title is longer than /proc/self/environ + - * the original title) + * Sets the process title to the specified title. Note that this may fail if + * the kernel doesn't support PR_SET_MM_MAP (kernels <3.18). 
*/ int setproctitle(char *title) { @@ -1463,34 +1460,24 @@ int setproctitle(char *title) if (!tmp) return -1; - i = sscanf(tmp, "%lu %lu %lu %lu %lu %lu %lu", + i = sscanf(tmp, "%lu %lu %lu %*u %*u %lu %lu", &start_data, &end_data, &start_brk, - &arg_start, - &arg_end, &env_start, &env_end); - if (i != 7) + if (i != 5) return -1; /* Include the null byte here, because in the calculations below we * want to have room for it. */ len = strlen(title) + 1; - /* If we don't have enough room by just overwriting the old proctitle, - * let's allocate a new one. - */ - if (len > arg_end - arg_start) { - void *m; - m = realloc(proctitle, len); - if (!m) - return -1; - proctitle = m; - - arg_start = (unsigned long) proctitle; - } + proctitle = realloc(proctitle, len); + if (!proctitle) + return -1; + arg_start = (unsigned long) proctitle; arg_end = arg_start + len; brk_val = syscall(__NR_brk, 0); @@ -1767,7 +1754,7 @@ int safe_mount(const char *src, const char *dest, const char *fstype, * * NOTE: not to be called from inside the container namespace! 
*/ -int mount_proc_if_needed(const char *rootfs) +int lxc_mount_proc_if_needed(const char *rootfs) { char path[MAXPATHLEN]; char link[20]; @@ -1779,37 +1766,48 @@ int mount_proc_if_needed(const char *rootfs) SYSERROR("proc path name too long"); return -1; } + memset(link, 0, 20); linklen = readlink(path, link, 20); mypid = (int)getpid(); - INFO("I am %d, /proc/self points to '%s'", mypid, link); + INFO("I am %d, /proc/self points to \"%s\"", mypid, link); + ret = snprintf(path, MAXPATHLEN, "%s/proc", rootfs); if (ret < 0 || ret >= MAXPATHLEN) { SYSERROR("proc path name too long"); return -1; } - if (linklen < 0) /* /proc not mounted */ + + /* /proc not mounted */ + if (linklen < 0) { + if (mkdir(path, 0755) && errno != EEXIST) + return -1; goto domount; + } + if (lxc_safe_int(link, &link_to_pid) < 0) return -1; + + /* wrong /procs mounted */ if (link_to_pid != mypid) { - /* wrong /procs mounted */ - umount2(path, MNT_DETACH); /* ignore failure */ + /* ignore failure */ + umount2(path, MNT_DETACH); goto domount; } + /* the right proc is already mounted */ return 0; domount: - if (!strcmp(rootfs,"")) /* rootfs is NULL */ + /* rootfs is NULL */ + if (!strcmp(rootfs,"")) ret = mount("proc", path, "proc", 0, NULL); else ret = safe_mount("proc", path, "proc", 0, NULL, rootfs); - if (ret < 0) return -1; - INFO("Mounted /proc in container for security transition"); + INFO("mounted /proc in container for security transition"); return 1; } @@ -2083,3 +2081,157 @@ int lxc_setgroups(int size, gid_t list[]) return 0; } + +static int lxc_get_unused_loop_dev_legacy(char *loop_name) +{ + struct dirent *dp; + struct loop_info64 lo64; + DIR *dir; + int dfd = -1, fd = -1, ret = -1; + + dir = opendir("/dev"); + if (!dir) + return -1; + + while ((dp = readdir(dir))) { + if (!dp) + break; + + if (strncmp(dp->d_name, "loop", 4) != 0) + continue; + + dfd = dirfd(dir); + if (dfd < 0) + continue; + + fd = openat(dfd, dp->d_name, O_RDWR); + if (fd < 0) + continue; + + ret = ioctl(fd, 
LOOP_GET_STATUS64, &lo64); + if (ret < 0) { + if (ioctl(fd, LOOP_GET_STATUS64, &lo64) == 0 || + errno != ENXIO) { + close(fd); + fd = -1; + continue; + } + } + + ret = snprintf(loop_name, LO_NAME_SIZE, "/dev/%s", dp->d_name); + if (ret < 0 || ret >= LO_NAME_SIZE) { + close(fd); + fd = -1; + continue; + } + + break; + } + + closedir(dir); + + if (fd < 0) + return -1; + + return fd; +} + +static int lxc_get_unused_loop_dev(char *name_loop) +{ + int loop_nr, ret; + int fd_ctl = -1, fd_tmp = -1; + + fd_ctl = open("/dev/loop-control", O_RDWR | O_CLOEXEC); + if (fd_ctl < 0) + return -ENODEV; + + loop_nr = ioctl(fd_ctl, LOOP_CTL_GET_FREE); + if (loop_nr < 0) + goto on_error; + + ret = snprintf(name_loop, LO_NAME_SIZE, "/dev/loop%d", loop_nr); + if (ret < 0 || ret >= LO_NAME_SIZE) + goto on_error; + + fd_tmp = open(name_loop, O_RDWR | O_CLOEXEC); + if (fd_tmp < 0) + goto on_error; + +on_error: + close(fd_ctl); + return fd_tmp; +} + +int lxc_prepare_loop_dev(const char *source, char *loop_dev, int flags) +{ + int ret; + struct loop_info64 lo64; + int fd_img = -1, fret = -1, fd_loop = -1; + + fd_loop = lxc_get_unused_loop_dev(loop_dev); + if (fd_loop < 0) { + if (fd_loop == -ENODEV) + fd_loop = lxc_get_unused_loop_dev_legacy(loop_dev); + else + goto on_error; + } + + fd_img = open(source, O_RDWR | O_CLOEXEC); + if (fd_img < 0) + goto on_error; + + ret = ioctl(fd_loop, LOOP_SET_FD, fd_img); + if (ret < 0) + goto on_error; + + memset(&lo64, 0, sizeof(lo64)); + lo64.lo_flags = flags; + + ret = ioctl(fd_loop, LOOP_SET_STATUS64, &lo64); + if (ret < 0) + goto on_error; + + fret = 0; + +on_error: + if (fd_img >= 0) + close(fd_img); + + if (fret < 0 && fd_loop >= 0) { + close(fd_loop); + fd_loop = -1; + } + + return fd_loop; +} + +int lxc_unstack_mountpoint(const char *path, bool lazy) +{ + int ret; + int umounts = 0; + +pop_stack: + ret = umount2(path, lazy ? 
MNT_DETACH : 0); + if (ret < 0) { + /* We consider anything else than EINVAL deadly to prevent going + * into an infinite loop. (The other alternative is constantly + * parsing /proc/self/mountinfo which is yucky and probably + * racy.) + */ + if (errno != EINVAL) + return -errno; + } else { + /* Just stop counting when this happens. That'd just be so + * stupid that we won't even bother trying to report back the + * correct value anymore. + */ + if (umounts != INT_MAX) + umounts++; + /* We succeeded in umounting. Make sure that there's no other + * mountpoint stacked underneath. + */ + goto pop_stack; + } + + return umounts; +} diff --git a/src/lxc/utils.h b/src/lxc/utils.h index 2b56905..320aa6b 100644 --- a/src/lxc/utils.h +++ b/src/lxc/utils.h @@ -23,15 +23,19 @@ #ifndef __LXC_UTILS_H #define __LXC_UTILS_H +/* Properly support loop devices on 32bit systems. */ +#define _FILE_OFFSET_BITS 64 + #include "config.h" #include <errno.h> #include <stdarg.h> #include <stdio.h> #include <stdbool.h> +#include <unistd.h> +#include <linux/loop.h> #include <sys/syscall.h> #include <sys/types.h> -#include <unistd.h> #include "initutils.h" @@ -39,6 +43,7 @@ /* Maximum number for 64 bit integer is a string with 21 digits: 2^64 - 1 = 21 */ #define LXC_NUMSTRLEN64 21 #define LXC_LINELEN 4096 +#define LXC_IDMAPLEN 4096 /* returns 1 on success, 0 if there were any failures */ extern int lxc_rmdir_onedev(char *path, const char *exclude); @@ -163,6 +168,15 @@ static inline int signalfd(int fd, const sigset_t *mask, int flags) } #endif +/* loop devices */ +#ifndef LO_FLAGS_AUTOCLEAR +#define LO_FLAGS_AUTOCLEAR 4 +#endif + +#ifndef LOOP_CTL_GET_FREE +#define LOOP_CTL_GET_FREE 0x4C82 +#endif + /* Struct to carry child pid from lxc_popen() to lxc_pclose(). 
* Not an opaque struct to allow direct access to the underlying FILE * * (i.e., struct lxc_popen_FILE *file; fgets(buf, sizeof(buf), file->f)) @@ -301,7 +315,7 @@ uint64_t fnv_64a_buf(void *buf, size_t len, uint64_t hval); int detect_shared_rootfs(void); bool detect_ramfs_rootfs(void); -char *on_path(char *cmd, const char *rootfs); +char *on_path(const char *cmd, const char *rootfs); bool file_exists(const char *f); bool cgns_supported(void); char *choose_init(const char *rootfs); @@ -312,7 +326,7 @@ char *get_template_path(const char *t); int setproctitle(char *title); int safe_mount(const char *src, const char *dest, const char *fstype, unsigned long flags, const void *data, const char *rootfs); -int mount_proc_if_needed(const char *rootfs); +int lxc_mount_proc_if_needed(const char *rootfs); int open_devnull(void); int set_stdfds(int fd); int null_stdfds(void); @@ -331,4 +345,14 @@ int lxc_safe_long(const char *numstr, long int *converted); int lxc_switch_uid_gid(uid_t uid, gid_t gid); int lxc_setgroups(int size, gid_t list[]); +/* Find an unused loop device and associate it with source. */ +int lxc_prepare_loop_dev(const char *source, char *loop_dev, int flags); + +/* Clear all mounts on a given node. + * >= 0 successfully cleared. The number returned is the number of umounts + * performed. + * < 0 error umounting. Return -errno. 
+ */ +int lxc_unstack_mountpoint(const char *path, bool lazy); + #endif /* __LXC_UTILS_H */ diff --git a/src/lxc/version.h b/src/lxc/version.h index 7ebf428..d65ed7e 100644 --- a/src/lxc/version.h +++ b/src/lxc/version.h @@ -26,8 +26,8 @@ #define LXC_DEVEL 0 #define LXC_VERSION_MAJOR 2 #define LXC_VERSION_MINOR 0 -#define LXC_VERSION_MICRO 7 +#define LXC_VERSION_MICRO 8 #define LXC_VERSION_ABI "1.2.0" -#define LXC_VERSION "2.0.7" +#define LXC_VERSION "2.0.8" #endif diff --git a/src/python-lxc/lxc.c b/src/python-lxc/lxc.c index 4f637d0..5f15072 100644 --- a/src/python-lxc/lxc.c +++ b/src/python-lxc/lxc.c @@ -353,7 +353,14 @@ LXC_get_global_config_item(PyObject *self, PyObject *args, PyObject *kwds) static PyObject * LXC_get_version(PyObject *self, PyObject *args) { - return PyUnicode_FromString(lxc_get_version()); + const char *rv = NULL; + + rv = lxc_get_version(); + if (!rv) { + return PyUnicode_FromString(""); + } + + return PyUnicode_FromString(rv); } static PyObject * @@ -407,6 +414,10 @@ LXC_list_containers(PyObject *self, PyObject *args, PyObject *kwds) /* Generate the tuple */ list = PyTuple_New(list_count); for (i = 0; i < list_count; i++) { + if (!names[i]) { + continue; + } + PyTuple_SET_ITEM(list, i, PyUnicode_FromString(names[i])); free(names[i]); } @@ -451,7 +462,7 @@ Container_init(Container *self, PyObject *args, PyObject *kwds) Py_XDECREF(fs_config_path); PyErr_Format(PyExc_RuntimeError, "%s:%s:%d: error during init for container '%s'.", - __FUNCTION__, __FILE__, __LINE__, name); + __FUNCTION__, __FILE__, __LINE__, name); return -1; } @@ -473,8 +484,14 @@ Container_new(PyTypeObject *type, PyObject *args, PyObject *kwds) static PyObject * Container_config_file_name(Container *self, void *closure) { - return PyUnicode_FromString( - self->container->config_file_name(self->container)); + char *rv = NULL; + + rv = self->container->config_file_name(self->container); + if (!rv) { + return PyUnicode_FromString(""); + } + + return PyUnicode_FromString(rv); } 
static PyObject * @@ -506,6 +523,10 @@ Container_init_pid(Container *self, void *closure) static PyObject * Container_name(Container *self, void *closure) { + if (!self->container->name) { + return PyUnicode_FromString(""); + } + return PyUnicode_FromString(self->container->name); } @@ -522,7 +543,15 @@ Container_running(Container *self, void *closure) static PyObject * Container_state(Container *self, void *closure) { - return PyUnicode_FromString(self->container->state(self->container)); + const char *rv = NULL; + + rv = self->container->state(self->container); + + if (!rv) { + return PyUnicode_FromString(""); + } + + return PyUnicode_FromString(rv); } /* Container Functions */ @@ -946,8 +975,15 @@ Container_get_config_item(Container *self, PyObject *args, PyObject *kwds) static PyObject * Container_get_config_path(Container *self, PyObject *args, PyObject *kwds) { - return PyUnicode_FromString( - self->container->get_config_path(self->container)); + const char *rv = NULL; + + rv = self->container->get_config_path(self->container); + + if (!rv) { + return PyUnicode_FromString(""); + } + + return PyUnicode_FromString(rv); } static PyObject * @@ -1011,6 +1047,11 @@ Container_get_interfaces(Container *self) /* Add the entries to the tuple and free the memory */ i = 0; while (interfaces[i]) { + if (!interfaces[i]) { + i++; + continue; + } + PyObject *unicode = PyUnicode_FromString(interfaces[i]); if (!unicode) { Py_DECREF(ret); @@ -1066,6 +1107,11 @@ Container_get_ips(Container *self, PyObject *args, PyObject *kwds) /* Add the entries to the tuple and free the memory */ i = 0; while (ips[i]) { + if (!ips[i]) { + i++; + continue; + } + PyObject *unicode = PyUnicode_FromString(ips[i]); if (!unicode) { Py_DECREF(ret); diff --git a/templates/lxc-alpine.in b/templates/lxc-alpine.in index 06616b3..e66c469 100644 --- a/templates/lxc-alpine.in +++ b/templates/lxc-alpine.in @@ -46,7 +46,7 @@ ebf31683b56410ecc4c00acd9f6e2839e237a3b62b5ae7ef686705c7ba0396a9 alpine-devel@l 
12f899e55a7691225603d6fb3324940fc51cd7f133e7ead788663c2b7eecb00c alpine-de...@lists.alpinelinux.org-5261cecb.rsa.pub" readonly APK_KEYS_URI='http://alpinelinux.org/keys' -readonly MIRRORS_LIST_URL='http://rsync.alpinelinux.org/alpine/MIRRORS.txt' +readonly DEFAULT_MIRROR_URL='http://dl-cdn.alpinelinux.org/alpine' : ${APK_KEYS_DIR:=/etc/apk/keys} if ! ls "$APK_KEYS_DIR"/alpine* >/dev/null 2>&1; then @@ -76,7 +76,7 @@ usage() { to the host arch. -d, --debug Run this script in a debug mode (set -x and wget w/o -q). -F, --flush-cache Remove cached files before build. - -m URL --mirror=URL The Alpine mirror to use; defaults to random mirror. + -m URL --mirror=URL The Alpine mirror to use; defaults to $DEFAULT_MIRROR_URL. -r VER, --release=VER The Alpine release branch to install; default is the latest stable. @@ -130,11 +130,6 @@ parse_arch() { esac } -random_mirror_url() { - local url=$(fetch "$MIRRORS_LIST_URL" | shuf -n 1) - [ -n "$url" ] && echo "$url" -} - run_exclusively() { local lock_name="$1" local timeout=$2 @@ -266,8 +261,8 @@ install() { } install_packages() { - local arch="$1"; shift - local packages="$@" + local arch="$1" + local packages="$2" $APK --arch="$arch" --root=. --keys-dir="$APK_KEYS_DIR" \ --update-cache --initdb add $packages @@ -475,7 +470,7 @@ extra_packages="$@" # Set global variables. readonly DEBUG="$debug" readonly FLUSH_CACHE="$flush_cache" -readonly MIRROR_URL="${mirror_url:-$(random_mirror_url)}" +readonly MIRROR_URL="${mirror_url:-$DEFAULT_MIRROR_URL}" # Validate options. 
[ -n "$name" ] || die 1 'Missing required option --name' diff --git a/templates/lxc-altlinux.in b/templates/lxc-altlinux.in index 69c18d4..7accf24 100644 --- a/templates/lxc-altlinux.in +++ b/templates/lxc-altlinux.in @@ -43,7 +43,6 @@ cache_base=@LOCALSTATEDIR@/cache/lxc/altlinux/$arch default_path=@LXCPATH@ default_profile=default profile_dir=/etc/lxc/profiles -root_password=rooter lxc_network_type=veth lxc_network_link=virbr0 @@ -156,8 +155,10 @@ EOF mkdir -m 755 ${dev_path}/net mknod -m 666 ${dev_path}/net/tun c 10 200 - echo "setting root passwd to $root_password" - echo "root:$root_password" | chroot $rootfs_path chpasswd + if [ -n "${root_password}" ]; then + echo "setting root passwd to $root_password" + echo "root:$root_password" | chroot $rootfs_path chpasswd + fi return 0 } diff --git a/templates/lxc-archlinux.in b/templates/lxc-archlinux.in index c52459d..200b84e 100644 --- a/templates/lxc-archlinux.in +++ b/templates/lxc-archlinux.in @@ -42,7 +42,6 @@ export PATH=$PATH:/usr/sbin:/usr/bin:/sbin:/bin arch=$(uname -m) default_path="@LXCPATH@" default_locale="en-US.UTF-8" -default_timezone="UTC" pacman_config="/etc/pacman.conf" common_config="@LXCTEMPLATECONFIG@/common.conf" shared_config="@LXCTEMPLATECONFIG@/archlinux.common.conf" @@ -87,9 +86,6 @@ configure_arch() { # hostname and nameservers echo "${name}" > "${rootfs_path}/etc/hostname" - while read r; do - [ "${r#nameserver}" = "$r" ] || echo "$r" - done < /etc/resolv.conf > "${rootfs_path}/etc/resolv.conf" # network configuration cat > "${rootfs_path}/etc/systemd/network/eth0.network" << EOF @@ -104,7 +100,6 @@ EOF arch-chroot "${rootfs_path}" /bin/bash -s << EOF mkdir /run/lock locale-gen -ln -s /usr/share/zoneinfo/${default_timezone} /etc/localtime # set default boot target ln -s /lib/systemd/system/multi-user.target /etc/systemd/system/default.target # override getty@.service for container ttys diff --git a/templates/lxc-busybox.in b/templates/lxc-busybox.in index 336fa12..0d8db33 100644 --- 
a/templates/lxc-busybox.in +++ b/templates/lxc-busybox.in @@ -330,35 +330,6 @@ configure_busybox() chmod +s $rootfs/bin/passwd touch $rootfs/etc/shadow - # setting passwd for root - CHPASSWD_FILE=$rootfs/root/chpasswd.sh - - cat <<EOF >$CHPASSWD_FILE -echo "setting root password to \"root\"" - -mount -n --bind /lib $rootfs/lib -if [ \$? -ne 0 ]; then - echo "Failed bind-mounting /lib at $rootfs/lib" - exit 1 -fi - -chroot $rootfs chpasswd <<EOFF 2>/dev/null -root:root -EOFF - - -if [ \$? -ne 0 ]; then - echo "Failed to change root password" - exit 1 -fi - -umount $rootfs/lib - -EOF - - lxc-unshare -s MOUNT -- /bin/sh < $CHPASSWD_FILE - rm $CHPASSWD_FILE - return 0 } diff --git a/templates/lxc-debian.in b/templates/lxc-debian.in index f752ccd..4477aff 100644 --- a/templates/lxc-debian.in +++ b/templates/lxc-debian.in @@ -158,9 +158,6 @@ EOF echo "Timezone in container is not configured. Adjust it manually." fi - echo "root:root" | chroot "$rootfs" chpasswd - echo "Root password is 'root', please change !" - return 0 } @@ -291,9 +288,6 @@ openssh-server if [ ! 
-f $releasekeyring ]; then releasekeyring="$cache/archive-key.gpg" case $release in - "squeeze") - gpgkeyname="archive-key-6.0" - ;; "wheezy") gpgkeyname="archive-key-7.0" ;; diff --git a/templates/lxc-download.in b/templates/lxc-download.in index e0ffdb2..f09475d 100644 --- a/templates/lxc-download.in +++ b/templates/lxc-download.in @@ -34,7 +34,6 @@ DOWNLOAD_FLUSH_CACHE="false" DOWNLOAD_FORCE_CACHE="false" DOWNLOAD_INTERACTIVE="false" DOWNLOAD_KEYID="0xE7FB0CAEC8173D669066514CBAEFF88C22F6E216" -DOWNLOAD_KEYSERVER="hkp://pool.sks-keyservers.net" DOWNLOAD_LIST_IMAGES="false" DOWNLOAD_MODE="system" DOWNLOAD_READY_GPG="false" @@ -54,9 +53,13 @@ LXC_NAME= LXC_PATH= LXC_ROOTFS= -# Deal with GPG over http proxy -if [ -n "${http_proxy:-}" ]; then - DOWNLOAD_KEYSERVER="hkp://p80.pool.sks-keyservers.net:80" +if [ -z "${DOWNLOAD_KEYSERVER:-}" ]; then + DOWNLOAD_KEYSERVER="hkp://pool.sks-keyservers.net" + + # Deal with GPG over http proxy + if [ -n "${http_proxy:-}" ]; then + DOWNLOAD_KEYSERVER="hkp://p80.pool.sks-keyservers.net:80" + fi fi # Make sure the usual locations are in PATH @@ -201,7 +204,7 @@ Optional arguments: [ --variant <variant> ]: Variant of the image (default: "default") [ --server <server> ]: Image server (default: "images.linuxcontainers.org") [ --keyid <keyid> ]: GPG keyid (default: 0x...) -[ --keyserver <keyserver> ]: GPG keyserver to use +[ --keyserver <keyserver> ]: GPG keyserver to use. Environment variable: DOWNLOAD_KEYSERVER [ --no-validate ]: Disable GPG validation (not recommended) [ --flush-cache ]: Flush the local copy (if present) [ --force-cache ]: Force the use of the local copy even if expired @@ -212,6 +215,11 @@ LXC internal arguments (do not pass manually!): [ --rootfs <rootfs> ]: The path to the container's rootfs [ --mapped-uid <map> ]: A uid map (user namespaces) [ --mapped-gid <map> ]: A gid map (user namespaces) + +Environment Variables: +DOWNLOAD_KEYSERVER : The URL of the key server to use, instead of the default. 
+ Can be further overridden by using optional argument --keyserver + EOF return 0 } diff --git a/templates/lxc-gentoo.in b/templates/lxc-gentoo.in index 2ad16e8..47f24d0 100644 --- a/templates/lxc-gentoo.in +++ b/templates/lxc-gentoo.in @@ -654,8 +654,6 @@ container_auth() printf " => done. if you didn't specify , default is 'toor'\n" if [[ -n "${forced_password}" ]]; then store_user_message "${user} has the password you give for him" - else - store_user_message "${user} has the default password 'toor', please change it ASAP" fi fi @@ -779,7 +777,6 @@ set_default_arch mirror="http://distfiles.gentoo.org" user="root" -password="toor" tty=1 settings="common" options=$(getopt -o hp:n:a:FcPv:t:S:u:w:s:m: -l help,rootfs:,path:,name:,arch:,flush-cache,cache-only,private-portage,variant:,portage-dir:,tarball:,auth-key:,user:,autologin,password:,settings:,mirror:,tty: -- "$@") diff --git a/templates/lxc-openmandriva.in b/templates/lxc-openmandriva.in index 12f9985..daba812 100644 --- a/templates/lxc-openmandriva.in +++ b/templates/lxc-openmandriva.in @@ -46,7 +46,6 @@ hostarch=$(uname -m) cache_base="${LXC_CACHE_PATH:-@LOCALSTATEDIR@/cache/lxc/openmandriva/$arch}" default_path=@LXCPATH@ default_profile=default -root_password=root lxc_network_type=veth lxc_network_link=br0 diff --git a/templates/lxc-opensuse.in b/templates/lxc-opensuse.in index 66176c3..5e8686b 100644 --- a/templates/lxc-opensuse.in +++ b/templates/lxc-opensuse.in @@ -112,7 +112,6 @@ EOF touch $rootfs/etc/sysconfig/kernel echo "Please change root-password !" 
- echo "root:root" | chpasswd -R $rootfs return 0 } @@ -459,7 +458,7 @@ fi if [ -z "$DISTRO" ]; then echo "" echo "No release selected, using openSUSE Leap 42.2" - DISTRO=42.2 + DISTRO="leap/42.2" else echo "" case "$DISTRO" in diff --git a/templates/lxc-oracle.in b/templates/lxc-oracle.in index 20c212b..19fe912 100644 --- a/templates/lxc-oracle.in +++ b/templates/lxc-oracle.in @@ -462,12 +462,10 @@ EOF fi fi - # add oracle user, set root password + # add oracle user chroot $container_rootfs useradd -m -s /bin/bash oracle - echo "oracle:oracle" | chroot $container_rootfs chpasswd - echo "root:root" | chroot $container_rootfs chpasswd - printf "Added container user:\033[1moracle\033[0m password:\033[1moracle\033[0m\n" - printf "Added container user:\033[1mroot\033[0m password:\033[1mroot\033[0m\n" + printf "Added container user:\033[1moracle\033[0m\n" + printf "Added container user:\033[1mroot\033[0m\n" } # create the container's lxc config file diff --git a/templates/lxc-plamo.in b/templates/lxc-plamo.in index 009fa4f..c96e23e 100644 --- a/templates/lxc-plamo.in +++ b/templates/lxc-plamo.in @@ -186,9 +186,6 @@ configure_plamo() { # glibc configure mv $rootfs/etc/ld.so.conf{.new,} chroot $rootfs ldconfig - # root password - echo "Setting root password to 'root'..." - echo "root:root" | chroot $rootfs chpasswd echo "Please change root password!" ed - $rootfs/etc/rc.d/rc.S <<- "EOF" /^mount -w -n -t proc/;/^mkdir \/dev\/shm/-1d diff --git a/templates/lxc-slackware.in b/templates/lxc-slackware.in index 5005918..216c7a7 100644 --- a/templates/lxc-slackware.in +++ b/templates/lxc-slackware.in @@ -471,10 +471,6 @@ sed -i 's/.*genpowerfail.*//' $rootfs/etc/inittab # add a message to rc.local that confirms successful container startup echo "echo ; echo \"* container $name started. *\" ; echo" >> $rootfs/etc/rc.d/rc.local -# set a default combination for the luggage -echo "root:root" | chroot $rootfs chpasswd -echo "Root default password is 'root', please change it!" 
- # borrow the time configuration from the local machine cp -a /etc/localtime $rootfs/etc/localtime diff --git a/templates/lxc-sparclinux.in b/templates/lxc-sparclinux.in index 70616ba..124c50b 100644 --- a/templates/lxc-sparclinux.in +++ b/templates/lxc-sparclinux.in @@ -296,12 +296,10 @@ EOF echo "Timezone in container is not configured. Adjust it manually." fi - # add oracle user, set root password + # add oracle user chroot $container_rootfs useradd -m -s /bin/bash oracle - echo "oracle:oracle" | chroot $container_rootfs chpasswd - echo "root:root" | chroot $container_rootfs chpasswd - printf "Added container user:\033[1moracle\033[0m password:\033[1moracle\033[0m\n" - printf "Added container user:\033[1mroot\033[0m password:\033[1mroot\033[0m\n" + printf "Added container user:\033[1moracle\033[0m\n" + printf "Added container user:\033[1mroot\033[0m\n" } # create the container's lxc config file diff --git a/templates/lxc-sshd.in b/templates/lxc-sshd.in index 9b07ea6..7db13cc 100644 --- a/templates/lxc-sshd.in +++ b/templates/lxc-sshd.in @@ -38,7 +38,6 @@ install_sshd() rootfs=$1 tree="\ -$rootfs/var/run/sshd \ $rootfs/var/empty/sshd \ $rootfs/var/lib/empty/sshd \ $rootfs/etc/init.d \ @@ -46,7 +45,7 @@ $rootfs/etc/rc.d \ $rootfs/etc/ssh \ $rootfs/etc/sysconfig/network-scripts \ $rootfs/dev/shm \ -$rootfs/run/shm \ +$rootfs/run/sshd \ $rootfs/proc \ $rootfs/sys \ $rootfs/bin \ @@ -63,6 +62,11 @@ $rootfs/lib64" return 1 fi + ln -s /run $rootfs/var/run + if [ $? 
-ne 0 ]; then + return 1 + fi + return 0 } @@ -90,17 +94,13 @@ Protocol 2 HostKey /etc/ssh/ssh_host_rsa_key HostKey /etc/ssh/ssh_host_dsa_key UsePrivilegeSeparation yes -KeyRegenerationInterval 3600 -ServerKeyBits 768 SyslogFacility AUTH LogLevel INFO LoginGraceTime 120 PermitRootLogin yes StrictModes yes -RSAAuthentication yes PubkeyAuthentication yes IgnoreRhosts yes -RhostsRSAAuthentication no HostbasedAuthentication no PermitEmptyPasswords yes ChallengeResponseAuthentication no @@ -141,7 +141,7 @@ lxc.mount.entry = /lib lib none ro,bind 0 0 lxc.mount.entry = /bin bin none ro,bind 0 0 lxc.mount.entry = /usr usr none ro,bind 0 0 lxc.mount.entry = /sbin sbin none ro,bind 0 0 -lxc.mount.entry = tmpfs var/run/sshd tmpfs mode=0644 0 0 +lxc.mount.entry = tmpfs run/sshd tmpfs mode=0644 0 0 lxc.mount.entry = @LXCTEMPLATEDIR@/lxc-sshd $init_path none ro,bind 0 0 lxc.mount.entry = /etc/init.d etc/init.d none ro,bind 0 0 diff --git a/templates/lxc-ubuntu.in b/templates/lxc-ubuntu.in index 8320993..ae3a22a 100644 --- a/templates/lxc-ubuntu.in +++ b/templates/lxc-ubuntu.in @@ -674,7 +674,7 @@ $1 -h|--help [-a|--arch] [-b|--bindhome <user>] [-d|--debug] [-F | --flush-cache] [-r|--release <release>] [-v|--variant] [ -S | --auth-key <keyfile>] [--rootfs <rootfs>] [--packages <packages>] [-u|--user <user>] [--password <password>] [--mirror <url>] [--security-mirror <url>] -release: the ubuntu release (e.g. precise): defaults to host release on ubuntu, otherwise uses latest LTS +release: the ubuntu release (e.g. xenial): defaults to host release on ubuntu, otherwise uses latest LTS variant: debootstrap variant to use (see debootstrap(8)) bindhome: bind <user>'s home into the container The ubuntu user will not be created, and <user> will have @@ -694,7 +694,7 @@ if [ $? 
-ne 0 ]; then fi eval set -- "$options" -release=precise # Default to the last Ubuntu LTS release for non-Ubuntu systems +release=xenial # Default to the last Ubuntu LTS release for non-Ubuntu systems if [ -f /etc/lsb-release ]; then . /etc/lsb-release if [ "$DISTRIB_ID" = "Ubuntu" ]; then