Demi Marie Obenour <[email protected]> writes:

> On 10/17/25 08:55, Leah Neukirchen wrote:
>> Hello,
>> 
>> I'm happy to announce a new release of "nitro", a new supervisor akin
>> to daemontools/runit/perp/s6.
>> 
>> These are the main applications nitro is designed for:
>> - As init for a Linux or NetBSD machine for embedded, desktop or
>>   server purposes
>> - As init for a Linux initramfs
>> - As init for a Linux container (Docker/Podman/LXC/Kubernetes)
>> - As unprivileged supervision daemon on generic POSIX systems
>> 
>> nitro 0.5 is pretty much feature complete and now supports (relative
>> to the previous release announcement):
>> - usage as NetBSD init
>> - s6-compatible readiness notification
>> - event-driven nitroctl which can perform multiple actions in parallel
>> - improved usability and robustness
>> - new test suite, which passes on new and old Linux, FreeBSD, NetBSD, OpenBSD
>
> Do you plan to support cgroups on Linux?  Those allow for much
> more robust supervision, including of daemons that do nasty things
> like double-fork.  It's also necessary to be able to terminate child
> daemons reliably without the use of PID namespaces.

I considered it, but I don't think support in PID 1 itself is
necessary (for the double fork perhaps, but most software doesn't
double fork anymore and it hurts supervision anyway).

Basically, you can do everything in scripts, say:

/etc/nitro/sshd/run:
#!/bin/sh
# Create a dedicated cgroup for this service.
mkdir -p /sys/fs/cgroup/nitro/sshd
# Enable the memory, cpu and io controllers for the children of the root
# cgroup and of the nitro cgroup, so they are available in nitro/sshd.
echo "+memory +cpu +io" > /sys/fs/cgroup/cgroup.subtree_control
echo "+memory +cpu +io" > /sys/fs/cgroup/nitro/cgroup.subtree_control
# Move this shell into the service cgroup; the exec below replaces the
# shell in place, so the daemon starts out inside that cgroup.
echo $$ >/sys/fs/cgroup/nitro/sshd/cgroup.procs
exec /usr/bin/sshd -D

(you can configure limits here of course)

/etc/nitro/sshd/finish:
#!/bin/sh
# Kill whatever is still left in the service's cgroup.
echo 1 > /sys/fs/cgroup/nitro/sshd/cgroup.kill

Unfortunately, cgroup.kill can only kill a cgroup using SIGKILL, so
I wrote a small tool, cgslay, to do this a bit more gently (see attached).

/* cgslay - gently kill a cgroup
 *
 * Usage: cgslay [-s SIGNAL] [-t TIMEOUT] /sys/fs/cgroup/...
 * -s SIGNAL   send signal (number), default 15 (SIGTERM)
 * -t TIMEOUT  use cgroup.kill after TIMEOUT seconds
 */

#include <sys/poll.h>
#include <sys/timerfd.h>

#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/* Print a printf-style message to stderr and exit with status 2. */
#define fatal(...) do { fprintf(stderr, __VA_ARGS__); exit(2); } while (0)

/* File descriptor of the cgroup's cgroup.events file; opened in main(). */
int eventsfd;
/* Poll set used by check_events(): fds[0] is the timeout timerfd (or -1
 * when no timeout is armed), fds[1] is the cgroup.events descriptor. */
struct pollfd fds[2];

/* check_events - look for NEEDLE in the cgroup.events file (fds[1]).
 *
 * The most recently read contents are cached in a static buffer; if the
 * needle is already present there, 1 is returned without blocking.
 * Otherwise the function blocks in poll() until either descriptor in
 * `fds` becomes ready, then re-reads the events file from the start.
 *
 * Returns:  1  NEEDLE is present in the events file contents
 *           0  the file was re-read but NEEDLE is not (yet) present
 *          -1  the timeout descriptor fds[0] became readable
 *
 * Exits via fatal() if poll, lseek or read fail.
 */
int
check_events(char *needle)
{
        static char buf[512];
        if (strstr(buf, needle))
                return 1;

        /* Retry when interrupted by a signal; any other poll failure would
         * leave stale revents behind, so treat it as fatal. */
        int ready;
        do {
                ready = poll(fds, sizeof fds / sizeof fds[0], -1);
        } while (ready < 0 && errno == EINTR);
        if (ready < 0)
                fatal("poll: %s\n", strerror(errno));

        if (fds[0].revents & POLLIN)
                return -1;

        if (lseek(fds[1].fd, 0, SEEK_SET) < 0)
                fatal("can't rewind eventsfd: %s\n", strerror(errno));
        ssize_t n = read(fds[1].fd, buf, sizeof buf - 1);
        if (n < 0)
                fatal("can't read from eventsfd: %s\n", strerror(errno));
        /* Terminate exactly after the bytes read; n <= sizeof buf - 1, so
         * this index is always in bounds and no stale byte is kept. */
        buf[n] = 0;
        if (strstr(buf, needle))
                return 1;

        return 0;
}

/* Parse ARG as a decimal int for the option described by WHAT.
 * Exits via fatal() when ARG is empty, has trailing junk, or is out of
 * range for int. */
static int
parse_int(const char *arg, const char *what)
{
        char *end;

        errno = 0;
        long v = strtol(arg, &end, 10);
        if (end == arg || *end != '\0' || errno == ERANGE ||
            v < INT_MIN || v > INT_MAX)
                fatal("invalid %s '%s'\n", what, arg);
        return (int)v;
}

/* Write the one-character string VALUE to FD.
 * Returns 0 on success, -1 if the write failed or was short. */
static int
write_flag(int fd, const char *value)
{
        return write(fd, value, 1) == 1 ? 0 : -1;
}

/* cgslay entry point.
 *
 * Freezes the cgroup given as the only positional argument, sends SIGNAL
 * (-s, default SIGTERM) to every PID listed in cgroup.procs, then thaws
 * the cgroup again.  With -t TIMEOUT (> 0) it then waits for the cgroup
 * to become unpopulated and falls back to writing cgroup.kill once the
 * timer fires.  With -s 9 (SIGKILL) it goes straight to cgroup.kill.
 *
 * Exit status: 0 on success, 2 on usage or system-call errors.
 */
int
main(int argc, char *argv[])
{
        int sig = SIGTERM;
        int timeout = -1;

        int opt;
        while ((opt = getopt(argc, argv, "s:t:")) != -1)
                switch (opt) {
                case 's': sig = parse_int(optarg, "signal"); break;
                case 't': timeout = parse_int(optarg, "timeout"); break;
                default: goto usage;
                }

        if (argc != optind + 1) {
usage:
                fatal("Usage: cgslay [-s SIGNAL] [-t TIMEOUT] /sys/fs/cgroup/...\n");
        }

        if (sig < 0)
                fatal("invalid signal '%d'\n", sig);

        char *cgroup = argv[optind];

        /* Declared up front so that the `kill:` label below is followed by
         * a statement, not a declaration (required before C23). */
        int killfd;

        int dirfd = open(cgroup, O_RDONLY | O_DIRECTORY);
        if (dirfd < 0)
                fatal("can't open cgroup directory %s: %s\n", cgroup,
                    strerror(errno));

        eventsfd = openat(dirfd, "cgroup.events", O_RDONLY);
        if (eventsfd < 0)
                fatal("can't open cgroup.events: %s\n", strerror(errno));

        /* Nothing gentler to try for SIGKILL: let cgroup.kill do it. */
        if (sig == SIGKILL)
                goto kill;

        if (timeout > 0) {
                /* One-shot timer; check_events() returns -1 once it fires. */
                int timerfd = timerfd_create(CLOCK_MONOTONIC, 0);
                if (timerfd < 0)
                        fatal("timerfd_create: %s\n", strerror(errno));
                if (timerfd_settime(timerfd, 0,
                    &(struct itimerspec) { .it_value = { .tv_sec = timeout } },
                    0) < 0)
                        fatal("timerfd_settime: %s\n", strerror(errno));
                fds[0].fd = timerfd;
                fds[0].events = POLLIN;
        } else {
                /* A negative fd makes poll() ignore this slot. */
                fds[0].fd = -1;
        }

        fds[1].fd = eventsfd;
        fds[1].events = POLLPRI;

        int r;
        r = check_events("populated 0\n");
        if (r == 1)
                exit(0);

        int freezefd = openat(dirfd, "cgroup.freeze", O_WRONLY);
        if (freezefd < 0)
                fatal("can't open cgroup.freeze: %s\n", strerror(errno));

        /* Freeze first so the PID list cannot change while we signal it.
         * If the write fails, "frozen 1" would never show up, so bail out
         * instead of waiting forever. */
        if (write_flag(freezefd, "1") < 0)
                fatal("can't freeze cgroup: %s\n", strerror(errno));
        while ((r = check_events("frozen 1\n")) == 0)
                ;

        if (r < 0) {
                /* Timed out while freezing: thaw (best effort) and kill. */
                write_flag(freezefd, "0");
                goto kill;
        }

        int procsfd = openat(dirfd, "cgroup.procs", O_RDONLY);
        if (procsfd < 0) {
                write_flag(freezefd, "0");
                fatal("can't open cgroup.procs: %s\n", strerror(errno));
        }

        FILE *procsfile = fdopen(procsfd, "r");
        if (!procsfile) {
                write_flag(freezefd, "0");
                fatal("can't fdopen %d: %s\n", procsfd, strerror(errno));
        }

        long pid;
        while (fscanf(procsfile, "%ld%*[\n]", &pid) == 1) {
                /* Never pass 0 or a negative value to kill(): that would
                 * address a process group (including our own). */
                if (pid <= 0)
                        continue;
                kill((pid_t)pid, sig);
                /* Also continue the process so a stopped one can act on
                 * the signal. */
                if (sig && sig != SIGCONT)
                        kill((pid_t)pid, SIGCONT);
        }
        fclose(procsfile);

        if (write_flag(freezefd, "0") < 0)
                fatal("can't thaw cgroup: %s\n", strerror(errno));
        while ((r = check_events("frozen 0\n")) == 0)
                ;

        /* Without -t there is no fallback: signalling was all we do. */
        if (timeout < 0)
                exit(0);

        while ((r = check_events("populated 0\n")) == 0)
                ;

        if (r == 1)
                exit(0);

kill:
        killfd = openat(dirfd, "cgroup.kill", O_WRONLY);
        if (killfd < 0)
                fatal("can't open cgroup.kill: %s\n", strerror(errno));

        if (write_flag(killfd, "1") < 0)
                fatal("can't write cgroup.kill: %s\n", strerror(errno));

        exit(0);
}
hth,
-- 
Leah Neukirchen  <[email protected]>  https://leahneukirchen.org

Reply via email to