The pid_max kselftest hardcodes pid_max values of 400 and 500, but the
kernel enforces a minimum of PIDS_PER_CPU_MIN * num_possible_cpus().
On machines with many possible CPUs (e.g. nr_cpu_ids=128 yields a
minimum of 1024), writing 400 or 500 to /proc/sys/kernel/pid_max
returns EINVAL and all three tests fail.

Compute this minimum the same way the kernel does and derive the outer
and inner limits (struct pid_max_cfg) from it at run time. Original test
semantics are preserved (outer < inner, nested namespace capped by parent).

Signed-off-by: Bjoern Doebel <[email protected]>
---
 .../testing/selftests/pid_namespace/pid_max.c | 153 ++++++++++++++----
 1 file changed, 119 insertions(+), 34 deletions(-)

diff --git a/tools/testing/selftests/pid_namespace/pid_max.c b/tools/testing/selftests/pid_namespace/pid_max.c
index c9519e7385b6..8bd600f55421 100644
--- a/tools/testing/selftests/pid_namespace/pid_max.c
+++ b/tools/testing/selftests/pid_namespace/pid_max.c
@@ -12,10 +12,76 @@
 #include <syscall.h>
 #include <sys/mount.h>
 #include <sys/wait.h>
+#include <unistd.h>
 
 #include "kselftest_harness.h"
 #include "../pidfd/pidfd.h"
 
+/*
+ * The kernel computes the minimum allowed pid_max as:
+ *   max(RESERVED_PIDS + 1, PIDS_PER_CPU_MIN * num_possible_cpus())
+ * Mirror that here so the test values are always valid.
+ *
+ * Note: glibc's get_nprocs_conf() returns the number of *configured*
+ * (present) CPUs, not *possible* CPUs.  The kernel uses
+ * num_possible_cpus() which corresponds to /sys/devices/system/cpu/possible.
+ * These can differ significantly (e.g. 16 configured vs 128 possible).
+ */
+#define RESERVED_PIDS          300
+#define PIDS_PER_CPU_MIN       8
+
+/* Count CPUs in the sysfs "possible" list, e.g. "0-31" or "0-15,32-47". */
+static int num_possible_cpus(void)
+{
+       FILE *f;
+       int count = 0;
+       int lo, hi;
+
+       f = fopen("/sys/devices/system/cpu/possible", "r");
+       if (!f)
+               return 0;       /* unreadable: report zero CPUs */
+
+       while (fscanf(f, "%d", &lo) == 1) {
+               if (fscanf(f, "-%d", &hi) == 1)
+                       count += hi - lo + 1;   /* inclusive range lo-hi */
+               else
+                       count++;                /* single CPU entry */
+               /* consume the ',' before the next entry, if present */
+               fscanf(f, ",");
+       }
+
+       fclose(f);
+       return count;
+}
+
+static int pid_min(void)       /* = max(RESERVED_PIDS + 1, per-CPU floor) */
+{
+       int cpu_min = PIDS_PER_CPU_MIN * num_possible_cpus();   /* 0 if unreadable */
+
+       return cpu_min > (RESERVED_PIDS + 1) ? cpu_min : (RESERVED_PIDS + 1);
+}
+
+struct pid_max_cfg {
+       int outer;      /* pid_max written by the *_outer callbacks (ancestor ns) */
+       int inner;      /* pid_max written by pid_max_cb and the *_inner callbacks */
+};
+
+/*
+ * Initializer for struct pid_max_cfg.  outer (pid_min() + 100) is the
+ * stricter limit set in the ancestor namespace; inner (pid_min() + 200)
+ * is set in a nested namespace but must still be capped by outer.  Both
+ * sit above pid_min() so they stay writable; pid_min() runs at each use.
+ */
+#define PID_MAX_CFG_INIT { .outer = pid_min() + 100, .inner = pid_min() + 200 }
+
+static int write_int_to_fd(int fd, int val)    /* writes val as decimal text */
+{
+       char buf[24];   /* fits any 64-bit decimal value plus NUL */
+       int len = snprintf(buf, sizeof(buf), "%d", val);
+
+       return write(fd, buf, len);     /* write()'s result; no NUL/newline sent */
+}
+
 #define __STACK_SIZE (8 * 1024 * 1024)
 static pid_t do_clone(int (*fn)(void *), void *arg, int flags)
 {
@@ -37,6 +103,7 @@ static pid_t do_clone(int (*fn)(void *), void *arg, int flags)
 
 static int pid_max_cb(void *data)
 {
+       struct pid_max_cfg *cfg = data;
        int fd, ret;
        pid_t pid;
 
@@ -60,18 +127,18 @@ static int pid_max_cb(void *data)
                return -1;
        }
 
-       ret = write(fd, "500", sizeof("500") - 1);
+       ret = write_int_to_fd(fd, cfg->inner);
        if (ret < 0) {
                fprintf(stderr, "%m - Failed to write pid_max\n");
                return -1;
        }
 
-       for (int i = 0; i < 501; i++) {
+       for (int i = 0; i < cfg->inner + 1; i++) {
                pid = fork();
                if (pid == 0)
                        exit(EXIT_SUCCESS);
                wait_for_pid(pid);
-               if (pid > 500) {
+               if (pid > cfg->inner) {
                        fprintf(stderr, "Managed to create pid number beyond limit\n");
                        return -1;
                }
@@ -82,6 +149,7 @@ static int pid_max_cb(void *data)
 
 static int pid_max_nested_inner(void *data)
 {
+       struct pid_max_cfg *cfg = data;
        int fret = -1;
        pid_t pids[2];
        int fd, i, ret;
@@ -106,7 +174,7 @@ static int pid_max_nested_inner(void *data)
                return fret;
        }
 
-       ret = write(fd, "500", sizeof("500") - 1);
+       ret = write_int_to_fd(fd, cfg->inner);
        close(fd);
        if (ret < 0) {
                fprintf(stderr, "%m - Failed to write pid_max\n");
@@ -133,8 +201,8 @@ static int pid_max_nested_inner(void *data)
                return fret;
        }
 
-       /* Now make sure that we wrap pids at 400. */
-       for (i = 0; i < 510; i++) {
+       /* Now make sure that we wrap pids at the outer limit (cfg->outer). */
+       for (i = 0; i < cfg->inner + 10; i++) {
                pid_t pid;
 
                pid = fork();
@@ -145,7 +213,7 @@ static int pid_max_nested_inner(void *data)
                        exit(EXIT_SUCCESS);
 
                wait_for_pid(pid);
-               if (pid >= 500) {
+               if (pid >= cfg->inner) {
                        fprintf(stderr, "Managed to create process with pid %d beyond configured limit\n", pid);
                        return fret;
                }
@@ -156,15 +224,20 @@ static int pid_max_nested_inner(void *data)
 
 static int pid_max_nested_outer(void *data)
 {
-       int fret = -1, nr_procs = 400;
-       pid_t pids[1000];
-       int fd, i, ret;
+       struct pid_max_cfg *cfg = data;
+       int fret = -1, nr_procs = 0;
+       pid_t *pids;
+       int fd, ret;
        pid_t pid;
 
+       pids = malloc(cfg->outer * sizeof(pid_t));
+       if (!pids)
+               return -1;
+
        ret = mount("", "/", NULL, MS_PRIVATE | MS_REC, 0);
        if (ret) {
                fprintf(stderr, "%m - Failed to make rootfs private mount\n");
-               return fret;
+               goto out;
        }
 
        umount2("/proc", MNT_DETACH);
@@ -172,27 +245,27 @@ static int pid_max_nested_outer(void *data)
        ret = mount("proc", "/proc", "proc", 0, NULL);
        if (ret) {
                fprintf(stderr, "%m - Failed to mount proc\n");
-               return fret;
+               goto out;
        }
 
        fd = open("/proc/sys/kernel/pid_max", O_RDWR | O_CLOEXEC | O_NOCTTY);
        if (fd < 0) {
                fprintf(stderr, "%m - Failed to open pid_max\n");
-               return fret;
+               goto out;
        }
 
-       ret = write(fd, "400", sizeof("400") - 1);
+       ret = write_int_to_fd(fd, cfg->outer);
        close(fd);
        if (ret < 0) {
                fprintf(stderr, "%m - Failed to write pid_max\n");
-               return fret;
+               goto out;
        }
 
        /*
-        * Create 397 processes. This leaves room for do_clone() (398) and
-        * one more 399. So creating another process needs to fail.
+        * Create (outer - 4) processes. This leaves room for do_clone()
+        * and one more process. So creating another process needs to fail.
         */
-       for (nr_procs = 0; nr_procs < 396; nr_procs++) {
+       for (nr_procs = 0; nr_procs < cfg->outer - 4; nr_procs++) {
                pid = fork();
                if (pid < 0)
                        goto reap;
@@ -203,7 +276,7 @@ static int pid_max_nested_outer(void *data)
                pids[nr_procs] = pid;
        }
 
-       pid = do_clone(pid_max_nested_inner, NULL, CLONE_NEWPID | CLONE_NEWNS);
+       pid = do_clone(pid_max_nested_inner, cfg, CLONE_NEWPID | CLONE_NEWNS);
        if (pid < 0) {
                fprintf(stderr, "%m - Failed to clone nested pidns\n");
                goto reap;
@@ -220,20 +293,27 @@ static int pid_max_nested_outer(void *data)
        for (int i = 0; i < nr_procs; i++)
                wait_for_pid(pids[i]);
 
+out:
+       free(pids);
        return fret;
 }
 
 static int pid_max_nested_limit_inner(void *data)
 {
-       int fret = -1, nr_procs = 400;
+       struct pid_max_cfg *cfg = data;
+       int fret = -1, nr_procs = 0;
        int fd, ret;
        pid_t pid;
-       pid_t pids[1000];
+       pid_t *pids;
+
+       pids = malloc(cfg->inner * sizeof(pid_t));
+       if (!pids)
+               return -1;
 
        ret = mount("", "/", NULL, MS_PRIVATE | MS_REC, 0);
        if (ret) {
                fprintf(stderr, "%m - Failed to make rootfs private mount\n");
-               return fret;
+               goto out;
        }
 
        umount2("/proc", MNT_DETACH);
@@ -241,23 +321,23 @@ static int pid_max_nested_limit_inner(void *data)
        ret = mount("proc", "/proc", "proc", 0, NULL);
        if (ret) {
                fprintf(stderr, "%m - Failed to mount proc\n");
-               return fret;
+               goto out;
        }
 
        fd = open("/proc/sys/kernel/pid_max", O_RDWR | O_CLOEXEC | O_NOCTTY);
        if (fd < 0) {
                fprintf(stderr, "%m - Failed to open pid_max\n");
-               return fret;
+               goto out;
        }
 
-       ret = write(fd, "500", sizeof("500") - 1);
+       ret = write_int_to_fd(fd, cfg->inner);
        close(fd);
        if (ret < 0) {
                fprintf(stderr, "%m - Failed to write pid_max\n");
-               return fret;
+               goto out;
        }
 
-       for (nr_procs = 0; nr_procs < 500; nr_procs++) {
+       for (nr_procs = 0; nr_procs < cfg->inner; nr_procs++) {
                pid = fork();
                if (pid < 0)
                        break;
@@ -268,7 +348,7 @@ static int pid_max_nested_limit_inner(void *data)
                pids[nr_procs] = pid;
        }
 
-       if (nr_procs >= 400) {
+       if (nr_procs >= cfg->outer) {
                fprintf(stderr, "Managed to create processes beyond the configured outer limit\n");
                goto reap;
        }
@@ -279,11 +359,14 @@ static int pid_max_nested_limit_inner(void *data)
        for (int i = 0; i < nr_procs; i++)
                wait_for_pid(pids[i]);
 
+out:
+       free(pids);
        return fret;
 }
 
 static int pid_max_nested_limit_outer(void *data)
 {
+       struct pid_max_cfg *cfg = data;
        int fd, ret;
        pid_t pid;
 
@@ -307,14 +390,14 @@ static int pid_max_nested_limit_outer(void *data)
                return -1;
        }
 
-       ret = write(fd, "400", sizeof("400") - 1);
+       ret = write_int_to_fd(fd, cfg->outer);
        close(fd);
        if (ret < 0) {
                fprintf(stderr, "%m - Failed to write pid_max\n");
                return -1;
        }
 
-       pid = do_clone(pid_max_nested_limit_inner, NULL, CLONE_NEWPID | CLONE_NEWNS);
+       pid = do_clone(pid_max_nested_limit_inner, cfg, CLONE_NEWPID | CLONE_NEWNS);
        if (pid < 0) {
                fprintf(stderr, "%m - Failed to clone nested pidns\n");
                return -1;
@@ -330,28 +413,30 @@ static int pid_max_nested_limit_outer(void *data)
 
 TEST(pid_max_simple)
 {
+       struct pid_max_cfg cfg = PID_MAX_CFG_INIT;
        pid_t pid;
 
-
-       pid = do_clone(pid_max_cb, NULL, CLONE_NEWPID | CLONE_NEWNS);
+       pid = do_clone(pid_max_cb, &cfg, CLONE_NEWPID | CLONE_NEWNS);
        ASSERT_GT(pid, 0);
        ASSERT_EQ(0, wait_for_pid(pid));
 }
 
 TEST(pid_max_nested_limit)
 {
+       struct pid_max_cfg cfg = PID_MAX_CFG_INIT;
        pid_t pid;
 
-       pid = do_clone(pid_max_nested_limit_outer, NULL, CLONE_NEWPID | CLONE_NEWNS);
+       pid = do_clone(pid_max_nested_limit_outer, &cfg, CLONE_NEWPID | CLONE_NEWNS);
        ASSERT_GT(pid, 0);
        ASSERT_EQ(0, wait_for_pid(pid));
 }
 
 TEST(pid_max_nested)
 {
+       struct pid_max_cfg cfg = PID_MAX_CFG_INIT;
        pid_t pid;
 
-       pid = do_clone(pid_max_nested_outer, NULL, CLONE_NEWPID | CLONE_NEWNS);
+       pid = do_clone(pid_max_nested_outer, &cfg, CLONE_NEWPID | CLONE_NEWNS);
        ASSERT_GT(pid, 0);
        ASSERT_EQ(0, wait_for_pid(pid));
 }
-- 
2.50.1




Amazon Web Services Development Center Germany GmbH
Tamara-Danz-Str. 13
10243 Berlin
Geschaeftsfuehrung: Christof Hellmis, Andreas Stieger
Eingetragen am Amtsgericht Charlottenburg unter HRB 257764 B
Sitz: Berlin
Ust-ID: DE 365 538 597


Reply via email to