This test verifies that the contents of a process's lruvecs can be
migrated from one NUMA node to one or more destination nodes.

Program arguments are owned by the test harness itself. Therefore, an
environment variable, KSFT_CG_NUMA_MIGRATE_DST_SINGLE, has been added to
force migration to a single destination node. By default, pages are
migrated to all suitable nodes that have enough free memory, excluding
the source node (the node the test process is bound to while it
initially populates its memory).

The current error tolerance is set to 15%.

The inactive_anon test is executed with MGLRU (lru_gen) disabled because
no reliable way has been found to make MGLRU report anon pages as
inactive (https://virtuozzo.atlassian.net/browse/VSTOR-120426).
File-backed pages can be made inactive with MADV_COLD, but the same call
has no (visible) effect on anon pages when MGLRU is in use.

https://virtuozzo.atlassian.net/browse/VSTOR-114785

Signed-off-by: Dmitry Sepp <[email protected]>
---
 tools/testing/selftests/cgroup/Makefile       |   2 +
 tools/testing/selftests/cgroup/config         |   3 +
 .../selftests/cgroup/test_numa_migrate.c      | 765 ++++++++++++++++++
 3 files changed, 770 insertions(+)
 create mode 100644 tools/testing/selftests/cgroup/test_numa_migrate.c

diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile
index ff290321d7da..c534bd88a3d1 100644
--- a/tools/testing/selftests/cgroup/Makefile
+++ b/tools/testing/selftests/cgroup/Makefile
@@ -18,6 +18,7 @@ TEST_GEN_PROGS += test_memcontrol
 TEST_GEN_PROGS += test_pids
 TEST_GEN_PROGS += test_zswap
 TEST_GEN_PROGS += test_cache
+TEST_GEN_PROGS += test_numa_migrate
 
 LOCAL_HDRS += $(selfdir)/clone3/clone3_selftests.h $(selfdir)/pidfd/pidfd.h
 
@@ -34,3 +35,4 @@ $(OUTPUT)/test_memcontrol: cgroup_util.c
 $(OUTPUT)/test_pids: cgroup_util.c
 $(OUTPUT)/test_zswap: cgroup_util.c
 $(OUTPUT)/test_cache: cgroup_util.c
+$(OUTPUT)/test_numa_migrate: LDLIBS += -lnuma cgroup_util.c
diff --git a/tools/testing/selftests/cgroup/config b/tools/testing/selftests/cgroup/config
index 641ed9bc26b4..e006098a645a 100644
--- a/tools/testing/selftests/cgroup/config
+++ b/tools/testing/selftests/cgroup/config
@@ -5,3 +5,6 @@ CONFIG_CGROUP_SCHED=y
 CONFIG_MEMCG=y
 CONFIG_PAGE_COUNTER=y
 CONFIG_CACHESTAT_SYSCALL=y
+CONFIG_NUMA=y
+CONFIG_MEMFD_CREATE=y
+CONFIG_TRANSPARENT_HUGEPAGE=y
diff --git a/tools/testing/selftests/cgroup/test_numa_migrate.c b/tools/testing/selftests/cgroup/test_numa_migrate.c
new file mode 100644
index 000000000000..29599c16306c
--- /dev/null
+++ b/tools/testing/selftests/cgroup/test_numa_migrate.c
@@ -0,0 +1,765 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2025 Virtuozzo International GmbH. All rights reserved.
+ *
+ * NUMA migration of lruvecs test set
+ *
+ * This test set verifies the capability of migrating contents of process'
+ * lruvecs from one NUMA node to one or multiple destination nodes using
+ * memory.numa_migrate cgroup v2 interface.
+ *
+ * Test variants:
+ *  - disk_file:          Migrates active file pages from a regular disk file
+ *  - anon:               Migrates active anonymous pages
+ *  - anon_thp:           Migrates anonymous transparent huge pages (THP)
+ *  - tmpfs_file:         Migrates shared memory (tmpfs) file pages
+ *  - tmpfs_thp:          Migrates shared memory (tmpfs) transparent huge pages
+ *  - inactive_anon:      Migrates inactive anonymous pages (disables lru_gen)
+ *  - inactive_disk_file: Migrates inactive file pages
+ *
+ * Each test creates a memory mapping, populates it with pages on the source
+ * NUMA node, triggers migration via memory.numa_migrate, and verifies that
+ * pages are correctly distributed across destination nodes.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <fcntl.h>
+#include <numa.h>
+#include <signal.h>
+#include <sys/mman.h>
+#include <sys/wait.h>
+#include <sys/prctl.h>
+#include <linux/prctl.h>
+#include <linux/limits.h>
+
+#include "../kselftest_harness.h"
+#include "cgroup_util.h"
+
+/* Path of the unified cgroup root; filled in by cg_find_unified_root() in main() */
+static char root[PATH_MAX];
+/* Size of the test mapping and, where a file backs it, of that file */
+static const size_t file_size = MB(1024);
+/* numa_max_node() + 1; bounds the per-node statistics arrays */
+static int node_count;
+
+/* First node set in numa_all_nodes_ptr; the child is bound to it via src_mask */
+static int src_node;
+static struct bitmask *src_mask;
+
+/* Comma-separated destination node list written to memory.numa_migrate */
+static char dst_str[PAGE_SIZE];
+/* Number of destination nodes selected by configure_numa() */
+static int dst_count;
+/* One bit set per selected destination node */
+static struct bitmask *dst_mask;
+
+/*
+ * Kind of memory a test variant populates. The value selects the fixture
+ * setup/teardown steps and whether the child marks its mapping cold.
+ */
+enum memory_type {
+       MEMORY_TYPE_ACTIVE_FILE,
+       MEMORY_TYPE_INACTIVE_FILE,
+       MEMORY_TYPE_ACTIVE_ANON,
+       MEMORY_TYPE_INACTIVE_ANON,
+       MEMORY_TYPE_ANON_THP,
+       MEMORY_TYPE_TMPFS_FILE,
+       MEMORY_TYPE_TMPFS_FILE_THP,
+};
+
+/*
+ * Harness state bundled together so that helpers living outside of the
+ * TEST_F()/FIXTURE_*() bodies can still use the harness macros (see
+ * EXPAND_CTX()).
+ */
+struct test_context {
+       struct _test_data_numa_migrate *self;
+       const struct _fixture_variant_numa_migrate *variant;
+       struct __test_metadata *metadata;
+};
+
+/* Per-test state shared between setup, the test body and teardown */
+FIXTURE(numa_migrate) {
+       /* Backing file of the test mapping, or -1 when there is none */
+       int fd;
+       /* Path of the test cgroup; allocated by cg_name(), freed in teardown */
+       char *cg_test;
+       /* Pipe the child writes one byte to once its mapping is populated */
+       int pipefd[2];
+       /* Pointers to the harness state, handed to the helper functions */
+       struct test_context ctx;
+};
+
+/* Parameters that distinguish one test variant from another */
+FIXTURE_VARIANT(numa_migrate) {
+       /* Selects setup/teardown steps and the child's post-populate action */
+       enum memory_type mem_type;
+       /* Key looked up in the cgroup's memory.numa_stat */
+       const char *stat;
+       /* Flags passed to mmap() for the test mapping */
+       int mmap_flags;
+       /* Advice passed to madvise() before the mapping is populated */
+       int madvise;
+};
+
+/*
+ * Test variants. Each one names the memory.numa_stat key that is expected
+ * to account for the test mapping, together with the mmap() flags and the
+ * madvise() hint used to create that mapping.
+ */
+FIXTURE_VARIANT_ADD(numa_migrate, disk_file) {
+       .mem_type = MEMORY_TYPE_ACTIVE_FILE,
+       .stat = "active_file",
+       .mmap_flags = MAP_SHARED,
+       .madvise = MADV_NOHUGEPAGE,
+};
+
+FIXTURE_VARIANT_ADD(numa_migrate, anon) {
+       .mem_type = MEMORY_TYPE_ACTIVE_ANON,
+       .stat = "active_anon",
+       .mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS,
+       .madvise = MADV_NOHUGEPAGE,
+};
+
+FIXTURE_VARIANT_ADD(numa_migrate, anon_thp) {
+       .mem_type = MEMORY_TYPE_ANON_THP,
+       .stat = "anon_thp",
+       .mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS,
+       .madvise = MADV_HUGEPAGE,
+};
+
+FIXTURE_VARIANT_ADD(numa_migrate, tmpfs_file) {
+       .mem_type = MEMORY_TYPE_TMPFS_FILE,
+       .stat = "shmem",
+       .mmap_flags = MAP_SHARED,
+       .madvise = MADV_NOHUGEPAGE,
+};
+
+FIXTURE_VARIANT_ADD(numa_migrate, tmpfs_thp) {
+       .mem_type = MEMORY_TYPE_TMPFS_FILE_THP,
+       .stat = "shmem_thp",
+       .mmap_flags = MAP_SHARED,
+       .madvise = MADV_HUGEPAGE,
+};
+
+/* Teardown/setup toggle MGLRU around this variant (see mm_lru_gen_control()) */
+FIXTURE_VARIANT_ADD(numa_migrate, inactive_anon) {
+       .mem_type = MEMORY_TYPE_INACTIVE_ANON,
+       .stat = "inactive_anon",
+       .mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS,
+       .madvise = MADV_NOHUGEPAGE,
+};
+
+FIXTURE_VARIANT_ADD(numa_migrate, inactive_disk_file) {
+       .mem_type = MEMORY_TYPE_INACTIVE_FILE,
+       .stat = "inactive_file",
+       .mmap_flags = MAP_SHARED,
+       .madvise = MADV_NOHUGEPAGE,
+};
+
+/*
+ * Print the first node_count entries of @node_stat on a single line as
+ * "node<N>=<value> " pairs.
+ */
+static void dump_memory_numa_stat(size_t *node_stat)
+{
+       int node = 0;
+
+       /* Empty message: presumably only emits the kselftest line prefix */
+       ksft_print_msg("%s", "");
+       while (node < node_count) {
+               printf("node%i=%zu ", node, node_stat[node]);
+               node++;
+       }
+       printf("\n");
+}
+
+/*
+ * Write "3" to /proc/sys/vm/drop_caches.
+ *
+ * Returns EXIT_SUCCESS, or EXIT_FAILURE if the control file could not be
+ * opened, written or closed.
+ */
+static int vm_drop_caches(void)
+{
+       int status = EXIT_SUCCESS;
+       int ctl = open("/proc/sys/vm/drop_caches", O_WRONLY);
+
+       if (ctl < 0) {
+               ksft_perror("failed to open drop_caches");
+               return EXIT_FAILURE;
+       }
+
+       if (write(ctl, "3", 1) != 1) {
+               ksft_perror("failed to write to drop_caches");
+               status = EXIT_FAILURE;
+       }
+
+       /* The descriptor is closed even when the write failed */
+       if (close(ctl) != 0) {
+               ksft_perror("failed to close drop_caches");
+               status = EXIT_FAILURE;
+       }
+
+       return status;
+}
+
+/*
+ * Obtain a temporary file via get_temp_fd(), size it to file_size and store
+ * the descriptor in the fixture.
+ *
+ * Returns EXIT_SUCCESS or EXIT_FAILURE; on failure the fixture is untouched.
+ */
+static int setup_disk_file(struct test_context *ctx)
+{
+       int tmp_fd = get_temp_fd();
+
+       if (tmp_fd < 0) {
+               ksft_perror("failed to get temp fd");
+               return EXIT_FAILURE;
+       }
+
+       if (ftruncate(tmp_fd, file_size) >= 0) {
+               ctx->self->fd = tmp_fd;
+               return EXIT_SUCCESS;
+       }
+
+       /* Report first: close() could overwrite errno */
+       ksft_perror("failed to truncate temp fd");
+       close(tmp_fd);
+
+       return EXIT_FAILURE;
+}
+
+/*
+ * Create a memfd named "kselftest", size it to file_size and store the
+ * descriptor in the fixture.
+ *
+ * Returns EXIT_SUCCESS or EXIT_FAILURE; on failure the fixture is untouched.
+ */
+static int setup_tmpfs_file(struct test_context *ctx)
+{
+       int mem_fd = memfd_create("kselftest", 0);
+
+       if (mem_fd < 0) {
+               ksft_perror("failed to create memfd");
+               return EXIT_FAILURE;
+       }
+
+       if (ftruncate(mem_fd, file_size) >= 0) {
+               ctx->self->fd = mem_fd;
+               return EXIT_SUCCESS;
+       }
+
+       /* Report first: close() could overwrite errno */
+       ksft_perror("failed to truncate memfd");
+       close(mem_fd);
+
+       return EXIT_FAILURE;
+}
+
+/*
+ * Disable MGLRU (lru_gen) or restore the state saved when it was disabled.
+ *
+ * @do_disable: true  - save the current contents of the lru_gen "enabled"
+ *                      control, then write "0" to it;
+ *              false - write the saved contents back and forget them.
+ *
+ * The saved value is kept in a static buffer. Saving while a value is
+ * already stored, or restoring while nothing is stored, only logs a message
+ * and is not treated as an error.
+ *
+ * Returns EXIT_SUCCESS or EXIT_FAILURE.
+ */
+static int mm_lru_gen_control(bool do_disable)
+{
+       static char lru_gen_control_str[PAGE_SIZE];
+
+       int fd;
+       size_t len;
+       ssize_t count;
+       int ret = EXIT_SUCCESS;
+
+       fd = open("/sys/kernel/mm/lru_gen/enabled", O_RDWR);
+       if (fd < 0) {
+               ksft_perror("failed to open lru_gen control");
+               return EXIT_FAILURE;
+       }
+
+       if (do_disable) {
+               if (lru_gen_control_str[0] != '\0') {
+                       ksft_print_msg("lru_gen control has already been set\n");
+                       goto out;
+               }
+               count = read(fd, lru_gen_control_str, PAGE_SIZE - 1);
+               if (count < 0) {
+                       ksft_perror("failed to read lru_gen control");
+                       ret = EXIT_FAILURE;
+                       goto out;
+               }
+               lru_gen_control_str[count] = '\0';
+               if (write(fd, "0", 1) != 1) {
+                       /* ksft_perror() keeps errno and terminates the line */
+                       ksft_perror("failed to write lru_gen control");
+                       ret = EXIT_FAILURE;
+                       goto out;
+               }
+       } else {
+               len = strlen(lru_gen_control_str);
+               if (len == 0) {
+                       ksft_print_msg("lru_gen control hasn't been set\n");
+                       goto out;
+               }
+               if (write(fd, lru_gen_control_str, len) != (ssize_t)len) {
+                       ksft_perror("failed to write lru_gen control");
+                       ret = EXIT_FAILURE;
+                       goto out;
+               }
+               /* Mark the saved state as consumed */
+               lru_gen_control_str[0] = '\0';
+       }
+out:
+       if (close(fd)) {
+               ksft_perror("failed to close lru_gen control");
+               ret = EXIT_FAILURE;
+       }
+
+       return ret;
+}
+
+/*
+ * Switch the shmem THP policy to "advise" or restore the saved policy.
+ *
+ * @do_set: true  - read the shmem_enabled control, remember the value shown
+ *                  in square brackets and write "advise";
+ *          false - write the remembered value back and forget it.
+ *
+ * The remembered value is kept in a static buffer and is only stored once
+ * "advise" has been written successfully. Setting while a value is already
+ * stored, or restoring while nothing is stored, only logs a message and is
+ * not treated as an error.
+ *
+ * Returns EXIT_SUCCESS or EXIT_FAILURE.
+ */
+static int thp_shmem_control(bool do_set)
+{
+       static char thp_shmem_control_str[PAGE_SIZE];
+
+       int fd;
+       size_t len;
+       ssize_t count;
+       char *control;
+       char *control_end;
+       char control_buf[PAGE_SIZE];
+       int ret = EXIT_SUCCESS;
+       static const char *advise = "advise";
+
+       fd = open("/sys/kernel/mm/transparent_hugepage/shmem_enabled", O_RDWR);
+       if (fd < 0) {
+               ksft_perror("failed to open thp control");
+               return EXIT_FAILURE;
+       }
+
+       if (do_set) {
+               if (thp_shmem_control_str[0] != '\0') {
+                       ksft_print_msg("thp control has already been set\n");
+                       goto out;
+               }
+               count = read(fd, control_buf, PAGE_SIZE - 1);
+               if (count < 0) {
+                       ksft_perror("failed to read thp control");
+                       ret = EXIT_FAILURE;
+                       goto out;
+               }
+               control_buf[count] = '\0';
+
+               /* The active policy is the one enclosed in square brackets */
+               control = strchr(control_buf, '[');
+               control_end = control ? strchr(control, ']') : NULL;
+               if (control_end == NULL) {
+                       ksft_print_msg("failed to parse thp control\n");
+                       ret = EXIT_FAILURE;
+                       goto out;
+               }
+               control++;
+               *control_end = '\0';
+
+               len = strlen(advise);
+               if (write(fd, advise, len) != (ssize_t)len) {
+                       ksft_perror("failed to write thp control");
+                       ret = EXIT_FAILURE;
+                       goto out;
+               }
+               snprintf(thp_shmem_control_str, sizeof(thp_shmem_control_str),
+                        "%s", control);
+       } else {
+               len = strlen(thp_shmem_control_str);
+               if (len == 0) {
+                       ksft_print_msg("thp control hasn't been set\n");
+                       goto out;
+               }
+               if (write(fd, thp_shmem_control_str, len) != (ssize_t)len) {
+                       ksft_perror("failed to write thp control");
+                       ret = EXIT_FAILURE;
+                       goto out;
+               }
+               /* Mark the saved state as consumed */
+               thp_shmem_control_str[0] = '\0';
+       }
+out:
+       if (close(fd)) {
+               ksft_perror("failed to close thp control");
+               ret = EXIT_FAILURE;
+       }
+
+       return ret;
+}
+
+/*
+ * Release everything the fixture setup may have acquired: the test cgroup,
+ * the backing file descriptor and, depending on the variant, the shmem THP
+ * policy or the MGLRU state.
+ *
+ * Also invoked by hand through TEARDOWN_LOG() when setup fails part-way, so
+ * resources that were never acquired are skipped and released ones are
+ * reset.
+ *
+ * On any failure the process exits with KSFT_FAIL because system-wide
+ * settings may not have been restored.
+ */
+FIXTURE_TEARDOWN(numa_migrate)
+{
+       int ret = EXIT_SUCCESS;
+
+       if (self->cg_test != NULL) {
+               if (cg_destroy(self->cg_test)) {
+                       ksft_print_msg("failed to destroy %s\n", self->cg_test);
+                       ret = EXIT_FAILURE;
+               }
+               free(self->cg_test);
+               self->cg_test = NULL;
+       }
+
+       if (self->fd != -1) {
+               if (close(self->fd)) {
+                       ksft_perror("failed to close test fd");
+                       ret = EXIT_FAILURE;
+               }
+               self->fd = -1;
+       }
+
+       switch (variant->mem_type) {
+       case MEMORY_TYPE_TMPFS_FILE_THP:
+               if (thp_shmem_control(false)) {
+                       ksft_print_msg("failed to restore thp shmem\n");
+                       ret = EXIT_FAILURE;
+               }
+               break;
+       case MEMORY_TYPE_INACTIVE_ANON:
+               if (mm_lru_gen_control(false)) {
+                       ksft_print_msg("failed to restore mglru\n");
+                       ret = EXIT_FAILURE;
+               }
+               break;
+       default:
+               break;
+       }
+
+       if (ret != EXIT_SUCCESS) {
+               ksft_print_msg("teardown error, system might be in an inconsistent state\n");
+               exit(KSFT_FAIL);
+       }
+}
+/*
+ * Log a message with TH_LOG() and then run the fixture teardown by hand.
+ * Used as the failure handler of ASSERT_*() inside FIXTURE_SETUP();
+ * presumably needed because the harness does not run teardown when setup
+ * fails - TODO confirm against kselftest_harness.h.
+ */
+#define TEARDOWN_LOG(fmt, ...) do { \
+       TH_LOG(fmt, ##__VA_ARGS__); \
+       numa_migrate_teardown(_metadata, self, variant); \
+} while (0)
+
+/*
+ * Prepare one test run: create the signalling pipe and the test cgroup,
+ * set up whatever backs the variant's mapping (disk file, memfd, or nothing
+ * for anonymous memory), adjust the system-wide MGLRU / shmem THP settings
+ * where the variant needs it, and finally drop the VM caches.
+ *
+ * Every failure after the cgroup name has been allocated goes through
+ * TEARDOWN_LOG() so that what was already set up is undone.
+ */
+FIXTURE_SETUP(numa_migrate)
+{
+       /*
+        * Must be done before the first TEARDOWN_LOG(): teardown closes
+        * self->fd whenever it is not -1.
+        */
+       self->fd = -1;
+
+       ASSERT_EQ(0, pipe(self->pipefd));
+
+       /* Store the context to make it accessible outside of the harness */
+       self->ctx.self = self;
+       self->ctx.variant = variant;
+       self->ctx.metadata = _metadata;
+
+       self->cg_test = cg_name(root, "kselftest");
+       ASSERT_NE(NULL, self->cg_test)
+               TH_LOG("failed to allocate the test cgroup name");
+       ASSERT_EQ(0, cg_create(self->cg_test))
+               TEARDOWN_LOG("failed to create a test cgroup");
+
+       switch (variant->mem_type) {
+       case MEMORY_TYPE_ACTIVE_FILE:
+       case MEMORY_TYPE_INACTIVE_FILE:
+               ASSERT_EQ(EXIT_SUCCESS, setup_disk_file(&self->ctx))
+                       TEARDOWN_LOG("failed to setup a disk file\n");
+               break;
+       case MEMORY_TYPE_ACTIVE_ANON:
+       case MEMORY_TYPE_ANON_THP:
+               break;
+       case MEMORY_TYPE_INACTIVE_ANON:
+               ASSERT_EQ(EXIT_SUCCESS, mm_lru_gen_control(true))
+                       TEARDOWN_LOG("failed to disable mglru\n");
+               break;
+       case MEMORY_TYPE_TMPFS_FILE_THP:
+               ASSERT_EQ(EXIT_SUCCESS, thp_shmem_control(true))
+                       TEARDOWN_LOG("failed to enable thp shmem\n");
+               /* fallthrough; */
+       case MEMORY_TYPE_TMPFS_FILE:
+               ASSERT_EQ(EXIT_SUCCESS, setup_tmpfs_file(&self->ctx))
+                       TEARDOWN_LOG("failed to setup a tmpfs file\n");
+               break;
+       default:
+               SKIP(return, "unknown page backing\n");
+               break;
+       }
+
+       ASSERT_EQ(EXIT_SUCCESS, vm_drop_caches())
+               TEARDOWN_LOG("failed to drop vm caches\n");
+}
+
+#ifndef __maybe_unused
+#define __maybe_unused __attribute__((__unused__))
+#endif
+
+/*
+ * Declare the local variables self, variant and _metadata from a
+ * struct test_context pointer, so that a plain helper function can use the
+ * harness macros that refer to those names. The variables are marked
+ * unused-safe because not every helper needs all three.
+ */
+#define EXPAND_CTX(ctx) \
+       struct _test_data_numa_migrate __maybe_unused *self = (ctx)->self; \
+       const struct _fixture_variant_numa_migrate __maybe_unused *variant = \
+               (ctx)->variant; \
+       struct __test_metadata __maybe_unused *_metadata = (ctx)->metadata
+
+/*
+ * Read @cgroup's memory.numa_stat and extract the per-node values of the
+ * line whose first word is exactly @key.
+ *
+ * The line is expected to look like "<key> N<id>=<value> N<id>=<value> ...".
+ * Each value is stored in @node_stat[<id>]; the caller must provide at least
+ * node_count entries.
+ *
+ * Returns the number of node values parsed. A read failure, a missing key
+ * or a malformed/out-of-range node id fails the current test through the
+ * harness assertions.
+ */
+static int parse_memory_numa_stat(struct test_context *ctx, const char *cgroup,
+                                 const char *key, size_t *node_stat)
+{
+       char *stat_str;
+       char *token;
+       size_t node = 0;
+       size_t node_bytes;
+       size_t key_len = strlen(key);
+       int step;
+       char buf[PAGE_SIZE];
+       static const char *stat_delim = "\n";
+       static const char *node_delim = " =";
+       int parsed_nodes = 0;
+       bool key_found = false;
+
+       EXPAND_CTX(ctx);
+
+       ASSERT_EQ(0, cg_read(cgroup, "memory.numa_stat", buf, sizeof(buf)))
+               ksft_print_msg("failed to read memory.numa_stat\n");
+
+       stat_str = strtok(buf, stat_delim);
+       while (stat_str != NULL) {
+               /*
+                * Match the whole first word: a bare prefix comparison would
+                * let e.g. "shmem" select a "shmem_thp" line.
+                */
+               if (!strncmp(stat_str, key, key_len) &&
+                   (stat_str[key_len] == ' ' || stat_str[key_len] == '\0')) {
+                       key_found = true;
+                       break;
+               }
+               stat_str = strtok(NULL, stat_delim);
+       }
+       ASSERT_TRUE(key_found)
+               ksft_print_msg("failed to find key %s\n", key);
+
+       token = strtok(stat_str, node_delim);
+       /* skip the key itself */
+       token = strtok(NULL, node_delim);
+       step = 1;
+       while (token != NULL) {
+               if (step % 2) {
+                       /* Odd tokens are node ids of the form "N<id>" */
+                       ASSERT_EQ('N', token[0])
+                               ksft_print_msg("bogus node token: %s\n", token);
+                       node = strtoul(token + 1, NULL, 10);
+                       ASSERT_GT(node_count, node)
+                                 ksft_print_msg("bogus node id: %zu\n", node);
+               } else {
+                       /* Even tokens are the value of the preceding node */
+                       node_bytes = strtoull(token, NULL, 10);
+                       node_stat[node] = node_bytes;
+                       parsed_nodes++;
+               }
+               step++;
+               token = strtok(NULL, node_delim);
+       }
+
+       return parsed_nodes;
+}
+
+/*
+ * Check that the memory accounted under @stat has left the source node and
+ * has been spread evenly over the destination nodes.
+ *
+ * Passes when at most 15% of file_size remains on src_node and every node
+ * in dst_mask holds file_size / dst_count within a 15% tolerance. The
+ * statistics are re-read every 100ms, up to 100 times, before giving up;
+ * on failure they are dumped and the test is failed.
+ */
+static void verify_stats(struct test_context *ctx, const char *cgroup,
+                        const char *stat)
+{
+       int parsed_nodes;
+       int n;
+       size_t per_node;
+       size_t node_stat[NUMA_NUM_NODES];
+       int ret = EXIT_SUCCESS;
+       int retries_left = 100; /* 10s */
+
+       EXPAND_CTX(ctx);
+retry:
+       retries_left--;
+       memset(node_stat, 0, node_count * sizeof(size_t));
+
+       parsed_nodes = parse_memory_numa_stat(ctx, cgroup, stat, node_stat);
+       ASSERT_EQ(node_count, parsed_nodes)
+               ksft_print_msg("failed to parse numa stat\n");
+
+       /* values_close() does not work well with 0 */
+       if (node_stat[src_node] > file_size / 100 * 15) {
+               if (retries_left > 0) {
+                       usleep(100000); /* 100ms */
+                       goto retry;
+               }
+               ksft_print_msg("too much memory left on node%i\n",
+                               src_node);
+               ret = EXIT_FAILURE;
+               goto dump_stat;
+       }
+
+       per_node = file_size / dst_count;
+       /* Only the first node_count entries of node_stat are initialised */
+       for (n = 0; n < node_count; n++) {
+               if (numa_bitmask_isbitset(dst_mask, n) &&
+                   !values_close(node_stat[n], per_node, 15)) {
+                       if (retries_left > 0) {
+                               usleep(100000); /* 100ms */
+                               goto retry;
+                       }
+                       ksft_print_msg("not enough memory moved to node%i\n",
+                                      n);
+                       ret = EXIT_FAILURE;
+               }
+       }
+
+dump_stat:
+       if (ret != EXIT_SUCCESS) {
+               ksft_print_msg("size=%zu\n", file_size);
+               dump_memory_numa_stat(node_stat);
+       }
+
+       ASSERT_EQ(EXIT_SUCCESS, ret);
+}
+
+/*
+ * Body of the child process started inside the test cgroup.
+ *
+ * @cgroup: unused.
+ * @arg:    struct test_context of the running test.
+ *
+ * Binds itself to the source node, maps file_size bytes as described by the
+ * variant, populates the mapping and writes one byte to the pipe to tell
+ * the parent that the memory is in place. Inactive variants then mark the
+ * mapping cold and sleep in pause(); the other variants keep reading and
+ * writing random bytes of the mapping in an endless loop. The child is
+ * therefore expected to be killed by the parent rather than to return.
+ *
+ * Returns EXIT_FAILURE if any of the preparation steps fails.
+ */
+static int touch_pages(const char *cgroup, void *arg)
+{
+       void *mmap_ptr;
+       volatile uint8_t *ptr;
+       enum memory_type mem_type;
+       char tmp = 's';
+       struct test_context *ctx = arg;
+       int ret = EXIT_SUCCESS;
+
+       EXPAND_CTX(ctx);
+       mem_type = variant->mem_type;
+
+       numa_bind(src_mask);
+
+       if (close(self->pipefd[0])) {
+               ksft_perror("failed to close pipe read end");
+               return EXIT_FAILURE;
+       }
+
+       /* Do not outlive the test process */
+       if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0) {
+               ksft_perror("failed to set parent death signal");
+               return EXIT_FAILURE;
+       }
+
+       mmap_ptr = mmap(NULL, file_size, PROT_READ | PROT_WRITE,
+                       variant->mmap_flags, self->fd, 0);
+       if (mmap_ptr == MAP_FAILED) {
+               ksft_perror("failed to map test file");
+               return EXIT_FAILURE;
+       }
+
+       if (variant->madvise) {
+               if (madvise(mmap_ptr, file_size, variant->madvise) < 0) {
+                       ksft_perror("failed to madvise");
+                       ret = EXIT_FAILURE;
+                       goto err_unmap;
+               }
+       }
+
+       if (madvise(mmap_ptr, file_size, MADV_POPULATE_WRITE) < 0) {
+               ksft_perror("failed to populate test mapping");
+               ret = EXIT_FAILURE;
+               goto err_unmap;
+       }
+
+       /* Tell the parent that the mapping has been populated */
+       if (write(self->pipefd[1], &tmp, 1) != 1) {
+               ksft_print_msg("failed to write to pipe\n");
+               close(self->pipefd[1]);
+               ret = EXIT_FAILURE;
+               goto err_unmap;
+       }
+
+       if (close(self->pipefd[1])) {
+               ksft_perror("failed to close pipe write end");
+               ret = EXIT_FAILURE;
+               goto err_unmap;
+       }
+
+       if (mem_type == MEMORY_TYPE_INACTIVE_FILE ||
+           mem_type == MEMORY_TYPE_INACTIVE_ANON) {
+               if (madvise(mmap_ptr, file_size, MADV_COLD) < 0) {
+                       ksft_perror("failed to mark test mapping cold");
+                       ret = EXIT_FAILURE;
+                       goto err_unmap;
+               }
+               /* Stay idle so that the pages are not touched again */
+               pause();
+       }
+
+       srand(getpid());
+       while (1) {
+               ptr = (volatile uint8_t *)mmap_ptr + rand() % file_size;
+               if (rand() % 2)
+                       *ptr = rand() % UINT8_MAX;
+               else
+                       (void)*ptr;
+       }
+
+err_unmap:
+       if (munmap(mmap_ptr, file_size) < 0) {
+               ksft_perror("failed to unmap test file");
+               ret = EXIT_FAILURE;
+       }
+
+       return ret;
+}
+
+/*
+ * Poll @cgroup's memory.numa_stat until the @stat value of src_node is
+ * within 15% of @value.
+ *
+ * The statistics are read up to 30 times, 100ms apart. Returns EXIT_SUCCESS
+ * as soon as the value matches and EXIT_FAILURE once the attempts run out.
+ */
+static int wait_src_node_stat(struct test_context *ctx, const char *cgroup,
+                             const char *stat, size_t value)
+{
+       size_t per_node[NUMA_NUM_NODES];
+       int attempts;
+       int found;
+
+       EXPAND_CTX(ctx);
+
+       for (attempts = 30; attempts > 0; attempts--) { /* 3s */
+               memset(per_node, 0, node_count * sizeof(size_t));
+
+               found = parse_memory_numa_stat(ctx, cgroup, stat, per_node);
+               ASSERT_EQ(node_count, found);
+               if (values_close(per_node[src_node], value, 15))
+                       return EXIT_SUCCESS;
+
+               /* No point in sleeping after the last attempt */
+               if (attempts > 1)
+                       usleep(100000); /* 100ms */
+       }
+
+       return EXIT_FAILURE;
+}
+
+/*
+ * Start a child in the test cgroup that populates the variant's memory on
+ * the source node, wait until that memory shows up in memory.numa_stat,
+ * request migration by writing the destination node list to
+ * memory.numa_migrate and verify the resulting per-node distribution.
+ * Finally kill the child and wait for the cgroup to become empty.
+ */
+TEST_F(numa_migrate, migrate)
+{
+       int child_pid;
+       char tmp;
+       char buf[PAGE_SIZE];
+
+       child_pid = cg_run_nowait(self->cg_test, touch_pages, &self->ctx);
+       ASSERT_NE(-1, child_pid);
+       ASSERT_EQ(0, close(self->pipefd[1]));
+
+       /* Wait for the child to enter cgroup */
+       ASSERT_EQ(0, cg_wait_for_proc_count(self->cg_test, 1));
+
+       /* Wait for the child to populate the page cache */
+       ASSERT_EQ(1, read(self->pipefd[0], &tmp, 1));
+       ASSERT_EQ(0, close(self->pipefd[0]));
+
+       ASSERT_EQ(EXIT_SUCCESS, wait_src_node_stat(&self->ctx, self->cg_test,
+                                                  variant->stat, file_size));
+
+       /*
+        * Record a rejected request as a failure, but still run the
+        * verification so that the per-node statistics get reported.
+        */
+       EXPECT_EQ(0, cg_write(self->cg_test, "memory.numa_migrate", dst_str))
+               TH_LOG("failed to write memory.numa_migrate");
+
+       verify_stats(&self->ctx, self->cg_test, variant->stat);
+
+       ASSERT_EQ(0, cg_killall(self->cg_test));
+
+       /* Wait for cgroup to be empty */
+       while (1) {
+               ASSERT_EQ(0, cg_read(self->cg_test, "cgroup.procs", buf,
+                                    sizeof(buf)));
+               if (buf[0] == '\0')
+                       break;
+               usleep(1000);
+       }
+
+       ASSERT_NE(-1, waitpid(child_pid, NULL, 0));
+}
+
+/*
+ * Select the source node and the destination nodes for all tests.
+ *
+ * The source is the first node set in numa_all_nodes_ptr. Destinations are
+ * the following nodes of that mask that report at least file_size bytes of
+ * free memory, or only the first such node when the environment variable
+ * KSFT_CG_NUMA_MIGRATE_DST_SINGLE is set. The result is stored in
+ * node_count, src_node/src_mask, dst_count/dst_mask and, as a
+ * comma-separated list, in dst_str.
+ *
+ * Exits the program: with a failure if a nodemask cannot be allocated or
+ * the node list does not fit into dst_str, with a skip if no source or no
+ * destination node is found.
+ */
+static void configure_numa(void)
+{
+       int cnt;
+       int n;
+       long freep;
+       size_t used = 0;
+       bool src_found = false;
+       bool to_single;
+
+       node_count = numa_max_node() + 1;
+
+       /* Cannot use args as they are owned by the harness */
+       to_single = getenv("KSFT_CG_NUMA_MIGRATE_DST_SINGLE") != NULL;
+
+       src_mask = numa_allocate_nodemask();
+       if (src_mask == NULL)
+               ksft_exit_fail_msg("failed to allocate src nodemask\n");
+
+       dst_mask = numa_allocate_nodemask();
+       if (dst_mask == NULL)
+               ksft_exit_fail_msg("failed to allocate dst nodemask\n");
+
+       for (n = 0; n < node_count; n++) {
+               if (numa_bitmask_isbitset(numa_all_nodes_ptr, n)) {
+                       src_node = n;
+                       numa_bitmask_setbit(src_mask, n);
+                       src_found = true;
+                       break;
+               }
+       }
+       if (!src_found)
+               ksft_exit_skip("no suitable node to migrate from\n");
+
+       for (++n; n < node_count; n++) {
+               if (!numa_bitmask_isbitset(numa_all_nodes_ptr, n))
+                       continue;
+               /* freep is only meaningful when the query succeeds */
+               if (numa_node_size(n, &freep) < 0)
+                       continue;
+               /*
+                * Assume we want the node to be able to accommodate the
+                * whole test allocation to avoid recalculating
+                * proportions after each newly considered node.
+                */
+               if (freep < 0 || (size_t)freep < file_size)
+                       continue;
+
+               cnt = snprintf(dst_str + used, sizeof(dst_str) - used,
+                              "%i,", n);
+               if (cnt < 0 || (size_t)cnt >= sizeof(dst_str) - used)
+                       ksft_exit_fail_msg("destination node list is too long\n");
+               used += cnt;
+
+               dst_count++;
+               numa_bitmask_setbit(dst_mask, n);
+               if (to_single)
+                       break;
+       }
+       if (dst_count == 0)
+               ksft_exit_skip("no suitable nodes to migrate to\n");
+       /* Drop the trailing comma */
+       dst_str[used - 1] = '\0';
+}
+
+/*
+ * Check the prerequisites (root, cgroup v2, at least two NUMA nodes), drop
+ * the VM caches, pick the source/destination nodes and hand over to the
+ * kselftest harness.
+ */
+int main(int argc, char *argv[])
+{
+       int ret;
+
+       if (geteuid())
+               ksft_exit_skip("needs root to run\n");
+
+       if (cg_find_unified_root(root, sizeof(root), NULL))
+               ksft_exit_skip("cgroup v2 isn't mounted\n");
+
+       if (numa_available() < 0)
+               ksft_exit_skip("numa is not available\n");
+
+       if (numa_num_configured_nodes() < 2)
+               ksft_exit_skip("at least 2 numa nodes are required\n");
+
+       if (vm_drop_caches())
+               ksft_exit_fail();
+
+       configure_numa();
+
+       ret = test_harness_run(argc, argv);
+
+       /*
+        * Normally not reached because the harness calls exit(). If it does
+        * return, pass its result on unchanged instead of re-interpreting it
+        * as a truth value.
+        */
+       numa_bitmask_free(src_mask);
+       numa_bitmask_free(dst_mask);
+
+       return ret;
+}
-- 
2.47.1

_______________________________________________
Devel mailing list
[email protected]
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to