Please find replies inline.
On Wednesday, 3 December 2025 at 22:24:41 Central European Time, Konstantin
Khorenko wrote:
> On 12/1/25 11:42, Dmitry Sepp wrote:
> > The test verifies the capability of migrating the contents of a process's
> > lruvecs from one NUMA node to one or more destination nodes.
> >
> > Program arguments are controlled by the test harness itself. Therefore,
> > an environment variable has been added to enforce migration to a single
> > destination node: KSFT_NUMA_MIGRATE_DST_SINGLE. The default behavior is
> > to migrate pages to all suitable nodes that have enough memory (excluding
> > the source node, on which the test itself runs after the initial setup).
> >
> > The inactive lists are populated by setting memory.max to 50% of the
> > test file size.
> >
> > The test further verifies data integrity by performing reads and writes
> > at random offsets of the mapping and calculating crc32 of the whole
> > mapping on each iteration (by means of zlib).
> >
> > The tests verifying migration of inactive anon pages are currently
> > marked as expected to fail, because migration of those pages does not
> > currently work correctly
> > (https://virtuozzo.atlassian.net/browse/VSTOR-119909).
> >
> > The current error tolerance is set to 15%.
> >
> > https://virtuozzo.atlassian.net/browse/VSTOR-114785
> >
> > Signed-off-by: Dmitry Sepp <[email protected]>
> > ---
> >
> > tools/testing/selftests/cgroup/Makefile | 2 +
> > tools/testing/selftests/cgroup/config | 3 +
> > .../selftests/cgroup/test_numa_migrate.c | 630 ++++++++++++++++++
> > 3 files changed, 635 insertions(+)
> > create mode 100644 tools/testing/selftests/cgroup/test_numa_migrate.c
> >
> > diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile
> > index ff290321d7da..fe253c8173aa 100644
> > --- a/tools/testing/selftests/cgroup/Makefile
> > +++ b/tools/testing/selftests/cgroup/Makefile
> > @@ -18,6 +18,7 @@ TEST_GEN_PROGS += test_memcontrol
> >
> > TEST_GEN_PROGS += test_pids
> > TEST_GEN_PROGS += test_zswap
> > TEST_GEN_PROGS += test_cache
> >
> > +TEST_GEN_PROGS += test_numa_migrate
> >
> > LOCAL_HDRS += $(selfdir)/clone3/clone3_selftests.h
> > $(selfdir)/pidfd/pidfd.h
> >
> > @@ -34,3 +35,4 @@ $(OUTPUT)/test_memcontrol: cgroup_util.c
> >
> > $(OUTPUT)/test_pids: cgroup_util.c
> > $(OUTPUT)/test_zswap: cgroup_util.c
> > $(OUTPUT)/test_cache: cgroup_util.c
> >
> > +$(OUTPUT)/test_numa_migrate: LDLIBS += -lnuma -lz cgroup_util.c
> > diff --git a/tools/testing/selftests/cgroup/config b/tools/testing/selftests/cgroup/config
> > index 641ed9bc26b4..e006098a645a 100644
> > --- a/tools/testing/selftests/cgroup/config
> > +++ b/tools/testing/selftests/cgroup/config
> > @@ -5,3 +5,6 @@ CONFIG_CGROUP_SCHED=y
> >
> > CONFIG_MEMCG=y
> > CONFIG_PAGE_COUNTER=y
> > CONFIG_CACHESTAT_SYSCALL=y
> >
> > +CONFIG_NUMA=y
> > +CONFIG_MEMFD_CREATE=y
> > +CONFIG_TRANSPARENT_HUGEPAGE=y
> > diff --git a/tools/testing/selftests/cgroup/test_numa_migrate.c b/tools/testing/selftests/cgroup/test_numa_migrate.c
> > new file mode 100644
> > index 000000000000..8f5e9e62ddc4
> > --- /dev/null
> > +++ b/tools/testing/selftests/cgroup/test_numa_migrate.c
> > @@ -0,0 +1,630 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/*
> > + * Copyright (C) 2025 Virtuozzo International GmbH. All rights reserved.
> > + */
> > +
> > +#define _GNU_SOURCE
> > +#include <stdio.h>
> > +#include <stdlib.h>
> > +#include <stdint.h>
> > +#include <unistd.h>
> > +#include <stdbool.h>
> > +#include <fcntl.h>
> > +#include <numa.h>
> > +#include <signal.h>
> > +#include <zlib.h>
> > +#include <sys/mman.h>
> > +#include <sys/wait.h>
> > +#include <sys/prctl.h>
> > +#include <linux/prctl.h>
> > +#include <linux/limits.h>
> > +
> > +#include "../kselftest_harness.h"
> > +#include "cgroup_util.h"
> > +
> > +static char root[PATH_MAX];
> > +static const size_t file_size = MB(1024);
> > +static int src_node;
> > +static struct bitmask *src_mask;
> > +static char dst_str[PAGE_SIZE];
> > +static int dst_count;
> > +static struct bitmask *dst_mask;
> > +
> > +enum memory_type {
> > + MEMORY_TYPE_ACTIVE_FILE,
> > + MEMORY_TYPE_INACTIVE_FILE,
> > + MEMORY_TYPE_ACTIVE_ANON,
> > + MEMORY_TYPE_INACTIVE_ANON,
> > + MEMORY_TYPE_ANON_THP,
> > + MEMORY_TYPE_TMPFS_FILE,
> > + MEMORY_TYPE_TMPFS_FILE_THP,
> > +};
> > +
> > +struct test_context {
> > + struct _test_data_numa_migrate *self;
> > + const struct _fixture_variant_numa_migrate *variant;
> > + struct __test_metadata *metadata;
> > +};
> > +
> > +static void dump_memory_numa_stat(size_t *node_stat)
> > +{
> > + int n;
> > + int node_count = numa_max_node() + 1;
> > +
> > + ksft_print_msg("%s", "");
> > + for (n = 0; n < node_count; n++)
> > + printf("node%i=%zu ", n, node_stat[n]);
> > + printf("\n");
> > +}
> > +
> > +static int parse_memory_numa_stat(struct test_context *ctx, const char *cgroup,
> > +				  const char *key, size_t **node_stat)
> > +{
> > + char *stat_str;
> > + char *token;
> > + size_t node;
> > + size_t node_bytes;
> > + int node_count;
> > + int step;
> > + char buf[PAGE_SIZE];
> > + static const char *stat_delim = "\n";
> > + static const char *node_delim = " =";
> > + int parsed_nodes = 0;
> > + bool key_found = false;
> > + struct __test_metadata *_metadata = ctx->metadata;
> > +
> > +	EXPECT_NE(-1, cg_read(cgroup, "memory.numa_stat", buf, sizeof(buf)));
> > +
> > + stat_str = strtok(buf, stat_delim);
>
> 2 spaces
>
> > + while (stat_str != NULL) {
> > + if (!strncmp(stat_str, key, strlen(key))) {
> > + key_found = true;
> > + break;
> > + }
> > + stat_str = strtok(NULL, stat_delim);
> > + }
> > + EXPECT_TRUE(key_found);
> > +
> > + node_count = numa_max_node() + 1;
> > + *node_stat = calloc(node_count, sizeof(size_t));
> > + EXPECT_NE(NULL, *node_stat);
>
> ASSERT_NE ?
> Otherwise the test will be marked as Failed, but the execution will continue
> and the test will segfault.
True, but segfaults are handled gracefully by the harness, so this in itself
is not a big issue. Please see more comments on ASSERT below.
>
> And generally - please review all the places EXPECT_XX is used.
> I think most of these cases should be substituted with ASSERT_XX.
>
> For example, EXPECT_TRUE(key_found); - why continue execution at all? We
> really need key_found there, or the rest of the test execution does not
> make sense, no?
>
> Or > + EXPECT_NE(-1, cg_read(cgroup, "memory.numa_stat", buf, sizeof(buf)));
> If you failed to read the file, why continue?
>
While this is a valid point of course, the big problem with ASSERT is that it
does not invoke the teardown callback for a test, in contrast to EXPECT. This
might lead to e.g. a leftover cgroup that is not removed properly if a test
case fails on an ASSERT, which in turn will prevent the next test from
running. I should probably handle this case better anyway, though...
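For example, the setup could be made tolerant to a cgroup left behind by a
previous ASSERT-terminated run. A rough sketch of what I have in mind (not
part of this patch; it relies on cg_destroy() tolerating a missing directory,
so it should be safe to call unconditionally):

	/* Best-effort cleanup of a cgroup possibly left behind by a
	 * previous run that died on an ASSERT without teardown.
	 */
	self->cg_test = cg_name(root, "kselftest");
	cg_destroy(self->cg_test);	/* ignore the result on purpose */
	EXPECT_NE(-1, cg_create(self->cg_test));

With something like that in place, most of the EXPECTs could indeed become
ASSERTs.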
> > +
> > + token = strtok(stat_str, node_delim);
> > + /* skip the key itself */
> > + token = strtok(NULL, node_delim);
> > + step = 1;
> > + while (token != NULL) {
> > + if (step % 2) {
> > + node = strtoul(token + 1, NULL, 10);
> > + EXPECT_GT(node_count, node) {
> > + ksft_print_msg("bogus node id:
%zu\n", node);
> > + }
> > + } else {
> > + node_bytes = strtoull(token, NULL, 10);
> > + (*node_stat)[node] = node_bytes;
> > + parsed_nodes++;
> > + }
> > + step++;
> > + token = strtok(NULL, node_delim);
> > + }
> > +
> > + return parsed_nodes;
> > +}
> > +
> > +static void verify_active(struct test_context *ctx, const char *cgroup,
> > + const char *stat)
> > +{
> > + int ret = EXIT_SUCCESS;
> > + int parsed_nodes;
> > + int n;
> > + size_t per_node;
> > + size_t *node_stat;
> > + struct __test_metadata *_metadata = ctx->metadata;
> > +
> > +	parsed_nodes = parse_memory_numa_stat(ctx, cgroup, stat, &node_stat);
> > + EXPECT_NE(-1, parsed_nodes);
> > +
> > + /* values_close() does not work well with 0 */
> > + if (node_stat[src_node] > file_size / 100 * 15) {
> > + ksft_print_msg("too much memory on node%i\n",
src_node);
> > + ret = EXIT_FAILURE;
> > + goto dump_stat;
> > + }
> > +
> > + per_node = file_size / dst_count;
>
> Just to make sure: are you aware of and prepared for the fact that NUMA
> nodes on a Node might have different amounts of memory assigned?
>
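Partially: configure_numa() only picks destination nodes that can accommodate
the whole test allocation, but the verification here still assumes an even
split. If that turns out to be too fragile, the per-node expectation could be
weighted by node size instead. A rough sketch (not in this patch; the double
is there to avoid overflowing the multiplication):

	/* Weight the expected per-node amount by the node's total size
	 * instead of assuming an even split across destination nodes.
	 */
	long nfree;
	long total_size = 0;

	for (n = 0; n < numa_max_possible_node(); n++)
		if (numa_bitmask_isbitset(dst_mask, n))
			total_size += numa_node_size(n, &nfree);

	for (n = 0; n < numa_max_possible_node(); n++) {
		if (!numa_bitmask_isbitset(dst_mask, n))
			continue;
		per_node = (size_t)((double)file_size *
				    numa_node_size(n, &nfree) / total_size);
		/* ... then compare node_stat[n] against per_node as before */
	}

The same would apply to verify_inactive() below.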
> > + for (n = 0; n < numa_max_possible_node(); n++) {
> > + if (numa_bitmask_isbitset(dst_mask, n) &&
> > + !values_close(node_stat[n], per_node, 15)) {
> > + ksft_print_msg("not enough pages on
node%i\n", n);
> > + ret = EXIT_FAILURE;
> > + }
> > + }
> > +
> > +dump_stat:
> > + if (ret != EXIT_SUCCESS) {
> > + ksft_print_msg("size=%li\n", file_size);
> > + dump_memory_numa_stat(node_stat);
> > + }
> > +
> > + free(node_stat);
> > +
> > + EXPECT_EQ(EXIT_SUCCESS, ret);
> > +}
> > +
> > +static void verify_inactive(struct test_context *ctx, const char *cgroup,
> > + const char *stat)
> > +{
> > + int ret = EXIT_SUCCESS;
> > + int parsed_nodes;
> > + int n;
> > + long memory_max;
> > + size_t per_node;
> > + size_t migrated_to_node;
> > + size_t *active_stat = NULL;
> > + size_t *inactive_stat = NULL;
> > + char active[PAGE_SIZE] = "active_";
> > + char inactive[PAGE_SIZE] = "inactive_";
> > + struct __test_metadata *_metadata = ctx->metadata;
> > +
> > + memory_max = cg_read_long(cgroup, "memory.max");
> > + EXPECT_NE(-1, memory_max);
> > +
> > + strncat(active + strlen(active), stat, strlen(stat));
> > + strncat(inactive + strlen(inactive), stat, strlen(stat));
> > +
> > +	parsed_nodes = parse_memory_numa_stat(ctx, cgroup, active,
> > +					      &active_stat);
> > + EXPECT_NE(-1, parsed_nodes);
> > +
> > +	parsed_nodes = parse_memory_numa_stat(ctx, cgroup, inactive,
> > +					      &inactive_stat);
> > +	EXPECT_NE(-1, parsed_nodes);
> > +
> > + /* values_close() does not work well with 0 */
> > + if ((active_stat[src_node] > memory_max / 100 * 15) ||
> > + (inactive_stat[src_node] > memory_max / 100 * 15)) {
> > + ksft_print_msg("too much memory on node%i\n",
src_node);
> > + ret = EXIT_FAILURE;
> > + goto dump_stat;
> > + }
> > +
> > + per_node = memory_max / dst_count;
>
> Just to make sure: are you aware of and prepared for the fact that NUMA
> nodes on a Node might have different amounts of memory assigned?
>
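Same as above: the expectation here would be weighted by node size as well,
along the lines of the sketch in my reply to the verify_active() comment.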
> > + for (n = 0; n < numa_max_possible_node(); n++) {
> > + if (!numa_bitmask_isbitset(dst_mask, n))
> > + continue;
> > + migrated_to_node = active_stat[n] + inactive_stat[n];
> > + if (!values_close(migrated_to_node, per_node, 15)) {
> > + ksft_print_msg("not enough memory moved to
node%i\n", n);
> > + ret = EXIT_FAILURE;
> > + }
> > + }
> > +
> > +dump_stat:
> > + if (ret != EXIT_SUCCESS) {
> > + ksft_print_msg("size=%li\n", memory_max);
> > + ksft_print_msg("%s:\n", active);
> > + dump_memory_numa_stat(active_stat);
> > + ksft_print_msg("%s:\n", inactive);
> > + dump_memory_numa_stat(inactive_stat);
> > + }
> > +
> > + if (inactive_stat)
> > + free(inactive_stat);
> > +
> > + if (active_stat)
> > + free(active_stat);
> > +
> > + EXPECT_EQ(EXIT_SUCCESS, ret);
> > +}
> > +
> > +static int vm_drop_caches(void)
> > +{
> > + int fd;
> > + int ret = EXIT_SUCCESS;
> > +
> > + fd = open("/proc/sys/vm/drop_caches", O_WRONLY);
> > + if (fd < 0) {
> > + ksft_perror("failed to open drop_caches");
> > + return EXIT_FAILURE;
> > + }
> > +
> > + if (write(fd, "3", 1) < 0) {
> > + ksft_perror("failed to write to drop_caches");
> > + ret = EXIT_FAILURE;
> > + }
> > +
> > + if (close(fd) < 0) {
> > + ksft_perror("failed to close drop_caches");
> > + ret = EXIT_FAILURE;
> > + }
> > +
> > + return ret;
> > +}
> > +
> > +FIXTURE(numa_migrate) {
> > + int fd;
> > + char *cg_test;
> > + char *thp_shmem_control_str;
> > + int pipefd[2];
> > + struct test_context ctx;
> > +};
> > +
> > +FIXTURE_VARIANT(numa_migrate) {
> > + enum memory_type mem_type;
> > + void (*verify)(struct test_context *ctx, const char *cgroup,
> > + const char *stat);
> > + const char *stat;
> > + int mmap_flags;
> > + int madvise;
> > + bool with_mem_limit;
> > +};
> > +
> > +FIXTURE_VARIANT_ADD(numa_migrate, disk_file) {
> > + .mem_type = MEMORY_TYPE_ACTIVE_FILE,
> > + .verify = verify_active,
> > + .stat = "active_file",
> > + .mmap_flags = MAP_SHARED,
> > + .madvise = MADV_NOHUGEPAGE,
> > + .with_mem_limit = false,
> > +};
> > +
> > +FIXTURE_VARIANT_ADD(numa_migrate, anon) {
> > + .mem_type = MEMORY_TYPE_ACTIVE_ANON,
> > + .verify = verify_active,
> > + .stat = "active_anon",
> > + .mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS,
> > + .madvise = MADV_NOHUGEPAGE,
> > + .with_mem_limit = false,
> > +};
> > +
> > +FIXTURE_VARIANT_ADD(numa_migrate, anon_thp) {
> > + .mem_type = MEMORY_TYPE_ANON_THP,
> > + .verify = verify_active,
> > + .stat = "anon_thp",
> > + .mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS,
> > + .madvise = MADV_HUGEPAGE,
> > + .with_mem_limit = false,
> > +};
> > +
> > +FIXTURE_VARIANT_ADD(numa_migrate, tmpfs_file) {
> > + .mem_type = MEMORY_TYPE_TMPFS_FILE,
> > + .verify = verify_active,
> > + .stat = "shmem",
> > + .mmap_flags = MAP_SHARED,
> > + .madvise = MADV_NOHUGEPAGE,
> > + .with_mem_limit = false,
> > +};
> > +
> > +FIXTURE_VARIANT_ADD(numa_migrate, tmpfs_thp) {
> > + .mem_type = MEMORY_TYPE_TMPFS_FILE_THP,
> > + .verify = verify_active,
> > + .stat = "shmem_thp",
> > + .mmap_flags = MAP_SHARED,
> > + .madvise = MADV_HUGEPAGE,
> > + .with_mem_limit = false,
> > +};
> > +
> > +FIXTURE_VARIANT_ADD(numa_migrate, inactive_anon) {
> > + .mem_type = MEMORY_TYPE_INACTIVE_ANON,
> > + .verify = verify_inactive,
> > + .stat = "anon",
> > + .mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS,
> > + .madvise = MADV_NOHUGEPAGE,
> > + .with_mem_limit = true,
> > +};
> > +
> > +FIXTURE_VARIANT_ADD(numa_migrate, inactive_disk_file) {
> > + .mem_type = MEMORY_TYPE_INACTIVE_FILE,
> > + .verify = verify_inactive,
> > + .stat = "file",
> > + .mmap_flags = MAP_SHARED,
> > + .madvise = MADV_NOHUGEPAGE,
> > + .with_mem_limit = true,
> > +};
> > +
> > +static void setup_disk_file(struct test_context *ctx)
> > +{
> > + int fd;
> > + struct _test_data_numa_migrate *self = ctx->self;
> > + struct __test_metadata *_metadata = ctx->metadata;
> > +
> > + fd = get_temp_fd();
> > + EXPECT_NE(-1, fd);
> > + EXPECT_NE(-1, ftruncate(fd, file_size));
> > +
> > + self->fd = fd;
> > +}
> > +
> > +static void setup_tmpfs_file(struct test_context *ctx)
> > +{
> > + int fd;
> > + struct _test_data_numa_migrate *self = ctx->self;
> > + struct __test_metadata *_metadata = ctx->metadata;
> > +
> > + fd = memfd_create("kselftest", 0);
> > + EXPECT_NE(-1, fd);
> > + EXPECT_NE(-1, ftruncate(fd, file_size));
> > +
> > + self->fd = fd;
> > +}
> > +
> > +static void thp_shmem_control(struct test_context *ctx, bool do_set)
> > +{
> > + int fd;
> > + ssize_t count;
> > + char *control;
> > + char control_buf[PAGE_SIZE];
> > + static const char *delim = "[]";
> > + static const char *advise = "advise";
> > + struct _test_data_numa_migrate *self = ctx->self;
> > + struct __test_metadata *_metadata = ctx->metadata;
> > +
> > +	fd = open("/sys/kernel/mm/transparent_hugepage/shmem_enabled", O_RDWR);
> > + EXPECT_NE(-1, fd);
> > +
> > + if (do_set) {
> > + EXPECT_NULL(self->thp_shmem_control_str);
> > + count = read(fd, control_buf, PAGE_SIZE - 1);
> > + EXPECT_NE(-1, count);
> > + control_buf[count] = '\0';
> > +
> > + control = strtok(control_buf, delim);
> > + if (control_buf[0] != '[')
> > + control = strtok(NULL, delim);
> > +
> > + EXPECT_NE(-1, write(fd, advise, strlen(advise)));
> > + self->thp_shmem_control_str = strdup(control);
> > + } else {
> > + EXPECT_NE(NULL, self->thp_shmem_control_str);
> > + EXPECT_NE(-1, write(fd, self->thp_shmem_control_str,
> > + strlen(self->thp_shmem_control_str)));
> > + free(self->thp_shmem_control_str);
> > + self->thp_shmem_control_str = NULL;
> > + }
> > +
> > + EXPECT_NE(-1, close(fd));
> > +}
> > +
> > +FIXTURE_SETUP(numa_migrate)
> > +{
> > + size_t memory_max;
> > + int mm_len;
> > + char *mm_str;
> > +
> > + EXPECT_NE(-1, pipe(self->pipefd));
> > +
> > +	/* Store the context to make it accessible outside of the harness */
> > + self->ctx.self = self;
> > + self->ctx.variant = variant;
> > + self->ctx.metadata = _metadata;
> > +
> > + self->cg_test = cg_name(root, "kselftest");
> > + EXPECT_NE(-1, cg_create(self->cg_test));
> > +
> > + if (variant->with_mem_limit) {
> > + memory_max = file_size / 10 * 5;
> > + mm_len = snprintf(NULL, 0, "%zu", memory_max) + 1;
> > + mm_str = alloca((mm_len) * sizeof(char));
>
> Honestly, using a static buffer would be much clearer. From my point of
> view, of course.
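Fair enough, a fixed-size buffer is simpler; something along these lines
instead of the snprintf(NULL)/alloca dance, perhaps:

	/* 32 bytes comfortably fits any size_t printed in decimal */
	char mm_str[32];

	memory_max = file_size / 10 * 5;
	snprintf(mm_str, sizeof(mm_str), "%zu", memory_max);
	EXPECT_NE(-1, cg_write(self->cg_test, "memory.max", mm_str));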
> > + snprintf(mm_str, mm_len, "%zu", memory_max);
> > +		EXPECT_NE(-1, cg_write(self->cg_test, "memory.max", mm_str));
> > + }
> > +
> > + self->fd = -1;
> > + self->thp_shmem_control_str = NULL;
> > + switch (variant->mem_type) {
> > + case MEMORY_TYPE_ACTIVE_FILE:
> > + case MEMORY_TYPE_INACTIVE_FILE:
> > + setup_disk_file(&self->ctx);
> > + break;
> > + case MEMORY_TYPE_ACTIVE_ANON:
> > + case MEMORY_TYPE_INACTIVE_ANON:
> > + case MEMORY_TYPE_ANON_THP:
> > + break;
> > + case MEMORY_TYPE_TMPFS_FILE:
> > + setup_tmpfs_file(&self->ctx);
> > + break;
> > + case MEMORY_TYPE_TMPFS_FILE_THP:
> > + thp_shmem_control(&self->ctx, true);
> > + setup_tmpfs_file(&self->ctx);
> > + break;
> > + default:
> > + SKIP(return, "unknown page backing\n");
> > + break;
> > + }
> > +
> > + EXPECT_EQ(EXIT_SUCCESS, vm_drop_caches());
> > +};
> > +
> > +FIXTURE_TEARDOWN(numa_migrate)
> > +{
> > + EXPECT_NE(-1, cg_destroy(self->cg_test));
> > + free(self->cg_test);
> > + if (self->fd != -1)
> > + EXPECT_NE(-1, close(self->fd));
> > + if (self->thp_shmem_control_str)
> > + thp_shmem_control(&self->ctx, false);
> > +};
> > +
> > +static int touch_pages(const char *cgroup, void *arg)
> > +{
> > + void *mmap_ptr;
> > + volatile uint8_t *ptr;
> > + size_t data_size;
> > + uint32_t crc;
> > + uint32_t *crc_ptr;
> > + bool op_is_store;
> > + char tmp = 's';
> > + struct test_context *ctx = arg;
> > + struct _test_data_numa_migrate *self = ctx->self;
> > + const struct _fixture_variant_numa_migrate *variant = ctx->variant;
> > + struct __test_metadata *_metadata = ctx->metadata;
> > +
> > + numa_bind(src_mask);
> > +
> > + EXPECT_NE(-1, close(self->pipefd[0]));
> > + EXPECT_NE(-1, prctl(PR_SET_PDEATHSIG, SIGTERM));
> > +
> > + mmap_ptr = mmap(NULL, file_size, PROT_READ | PROT_WRITE,
> > + variant->mmap_flags, self->fd, 0);
> > + EXPECT_NE(MAP_FAILED, mmap_ptr);
> > +
> > + if (variant->madvise)
> > +		EXPECT_NE(-1, madvise(mmap_ptr, file_size, variant->madvise));
> > +
> > + EXPECT_NE(-1, madvise(mmap_ptr, file_size, MADV_POPULATE_WRITE));
> > +
> > + EXPECT_NE(-1, write(self->pipefd[1], &tmp, 1));
> > + EXPECT_NE(-1, close(self->pipefd[1]));
> > +
> > + srand(getpid());
> > +
> > + data_size = file_size - sizeof(uint32_t);
> > + crc_ptr = mmap_ptr + data_size;
> > +
> > + crc = crc32_z(0, mmap_ptr, data_size);
> > + *crc_ptr = crc;
> > + while (1) {
> > + crc = crc32_z(0, mmap_ptr, data_size);
> > + EXPECT_EQ(crc, *crc_ptr) {
> > + ksft_print_msg("crc32 mismatch: calc=%08x
read=%08x\n",
> > + crc, *crc_ptr);
> > + }
> > +
> > + ptr = mmap_ptr + rand() % data_size;
> > + op_is_store = !!(rand() % 2);
> > + if (op_is_store) {
> > + *ptr = rand() % UINT8_MAX;
> > + crc = crc32_z(0, mmap_ptr, data_size);
> > + *crc_ptr = crc;
> > + } else {
> > + *ptr;
> > + }
> > + }
> > +
> > + munmap(mmap_ptr, file_size);
> > +
> > + return EXIT_SUCCESS;
> > +}
> > +
> > +TEST_F(numa_migrate, migrate)
> > +{
> > + int child_pid;
> > + char tmp;
> > + char buf[PAGE_SIZE];
> > +
> > + child_pid = cg_run_nowait(self->cg_test, touch_pages, &self->ctx);
> > + EXPECT_NE(-1, child_pid);
> > + EXPECT_NE(-1, close(self->pipefd[1]));
> > +
> > + /* Wait for the child to enter cgroup */
> > + EXPECT_NE(-1, cg_wait_for_proc_count(self->cg_test, 1));
> > +
> > + /* Wait for the child to populate the page cache */
> > + EXPECT_NE(-1, read(self->pipefd[0], &tmp, 1));
> > + EXPECT_NE(-1, close(self->pipefd[0]));
> > +
> > + /* Let it run for a while */
> > + sleep(3);
>
> I would really add a loop with a much smaller sleep timeout plus a check
> whether the amount of populated pages is big enough for us, and increase
> the max timeout at the same time. This could decrease the usual test
> execution time and at the same time allow the test to pass on a heavily
> loaded VM.
>
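Agreed, a polling loop would be more robust. Roughly along these lines (the
threshold and timeout values below are placeholders; for the variants with a
memory limit the threshold would be based on memory.max instead):

	/* Poll memory.current until the child has populated most of the
	 * mapping, with a capped total timeout, instead of a fixed sleep.
	 */
	int retries = 600;	/* ~60s max at 100ms per iteration */

	while (retries--) {
		if (cg_read_long(self->cg_test, "memory.current") >=
		    (long)(file_size / 100 * 90))
			break;
		usleep(100 * 1000);
	}
	EXPECT_GE(retries, 0);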
> > +
> > + cg_write(self->cg_test, "memory.numa_migrate", dst_str);
> > +
> > + /* Let it run for a while */
> > + sleep(3);
>
> same here.
>
> > +
> > + variant->verify(&self->ctx, self->cg_test, variant->stat);
> > +
> > + EXPECT_NE(-1, cg_killall(self->cg_test));
> > +
> > + /* Wait for cgroup to be empty */
> > + while (1) {
> > +		EXPECT_NE(-1, cg_read(self->cg_test, "cgroup.procs", buf,
> > +				      sizeof(buf)));
> > + if (buf[0] == '\0')
> > + break;
> > + usleep(1000);
> > + }
> > +
> > + EXPECT_NE(-1, waitpid(child_pid, NULL, 0));
> > +}
> > +
> > +XFAIL_ADD(numa_migrate, inactive_anon, migrate)
> > +XFAIL_ADD(numa_migrate, inactive_disk_file, migrate)
> > +
> > +static void configure_numa(void)
> > +{
> > + int cnt;
> > + int n;
> > + char *ptr;
> > + long freep;
> > + bool to_single = false;
> > +
> > + /* Cannot use args as they are owned by the harness */
> > + if (getenv("KSFT_NUMA_MIGRATE_DST_SINGLE"))
> > + to_single = true;
> > +
> > + src_mask = numa_allocate_nodemask();
> > + if (src_mask == NULL)
> > + ksft_exit_fail_msg("failed to allocate nodemask\n");
> > +
> > + dst_mask = numa_allocate_nodemask();
> > + if (src_mask == NULL)
>
> should be dst_mask == NULL
>
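Indeed, a copy-paste error; it should read:

	dst_mask = numa_allocate_nodemask();
	if (dst_mask == NULL)
		ksft_exit_fail_msg("failed to allocate nodemask\n");

Thanks, will fix.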
> > + ksft_exit_fail_msg("failed to allocate nodemask\n");
> > +
> > + for (n = 0; n < numa_max_possible_node(); n++) {
> > + if (numa_bitmask_isbitset(numa_all_nodes_ptr, n)) {
> > + src_node = n;
> > + numa_bitmask_setbit(src_mask, n);
> > + break;
> > + }
> > + }
> > +
> > + ptr = dst_str;
> > + for (++n; n < numa_max_possible_node(); n++) {
> > + if (numa_bitmask_isbitset(numa_all_nodes_ptr, n)) {
> > + numa_node_size(n, &freep);
> > + /*
> > +			 * Assume we want the node to be able to accommodate
> > +			 * the whole test allocation, to avoid recalculating
> > +			 * proportions after each newly considered node.
> > +			 */
> > + if (freep < file_size)
> > + continue;
> > + dst_count++;
> > + numa_bitmask_setbit(dst_mask, n);
> > + cnt = sprintf(ptr, "%i,", n);
> > + ptr += cnt;
> > + if (to_single)
> > + break;
> > + }
> > + }
> > + if (dst_count == 0)
> > + ksft_exit_skip("no suitable nodes to migrate to\n");
> > + *(--ptr) = '\0';
> > +}
> > +
> > +int main(int argc, char *argv[])
> > +{
> > + int ret;
> > +
> > + if (geteuid())
> > + ksft_exit_skip("needs root to run\n");
> > +
> > + if (cg_find_unified_root(root, sizeof(root), NULL))
> > + ksft_exit_skip("cgroup v2 isn't mounted\n");
> > +
> > + if (numa_available() < 0)
> > + ksft_exit_skip("numa is not available\n");
> > +
> > + if (numa_num_configured_nodes() < 2)
> > + ksft_exit_skip("at least 2 numa nodes are required\n");
> > +
> > + if (vm_drop_caches())
> > + ksft_exit_fail();
> > +
> > + configure_numa();
> > +
> > + ret = test_harness_run(argc, argv);
> > +
> > + numa_bitmask_free(src_mask);
> > + numa_bitmask_free(dst_mask);
> > +
> > + ksft_exit(ret);
> > +}
_______________________________________________
Devel mailing list
[email protected]
https://lists.openvz.org/mailman/listinfo/devel