The commit is pushed to "branch-rh10-6.12.0-55.13.1.3.x.vz10-ovz" and will
appear at [email protected]:openvz/vzkernel.git
after rh10-6.12.0-55.13.1.3.25.vz10
------>
commit 8bbeb8167249b101fae90d02e80d34d8161b5514
Author: Dmitry Sepp <[email protected]>
Date: Mon Dec 8 11:46:23 2025 +0000
selftests: cgroup: test page cache limiting feature
The tests validate memory.cache file functionality in terms of limiting
the amount of page cache used and being able to correctly account it.
The page cache is populated by prefetching contents of a 1 GiB file. The
cgroup used for testing limits cache usage to 256 MiB.
The current error tolerance is set to 15%.
https://virtuozzo.atlassian.net/browse/VSTOR-112174
Signed-off-by: Dmitry Sepp <[email protected]>
Reviewed-by: Pavel Tikhomirov <[email protected]>
======
Patchset description:
selftests: Add selftests to verify cache limiting and numa migration
The patches introduce a set of tests used to verify the following
functionality:
- Controlling the amount of memory used by page cache with memory.cache
- Enforcing migration of cgroup's pages to one or multiple target NUMA
nodes
with memory.numa_migrate.
Notes for reviewers:
- The kselftest harness is designed in such a way that it does not allow
skipping any tests that have been registered before the execution started.
This
means, there is no safe and sane way to interrupt the overall flow if
one of
the tests has failed. I tried using a state variable but each of the
individual tests seems to be executed in a subprocess so if it fails it
has
no way to change anything outside. I also tried creating a marker file
but it
is also troublesome as the main harness function terminates the calling
process on its own without letting the external caller perform any
cleanup
(e.g. to remove the marker file after all tests have finished).
- Regarding the ASSERT macro and invocation of the teardown function:
- Teardown is not automatically called if ASSERT is used in
FIXTURE_SETUP.
- Teardown is called if ASSERT is used from the test functions defined
with
the TEST macro and its variants.
- Teardown will be called if ASSERT is used from any non-harness-macro
defined function if the "context" variables (_metadata etc.) are
visible
within the function's scope and the function is called from the process
running the main test function.
- Teardown can theoretically be called if ASSERT is used from a child
process
started by the main test function if "context" is passed to the
function(s)
executed by the child process but in reality it makes little sense as
it is
possible that teardown would be then executed twice leading to
potentially
harmful outcome (once by the ASSERT plumbing from the child process and
once by the main test function because the child has failed).
---
tools/testing/selftests/cgroup/Makefile | 2 +
tools/testing/selftests/cgroup/config | 1 +
tools/testing/selftests/cgroup/test_cache.c | 327 ++++++++++++++++++++++++++++
3 files changed, 330 insertions(+)
diff --git a/tools/testing/selftests/cgroup/Makefile
b/tools/testing/selftests/cgroup/Makefile
index 1b897152bab6e..ff290321d7da8 100644
--- a/tools/testing/selftests/cgroup/Makefile
+++ b/tools/testing/selftests/cgroup/Makefile
@@ -17,6 +17,7 @@ TEST_GEN_PROGS += test_kmem
TEST_GEN_PROGS += test_memcontrol
TEST_GEN_PROGS += test_pids
TEST_GEN_PROGS += test_zswap
+TEST_GEN_PROGS += test_cache
LOCAL_HDRS += $(selfdir)/clone3/clone3_selftests.h $(selfdir)/pidfd/pidfd.h
@@ -32,3 +33,4 @@ include ../lib.mk
$(OUTPUT)/test_memcontrol: cgroup_util.c
$(OUTPUT)/test_pids: cgroup_util.c
$(OUTPUT)/test_zswap: cgroup_util.c
+$(OUTPUT)/test_cache: cgroup_util.c
diff --git a/tools/testing/selftests/cgroup/config
b/tools/testing/selftests/cgroup/config
index 39f979690dd3b..641ed9bc26b47 100644
--- a/tools/testing/selftests/cgroup/config
+++ b/tools/testing/selftests/cgroup/config
@@ -4,3 +4,4 @@ CONFIG_CGROUP_FREEZER=y
CONFIG_CGROUP_SCHED=y
CONFIG_MEMCG=y
CONFIG_PAGE_COUNTER=y
+CONFIG_CACHESTAT_SYSCALL=y
diff --git a/tools/testing/selftests/cgroup/test_cache.c
b/tools/testing/selftests/cgroup/test_cache.c
new file mode 100644
index 0000000000000..15c68685c282c
--- /dev/null
+++ b/tools/testing/selftests/cgroup/test_cache.c
@@ -0,0 +1,327 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2025 Virtuozzo International GmbH. All rights reserved.
+ *
+ * This test set verifies the page cache limiting feature. Each test creates a
+ * file-backed memory mapping and populates it with pages.
+ *
+ * Test variants:
+ * - limit_before_access: Sets a limit of 256 MiB, then generates 1 GiB of page
+ * cache
+ * - limit_after_access: Generates 1 GiB of page cache, then sets a limit of
+ * 256 MiB
+ *
+ * The correctness of cache accounting is further verified with the cachestat
+ * syscall.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <sys/prctl.h>
+#include <linux/prctl.h>
+#include <linux/mman.h>
+#include <linux/limits.h>
+
+#include "../kselftest_harness.h"
+#include "cgroup_util.h"
+
+/* Path of the cgroup v2 unified root, filled in by cg_find_unified_root() */
+static char root[PATH_MAX];
+/* Size the temporary test file is truncated to and the length mapped over it */
+static const size_t file_size = MB(1024);
+/* Value the tests write to the test cgroup's memory.cache.max */
+static char *cg_test_cache_max_str = "256M";
+
+/*
+ * Write "3" to /proc/sys/vm/drop_caches.
+ *
+ * Returns EXIT_SUCCESS only if the file could be opened, written and closed
+ * without error; otherwise returns EXIT_FAILURE. Every failing step is
+ * reported through ksft_perror().
+ */
+static int vm_drop_caches(void)
+{
+	int ret = EXIT_FAILURE;
+	int fd = open("/proc/sys/vm/drop_caches", O_WRONLY);
+
+	if (fd < 0) {
+		ksft_perror("failed to open drop_caches");
+		return ret;
+	}
+
+	if (write(fd, "3", 1) == 1)
+		ret = EXIT_SUCCESS;
+	else
+		ksft_perror("failed to write to drop_caches");
+
+	/* A failed close overrides an otherwise successful write */
+	if (close(fd) < 0) {
+		ksft_perror("failed to close drop_caches");
+		ret = EXIT_FAILURE;
+	}
+
+	return ret;
+}
+
+/*
+ * Pointers to the fixture data and the test metadata, bundled so that
+ * functions defined outside of the harness macros can reach them (see
+ * EXPAND_CTX).
+ */
+struct test_context {
+ struct _test_data_cache_control *self;
+ struct __test_metadata *metadata;
+};
+
+/* Per-test state used by setup, the test bodies, the helpers and teardown. */
+FIXTURE(cache_control) {
+ /* Temporary file returned by get_temp_fd(), truncated to file_size */
+ int fd;
+ /* Test cgroup name returned by cg_name(); released with free() in teardown */
+ char *cg_test;
+ /* Pipe over which the child sends one byte once its mapping is set up */
+ int pipefd[2];
+ /* Back-pointers to self and _metadata, filled in by FIXTURE_SETUP */
+ struct test_context ctx;
+};
+
+/*
+ * Undo FIXTURE_SETUP: close the temporary file, remove the test cgroup and
+ * free the cgroup name.
+ */
+FIXTURE_TEARDOWN(cache_control)
+{
+	/*
+	 * EXPECT rather than ASSERT: a failed ASSERT aborts immediately, which
+	 * would skip the remaining cleanup steps and leave the test cgroup
+	 * behind for whatever runs next. EXPECT still marks the test as failed
+	 * but lets the cleanup run to completion.
+	 */
+	EXPECT_EQ(0, close(self->fd));
+	EXPECT_EQ(0, cg_destroy(self->cg_test));
+	free(self->cg_test);
+}
+/*
+ * Log a message and then run the fixture teardown by hand. Used as the
+ * trailing statement of ASSERT_* checks in FIXTURE_SETUP, where the harness
+ * does not invoke teardown on its own. Expects _metadata, self and variant to
+ * be in scope at the point of use.
+ */
+#define TEARDOWN_LOG(fmt, ...) do { \
+ TH_LOG(fmt, ##__VA_ARGS__); \
+ cache_control_teardown(_metadata, self, variant); \
+} while (0)
+
+/*
+ * Prepare one test run: create the parent/child pipe, create a temporary file
+ * of file_size bytes, create the test cgroup, record the harness context for
+ * the helper functions and finally drop the caches.
+ *
+ * Steps that fail after the temporary file has been opened run the teardown
+ * by hand through TEARDOWN_LOG.
+ */
+FIXTURE_SETUP(cache_control)
+{
+	ASSERT_EQ(0, pipe(self->pipefd));
+
+	self->fd = get_temp_fd();
+	ASSERT_NE(-1, self->fd);
+	ASSERT_EQ(0, ftruncate(self->fd, file_size))
+		TEARDOWN_LOG("failed to truncate temp fd");
+
+	self->cg_test = cg_name(root, "kselftest");
+	/* Do not hand a NULL name to cg_create() */
+	ASSERT_NE(NULL, self->cg_test)
+		TEARDOWN_LOG("failed to allocate the test cgroup name");
+	ASSERT_EQ(0, cg_create(self->cg_test))
+		TEARDOWN_LOG("failed to create a test cgroup");
+
+	/* Store the context to make it accessible outside of the harness */
+	self->ctx.self = self;
+	self->ctx.metadata = _metadata;
+
+	ASSERT_EQ(EXIT_SUCCESS, vm_drop_caches())
+		TEARDOWN_LOG("failed to drop caches");
+}
+
+#ifndef __maybe_unused
+#define __maybe_unused __attribute__((__unused__))
+#endif
+
+/*
+ * Declare local "self" and "_metadata" variables from a struct test_context
+ * pointer, so that harness macros such as ASSERT_* can be used in functions
+ * that are not defined through the harness. Both are marked __maybe_unused
+ * because not every user needs both of them.
+ *
+ * The argument is parenthesized so that any pointer expression may be passed.
+ */
+#define EXPAND_CTX(ctx) \
+	struct _test_data_cache_control __maybe_unused *self = (ctx)->self; \
+	struct __test_metadata __maybe_unused *_metadata = (ctx)->metadata
+
+/*
+ * Body of the child process started through cg_run_nowait().
+ *
+ * Maps the test file with MAP_SHARED | MAP_POPULATE, sends one byte over the
+ * pipe to tell the parent that the mapping is in place, and then sleeps in
+ * pause() until a signal arrives.
+ *
+ * @cgroup: not used by this function.
+ * @arg: pointer to the struct test_context of the running test.
+ *
+ * Returns EXIT_SUCCESS or EXIT_FAILURE.
+ */
+static int access_file(const char *cgroup, void *arg)
+{
+	void *mmap_ptr;
+	char tmp = 's';
+	int ret = EXIT_SUCCESS;
+	struct test_context *ctx = arg;
+
+	EXPAND_CTX(ctx);
+
+	/*
+	 * Note on the messages below: ksft_perror() appends the errno text and
+	 * the newline itself, so the messages must not end in "\n".
+	 */
+	if (close(self->pipefd[0])) {
+		ksft_perror("failed to close pipe read end");
+		return EXIT_FAILURE;
+	}
+
+	/* Ask for SIGTERM should the parent die before it stops this child */
+	if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0) {
+		ksft_perror("failed to set parent death signal");
+		return EXIT_FAILURE;
+	}
+
+	mmap_ptr = mmap(NULL, file_size, PROT_READ | PROT_WRITE,
+			MAP_SHARED | MAP_POPULATE, self->fd, 0);
+	if (mmap_ptr == MAP_FAILED) {
+		ksft_perror("failed to map test file");
+		return EXIT_FAILURE;
+	}
+
+	/* Tell the parent that the mapping has been set up */
+	if (write(self->pipefd[1], &tmp, 1) != 1) {
+		ksft_print_msg("failed to write to pipe\n");
+		close(self->pipefd[1]);
+		ret = EXIT_FAILURE;
+		goto err_unmap;
+	}
+
+	if (close(self->pipefd[1])) {
+		ksft_perror("failed to close pipe write end");
+		ret = EXIT_FAILURE;
+		goto err_unmap;
+	}
+
+	/* Keep the mapping alive until a signal arrives */
+	pause();
+
+err_unmap:
+	if (munmap(mmap_ptr, file_size) < 0) {
+		ksft_perror("failed to unmap test file");
+		ret = EXIT_FAILURE;
+	}
+
+	return ret;
+}
+
+/*
+ * Start access_file() in the test cgroup and wait until it has joined the
+ * cgroup and sent its one-byte notification over the pipe.
+ *
+ * Returns the pid of the child; every failure trips an ASSERT.
+ */
+static int test_process_start(struct test_context *ctx)
+{
+	char tmp;
+	int child_pid;
+
+	EXPAND_CTX(ctx);
+
+	child_pid = cg_run_nowait(self->cg_test, access_file, &self->ctx);
+	ASSERT_NE(-1, child_pid);
+
+	/* The parent only ever reads from the pipe */
+	ASSERT_EQ(0, close(self->pipefd[1]));
+
+	/* First the child has to show up in the cgroup ... */
+	ASSERT_EQ(0, cg_wait_for_proc_count(self->cg_test, 1));
+
+	/* ... then it reports that the page cache has been populated */
+	ASSERT_EQ(1, read(self->pipefd[0], &tmp, 1));
+	ASSERT_EQ(0, close(self->pipefd[0]));
+
+	return child_pid;
+}
+
+/*
+ * Kill everything in the test cgroup, poll cgroup.procs once per millisecond
+ * until it reads back empty, then reap the child.
+ */
+static void test_process_stop(int child_pid, struct test_context *ctx)
+{
+	char buf[PAGE_SIZE];
+
+	EXPAND_CTX(ctx);
+
+	ASSERT_EQ(0, cg_killall(self->cg_test));
+
+	/* An empty cgroup.procs means the cgroup has no processes left */
+	ASSERT_EQ(0, cg_read(self->cg_test, "cgroup.procs", buf, sizeof(buf)));
+	while (buf[0] != '\0') {
+		usleep(1000);
+		ASSERT_EQ(0, cg_read(self->cg_test, "cgroup.procs", buf,
+				     sizeof(buf)));
+	}
+
+	ASSERT_EQ(child_pid, waitpid(child_pid, NULL, 0));
+}
+
+/*
+ * Check that memory.cache.current of the test cgroup does not exceed
+ * memory.cache.max by more than the 15% tolerance. The check is attempted up
+ * to 100 times with a 100ms pause in between; the test fails if no attempt
+ * succeeds.
+ */
+static void verify_cache_usage(struct test_context *ctx)
+{
+	long cache_current = 0;
+	long cache_max = 0;
+	int attempts_left = 100; /* 10s */
+	int ret = EXIT_FAILURE;
+
+	EXPAND_CTX(ctx);
+
+	while (attempts_left-- > 0) {
+		cache_current = cg_read_long(self->cg_test,
+					     "memory.cache.current");
+		ASSERT_NE(-1, cache_current);
+
+		cache_max = cg_read_long(self->cg_test, "memory.cache.max");
+		ASSERT_NE(-1, cache_max);
+
+		/*
+		 * Anything at or below max is fine; slightly above max is
+		 * tolerated as well.
+		 */
+		if (cache_current <= cache_max ||
+		    values_close(cache_current, cache_max, 15)) {
+			ret = EXIT_SUCCESS;
+			break;
+		}
+
+		/* No point in sleeping after the last attempt */
+		if (attempts_left > 0)
+			usleep(100000); /* 100ms */
+	}
+
+	if (ret != EXIT_SUCCESS)
+		ksft_print_msg("Incorrect cache usage: current=%li max=%li\n",
+			       cache_current, cache_max);
+
+	ASSERT_EQ(EXIT_SUCCESS, ret);
+}
+
+/*
+ * Cross-check the cgroup's memory.cache.current against the number of cached
+ * pages the cachestat syscall reports for the test file. The cgroup value may
+ * be anywhere below the cachestat value but must not exceed it by more than
+ * the 15% tolerance. The check is attempted up to 100 times with a 100ms
+ * pause in between; the test fails if no attempt succeeds.
+ */
+static void verify_cache_accounting(struct test_context *ctx)
+{
+	long cache_current;
+	size_t stat_size;
+	struct cachestat cs;
+	struct cachestat_range cs_range = { 0, file_size };
+	int retries_left = 100; /* 10s */
+	int ret = EXIT_SUCCESS;
+
+	EXPAND_CTX(ctx);
+retry:
+	retries_left--;
+
+	cache_current = cg_read_long(self->cg_test, "memory.cache.current");
+	ASSERT_NE(-1, cache_current);
+
+	ASSERT_NE(-1, syscall(__NR_cachestat, self->fd, &cs_range, &cs, 0));
+	stat_size = cs.nr_cache * PAGE_SIZE;
+
+	/*
+	 * It's OK if current is anywhere below stat but it should not exceed
+	 * stat too much
+	 */
+	if ((cache_current <= stat_size) ||
+	    values_close(cache_current, stat_size, 15))
+		goto out;
+
+	if (retries_left > 0) {
+		usleep(100000); /* 100ms */
+		goto retry;
+	}
+
+	ksft_print_msg("Incorrect cache accounting: cg=%li cachestat=%zu\n",
+		       cache_current, stat_size);
+	/* Without this the mismatch is only logged and the test still passes */
+	ret = EXIT_FAILURE;
+
+out:
+	ASSERT_EQ(EXIT_SUCCESS, ret);
+}
+
+/*
+ * Set the cache limit first, then let the child populate the page cache and
+ * check that both the limit and the accounting hold.
+ */
+TEST_F(cache_control, limit_before_access)
+{
+	struct test_context *ctx = &self->ctx;
+	int child_pid;
+
+	ASSERT_EQ(0, cg_write(self->cg_test, "memory.cache.max",
+			      cg_test_cache_max_str));
+
+	child_pid = test_process_start(ctx);
+	verify_cache_usage(ctx);
+	verify_cache_accounting(ctx);
+	test_process_stop(child_pid, ctx);
+}
+
+/*
+ * Let the child populate the page cache without a limit, check the
+ * accounting, then set the cache limit and check that usage and accounting
+ * follow it.
+ */
+TEST_F(cache_control, limit_after_access)
+{
+	struct test_context *ctx = &self->ctx;
+	int child_pid;
+
+	child_pid = test_process_start(ctx);
+	verify_cache_accounting(ctx);
+
+	ASSERT_EQ(0, cg_write(self->cg_test, "memory.cache.max",
+			      cg_test_cache_max_str));
+
+	verify_cache_usage(ctx);
+	verify_cache_accounting(ctx);
+	test_process_stop(child_pid, ctx);
+}
+
+/*
+ * Entry point: skip the whole test set unless running as root with a cgroup
+ * v2 hierarchy available, then hand over to the kselftest harness.
+ */
+int main(int argc, char *argv[])
+{
+	if (geteuid())
+		ksft_exit_skip("needs root to run\n");
+
+	if (cg_find_unified_root(root, sizeof(root), NULL))
+		ksft_exit_skip("cgroup v2 isn't mounted\n");
+
+	/*
+	 * Pass the harness result through as the exit status. It must not be
+	 * fed to ksft_exit(): that macro treats a true condition as success,
+	 * so a zero "all passed" result would be reported as a failure if the
+	 * harness ever returned instead of exiting on its own.
+	 */
+	return test_harness_run(argc, argv);
+}
_______________________________________________
Devel mailing list
[email protected]
https://lists.openvz.org/mailman/listinfo/devel