Since 18.05 and the memory subsystem rework, EAL reserves some big (unused) mappings.
In testpmd, we have been locking all pages to avoid page faults during benchmark/performance regression tests [1]. However, asking for locking all the pages triggers issues on FreeBSD [2] and becomes really heavy in some Linux configurations (see [3], [4]). This patch changes the behavior so that testpmd only lock pages containing .text by default. 1: https://git.dpdk.org/dpdk/commit/?id=1c036b16c284 2: https://git.dpdk.org/dpdk/commit/?id=fb7b8b32cd95 3: https://bugzilla.redhat.com/show_bug.cgi?id=1786923 4: http://mails.dpdk.org/archives/dev/2020-February/158477.html Signed-off-by: David Marchand <[email protected]> --- app/test-pmd/parameters.c | 4 +-- app/test-pmd/testpmd.c | 53 +++++++++++++++++++++++++++++++-------- app/test-pmd/testpmd.h | 5 +++- 3 files changed, 49 insertions(+), 13 deletions(-) diff --git a/app/test-pmd/parameters.c b/app/test-pmd/parameters.c index 404dba2b2..952371e6c 100644 --- a/app/test-pmd/parameters.c +++ b/app/test-pmd/parameters.c @@ -1304,9 +1304,9 @@ launch_args_parse(int argc, char** argv) if (!strcmp(lgopts[opt_idx].name, "hot-plug")) hot_plug = 1; if (!strcmp(lgopts[opt_idx].name, "mlockall")) - do_mlockall = 1; + do_mlock = TESTPMD_MLOCK_ALL; if (!strcmp(lgopts[opt_idx].name, "no-mlockall")) - do_mlockall = 0; + do_mlock = TESTPMD_MLOCK_NONE; if (!strcmp(lgopts[opt_idx].name, "noisy-tx-sw-buffer-size")) { n = atoi(optarg); diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c index 035836adf..9a1716321 100644 --- a/app/test-pmd/testpmd.c +++ b/app/test-pmd/testpmd.c @@ -13,6 +13,7 @@ #include <sys/types.h> #include <errno.h> #include <stdbool.h> +#include <link.h> #include <sys/queue.h> #include <sys/stat.h> @@ -390,9 +391,9 @@ uint32_t event_print_mask = (UINT32_C(1) << RTE_ETH_EVENT_UNKNOWN) | (UINT32_C(1) << RTE_ETH_EVENT_MACSEC) | (UINT32_C(1) << RTE_ETH_EVENT_INTR_RMV); /* - * Decide if all memory are locked for performance. + * Decide which part of memory is locked for performance. */ -int do_mlockall = 0; +int do_mlock = TESTPMD_MLOCK_TEXT; /* * NIC bypass mode configuration options. @@ -3455,6 +3456,42 @@ signal_handler(int signum) } } +static void +lock_pages(const void *_addr, size_t _len, const char *prefix) +{ + const void *addr; + size_t pagesize; + size_t len; + + /* While Linux does not care, FreeBSD mlock expects page aligned + * address (according to the man). + */ + pagesize = sysconf(_SC_PAGESIZE); + addr = RTE_PTR_ALIGN_FLOOR(_addr, pagesize); + len = _len + ((uintptr_t)_addr & (pagesize - 1)); + if (mlock(addr, len)) { + TESTPMD_LOG(NOTICE, "%s: mlock %p (0x%zx) aligned to %p (0x%zx) failed with error \"%s\"\n", + prefix, _addr, _len, addr, len, strerror(errno)); + } +} + +static int +lock_text_cb(struct dl_phdr_info *info, __rte_unused size_t size, + __rte_unused void *data) +{ + int i; + + for (i = 0; i < info->dlpi_phnum; i++) { + void *addr; + + if (info->dlpi_phdr[i].p_memsz == 0) + continue; + addr = (void *)(info->dlpi_addr + info->dlpi_phdr[i].p_vaddr); + lock_pages(addr, info->dlpi_phdr[i].p_memsz, info->dlpi_name); + } + return 0; +} + int main(int argc, char** argv) { @@ -3514,19 +3551,15 @@ main(int argc, char** argv) latencystats_enabled = 0; #endif - /* on FreeBSD, mlockall() is disabled by default */ -#ifdef RTE_EXEC_ENV_FREEBSD - do_mlockall = 0; -#else - do_mlockall = 1; -#endif - argc -= diag; argv += diag; if (argc > 1) launch_args_parse(argc, argv); - if (do_mlockall && mlockall(MCL_CURRENT | MCL_FUTURE)) { + if (do_mlock == TESTPMD_MLOCK_TEXT ) { + dl_iterate_phdr(lock_text_cb, NULL); + } else if (do_mlock == TESTPMD_MLOCK_ALL && + mlockall(MCL_CURRENT | MCL_FUTURE)) { TESTPMD_LOG(NOTICE, "mlockall() failed with error \"%s\"\n", strerror(errno)); } diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h index 7a7c73f79..a38e5a1f5 100644 --- a/app/test-pmd/testpmd.h +++ b/app/test-pmd/testpmd.h @@ -343,7 +343,10 @@ extern uint32_t event_print_mask; /**< set by "--print-event xxxx" and "--mask-event xxxx parameters */ extern bool setup_on_probe_event; /**< disabled by port setup-on iterator */ extern uint8_t hot_plug; /**< enable by "--hot-plug" parameter */ -extern int do_mlockall; /**< set by "--mlockall" or "--no-mlockall" parameter */ +#define TESTPMD_MLOCK_NONE 0 +#define TESTPMD_MLOCK_TEXT 1 +#define TESTPMD_MLOCK_ALL 2 +extern int do_mlock; /**< set by "--mlockall" or "--no-mlockall" parameter */ extern uint8_t clear_ptypes; /**< disabled by set ptype cmd */ #ifdef RTE_LIBRTE_IXGBE_BYPASS -- 2.23.0

