Since 18.05 and the memory subsystem rework, EAL reserves some big
(unused) mappings.

In testpmd, we have been locking all pages to avoid page faults during
benchmark/performance regression tests [1].
However, requesting that all pages be locked triggers issues on FreeBSD [2]
and becomes really heavy in some Linux configurations (see [3], [4]).

This patch changes the behavior so that testpmd only locks the pages
containing .text by default.

1: https://git.dpdk.org/dpdk/commit/?id=1c036b16c284
2: https://git.dpdk.org/dpdk/commit/?id=fb7b8b32cd95
3: https://bugzilla.redhat.com/show_bug.cgi?id=1786923
4: http://mails.dpdk.org/archives/dev/2020-February/158477.html

Signed-off-by: David Marchand <david.march...@redhat.com>
---
 app/test-pmd/parameters.c |  4 +--
 app/test-pmd/testpmd.c    | 53 +++++++++++++++++++++++++++++++--------
 app/test-pmd/testpmd.h    |  5 +++-
 3 files changed, 49 insertions(+), 13 deletions(-)

diff --git a/app/test-pmd/parameters.c b/app/test-pmd/parameters.c
index 404dba2b2..952371e6c 100644
--- a/app/test-pmd/parameters.c
+++ b/app/test-pmd/parameters.c
@@ -1304,9 +1304,9 @@ launch_args_parse(int argc, char** argv)
                        if (!strcmp(lgopts[opt_idx].name, "hot-plug"))
                                hot_plug = 1;
                        if (!strcmp(lgopts[opt_idx].name, "mlockall"))
-                               do_mlockall = 1;
+                               do_mlock = TESTPMD_MLOCK_ALL;
                        if (!strcmp(lgopts[opt_idx].name, "no-mlockall"))
-                               do_mlockall = 0;
+                               do_mlock = TESTPMD_MLOCK_NONE;
                        if (!strcmp(lgopts[opt_idx].name,
                                    "noisy-tx-sw-buffer-size")) {
                                n = atoi(optarg);
diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index 035836adf..9a1716321 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -13,6 +13,7 @@
 #include <sys/types.h>
 #include <errno.h>
 #include <stdbool.h>
+#include <link.h>
 
 #include <sys/queue.h>
 #include <sys/stat.h>
@@ -390,9 +391,9 @@ uint32_t event_print_mask = (UINT32_C(1) << RTE_ETH_EVENT_UNKNOWN) |
                            (UINT32_C(1) << RTE_ETH_EVENT_MACSEC) |
                            (UINT32_C(1) << RTE_ETH_EVENT_INTR_RMV);
 /*
- * Decide if all memory are locked for performance.
+ * Decide which part of memory is locked for performance.
  */
-int do_mlockall = 0;
+int do_mlock = TESTPMD_MLOCK_TEXT;
 
 /*
  * NIC bypass mode configuration options.
@@ -3455,6 +3456,42 @@ signal_handler(int signum)
        }
 }
 
+/*
+ * Lock the pages backing the range [_addr, _addr + _len) in memory.
+ *
+ * _addr/_len: start and size in bytes of the range; _addr need not be
+ *             page aligned.
+ * prefix:     string printed at the start of the failure message.
+ *
+ * A failing mlock() is only reported through TESTPMD_LOG; nothing is
+ * returned to the caller.
+ */
+static void
+lock_pages(const void *_addr, size_t _len, const char *prefix)
+{
+       const void *addr;
+       size_t pagesize;
+       size_t len;
+
+       /* While Linux does not care, FreeBSD mlock expects a page aligned
+        * address (according to the man page).
+        */
+       pagesize = sysconf(_SC_PAGESIZE);
+       /* Align the start down to the page size... */
+       addr = RTE_PTR_ALIGN_FLOOR(_addr, pagesize);
+       /* ...and grow the length by the offset of _addr within its page so
+        * that the end of the range is unchanged.
+        */
+       len = _len + ((uintptr_t)_addr & (pagesize - 1));
+       if (mlock(addr, len)) {
+               TESTPMD_LOG(NOTICE, "%s: mlock %p (0x%zx) aligned to %p (0x%zx) failed with error \"%s\"\n",
+                       prefix, _addr, _len, addr, len, strerror(errno));
+       }
+}
+
+/*
+ * dl_iterate_phdr() callback: lock the executable segments of one loaded
+ * object.
+ *
+ * info: program headers, load address and name of the object being visited.
+ * size, data: unused.
+ *
+ * Always returns 0 so that dl_iterate_phdr() keeps iterating over the
+ * remaining objects.
+ */
+static int
+lock_text_cb(struct dl_phdr_info *info, __rte_unused size_t size,
+               __rte_unused void *data)
+{
+       int i;
+
+       for (i = 0; i < info->dlpi_phnum; i++) {
+               void *addr;
+
+               /* .text is mapped from executable PT_LOAD segments; any
+                * other program header (data, dynamic, notes, ...) is
+                * skipped so that only code pages get locked.
+                */
+               if (info->dlpi_phdr[i].p_type != PT_LOAD ||
+                               !(info->dlpi_phdr[i].p_flags & PF_X))
+                       continue;
+               if (info->dlpi_phdr[i].p_memsz == 0)
+                       continue;
+               /* p_vaddr is relative to the object's load address. */
+               addr = (void *)(info->dlpi_addr + info->dlpi_phdr[i].p_vaddr);
+               lock_pages(addr, info->dlpi_phdr[i].p_memsz, info->dlpi_name);
+       }
+       return 0;
+}
+
 int
 main(int argc, char** argv)
 {
@@ -3514,19 +3551,15 @@ main(int argc, char** argv)
        latencystats_enabled = 0;
 #endif
 
-       /* on FreeBSD, mlockall() is disabled by default */
-#ifdef RTE_EXEC_ENV_FREEBSD
-       do_mlockall = 0;
-#else
-       do_mlockall = 1;
-#endif
-
        argc -= diag;
        argv += diag;
        if (argc > 1)
                launch_args_parse(argc, argv);
 
-       if (do_mlockall && mlockall(MCL_CURRENT | MCL_FUTURE)) {
+       if (do_mlock == TESTPMD_MLOCK_TEXT ) {
+               dl_iterate_phdr(lock_text_cb, NULL);
+       } else if (do_mlock == TESTPMD_MLOCK_ALL &&
+                       mlockall(MCL_CURRENT | MCL_FUTURE)) {
                TESTPMD_LOG(NOTICE, "mlockall() failed with error \"%s\"\n",
                        strerror(errno));
        }
diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
index 7a7c73f79..a38e5a1f5 100644
--- a/app/test-pmd/testpmd.h
+++ b/app/test-pmd/testpmd.h
@@ -343,7 +343,10 @@ extern uint32_t event_print_mask;
 /**< set by "--print-event xxxx" and "--mask-event xxxx parameters */
 extern bool setup_on_probe_event; /**< disabled by port setup-on iterator */
 extern uint8_t hot_plug; /**< enable by "--hot-plug" parameter */
-extern int do_mlockall; /**< set by "--mlockall" or "--no-mlockall" parameter */
+#define TESTPMD_MLOCK_NONE 0
+#define TESTPMD_MLOCK_TEXT 1
+#define TESTPMD_MLOCK_ALL  2
+extern int do_mlock; /**< set by "--mlockall" or "--no-mlockall" parameter */
 extern uint8_t clear_ptypes; /**< disabled by set ptype cmd */
 
 #ifdef RTE_LIBRTE_IXGBE_BYPASS
-- 
2.23.0

Reply via email to