Implement functions for getting/setting thread affinity.
Threads can be pinned to specific cores by setting their
affinity attribute.
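
For illustration, a minimal usage sketch of the new API (hypothetical
caller code; CPU index 2 is arbitrary and assumed to exist):

    #include <rte_thread.h>

    /* Pin the current thread to CPU 2, then read the mask back. */
    rte_cpuset_t cpuset;
    int ret;

    CPU_ZERO(&cpuset);
    CPU_SET(2, &cpuset);
    ret = rte_thread_set_affinity_by_id(rte_thread_self(), &cpuset);
    if (ret == 0)
        ret = rte_thread_get_affinity_by_id(rte_thread_self(), &cpuset);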

Windows error codes are translated to errno-style error codes.
The possible return values are chosen to provide as much semantic
compatibility between platforms as possible.
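
For example, a caller can report a failure identically on Linux and
Windows, since the result is a positive errno value on both (sketch;
'tid' and 'cpuset' are hypothetical, needs <stdio.h> and <string.h>):

    ret = rte_thread_set_affinity_by_id(tid, &cpuset);
    if (ret != 0)
        /* ret is errno-style on both platforms, e.g. EINVAL. */
        fprintf(stderr, "set affinity: %s\n", strerror(ret));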

Note: convert_cpuset_to_affinity() has the limitation that all CPUs
of the set must belong to the same processor group.
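
For instance, on a hypothetical Windows machine where CPUs 0 and 64
fall into different processor groups, a set containing both is
rejected:

    CPU_ZERO(&cpuset);
    CPU_SET(0, &cpuset);
    CPU_SET(64, &cpuset);
    /* Expected to fail with ENOTSUP: the CPUs span two groups. */
    ret = rte_thread_set_affinity_by_id(rte_thread_self(), &cpuset);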

Signed-off-by: Narcisa Vasile <navas...@microsoft.com>
Signed-off-by: Tyler Retzlaff <roret...@linux.microsoft.com>
Acked-by: Dmitry Kozlyuk <dmitry.kozl...@gmail.com>
---
 lib/eal/include/rte_thread.h     |  42 +++++++++
 lib/eal/unix/rte_thread.c        |  16 ++++
 lib/eal/version.map              |   2 +
 lib/eal/windows/eal_lcore.c      | 181 +++++++++++++++++++++++++++++----------
 lib/eal/windows/eal_windows.h    |  10 +++
 lib/eal/windows/include/rte_os.h |   2 +
 lib/eal/windows/rte_thread.c     | 181 ++++++++++++++++++++++++++++++++++++++-
 7 files changed, 386 insertions(+), 48 deletions(-)

diff --git a/lib/eal/include/rte_thread.h b/lib/eal/include/rte_thread.h
index 14478ba..7888f7a 100644
--- a/lib/eal/include/rte_thread.h
+++ b/lib/eal/include/rte_thread.h
@@ -50,6 +50,48 @@
 #ifdef RTE_HAS_CPUSET
 
 /**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Set the affinity of thread 'thread_id' to the CPU set
+ * specified by 'cpuset'.
+ *
+ * @param thread_id
+ *   ID of the thread for which to set the affinity.
+ *
+ * @param cpuset
+ *   Pointer to CPU affinity to set.
+ *
+ * @return
+ *   On success, return 0.
+ *   On failure, return a positive errno-style error number.
+ */
+__rte_experimental
+int rte_thread_set_affinity_by_id(rte_thread_t thread_id,
+               const rte_cpuset_t *cpuset);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Get the affinity of thread 'thread_id' and store it
+ * in 'cpuset'.
+ *
+ * @param thread_id
+ *   ID of the thread for which to get the affinity.
+ *
+ * @param cpuset
+ *   Pointer for storing the affinity value.
+ *
+ * @return
+ *   On success, return 0.
+ *   On failure, return a positive errno-style error number.
+ */
+__rte_experimental
+int rte_thread_get_affinity_by_id(rte_thread_t thread_id,
+               rte_cpuset_t *cpuset);
+
+/**
  * Set core affinity of the current thread.
  * Support both EAL and non-EAL thread and update TLS.
  *
diff --git a/lib/eal/unix/rte_thread.c b/lib/eal/unix/rte_thread.c
index 5e5beb1..9e5fa47 100644
--- a/lib/eal/unix/rte_thread.c
+++ b/lib/eal/unix/rte_thread.c
@@ -102,3 +102,19 @@ struct eal_tls_key {
        }
        return pthread_getspecific(key->thread_index);
 }
+
+int
+rte_thread_set_affinity_by_id(rte_thread_t thread_id,
+               const rte_cpuset_t *cpuset)
+{
+       return pthread_setaffinity_np((pthread_t)thread_id.opaque_id,
+               sizeof(*cpuset), cpuset);
+}
+
+int
+rte_thread_get_affinity_by_id(rte_thread_t thread_id,
+               rte_cpuset_t *cpuset)
+{
+       return pthread_getaffinity_np((pthread_t)thread_id.opaque_id,
+               sizeof(*cpuset), cpuset);
+}
diff --git a/lib/eal/version.map b/lib/eal/version.map
index 05ce8f9..d49e30b 100644
--- a/lib/eal/version.map
+++ b/lib/eal/version.map
@@ -422,7 +422,9 @@ EXPERIMENTAL {
        rte_intr_type_set;
 
        # added in 22.07
+       rte_thread_get_affinity_by_id;
        rte_thread_self;
+       rte_thread_set_affinity_by_id;
 };
 
 INTERNAL {
diff --git a/lib/eal/windows/eal_lcore.c b/lib/eal/windows/eal_lcore.c
index 476c2d2..286fe24 100644
--- a/lib/eal/windows/eal_lcore.c
+++ b/lib/eal/windows/eal_lcore.c
@@ -1,8 +1,8 @@
 /* SPDX-License-Identifier: BSD-3-Clause
  * Copyright(c) 2019 Intel Corporation
+ * Copyright (C) 2022 Microsoft Corporation
  */
 
-#include <pthread.h>
 #include <stdbool.h>
 #include <stdint.h>
 
@@ -27,13 +27,15 @@ struct socket_map {
 };
 
 struct cpu_map {
-       unsigned int socket_count;
        unsigned int lcore_count;
+       unsigned int socket_count;
+       unsigned int cpu_count;
        struct lcore_map lcores[RTE_MAX_LCORE];
        struct socket_map sockets[RTE_MAX_NUMA_NODES];
+       GROUP_AFFINITY cpus[CPU_SETSIZE];
 };
 
-static struct cpu_map cpu_map = { 0 };
+static struct cpu_map cpu_map;
 
 /* eal_create_cpu_map() is called before logging is initialized */
 static void
@@ -47,13 +49,115 @@ struct cpu_map {
        va_end(va);
 }
 
+static int
+eal_query_group_affinity(void)
+{
+       SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *infos = NULL;
+       unsigned int *cpu_count = &cpu_map.cpu_count;
+       DWORD infos_size = 0;
+       int ret = 0;
+       USHORT group_count;
+       KAFFINITY affinity;
+       USHORT group_no;
+       unsigned int i;
+
+       if (!GetLogicalProcessorInformationEx(RelationGroup, NULL,
+                       &infos_size)) {
+               DWORD error = GetLastError();
+               if (error != ERROR_INSUFFICIENT_BUFFER) {
+                       log_early("Cannot get group information size, error %lu\n", error);
+                       rte_errno = EINVAL;
+                       ret = -1;
+                       goto cleanup;
+               }
+       }
+
+       infos = malloc(infos_size);
+       if (infos == NULL) {
+               log_early("Cannot allocate memory for group information\n");
+               rte_errno = ENOMEM;
+               ret = -1;
+               goto cleanup;
+       }
+
+       if (!GetLogicalProcessorInformationEx(RelationGroup, infos,
+                       &infos_size)) {
+               log_early("Cannot get group information, error %lu\n",
+                       GetLastError());
+               rte_errno = EINVAL;
+               ret = -1;
+               goto cleanup;
+       }
+
+       *cpu_count = 0;
+       group_count = infos->Group.ActiveGroupCount;
+       for (group_no = 0; group_no < group_count; group_no++) {
+               affinity = infos->Group.GroupInfo[group_no].ActiveProcessorMask;
+               for (i = 0; i < EAL_PROCESSOR_GROUP_SIZE; i++) {
+                       if ((affinity & ((KAFFINITY)1 << i)) == 0)
+                               continue;
+                       cpu_map.cpus[*cpu_count].Group = group_no;
+                       cpu_map.cpus[*cpu_count].Mask = (KAFFINITY)1 << i;
+                       (*cpu_count)++;
+               }
+       }
+
+cleanup:
+       free(infos);
+       return ret;
+}
+
+static bool
+eal_create_lcore_map(const SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *info)
+{
+       const unsigned int node_id = info->NumaNode.NodeNumber;
+       const GROUP_AFFINITY *cores = &info->NumaNode.GroupMask;
+       struct lcore_map *lcore;
+       unsigned int socket_id;
+       unsigned int i;
+
+       /*
+        * A NUMA node may be reported multiple times if it includes
+        * cores from different processor groups, e.g. 80 cores
+        * of a physical processor comprise one NUMA node but two
+        * processor groups, because group size is limited to 32/64.
+        */
+       for (socket_id = 0; socket_id < cpu_map.socket_count; socket_id++)
+               if (cpu_map.sockets[socket_id].node_id == node_id)
+                       break;
+
+       if (socket_id == cpu_map.socket_count) {
+               if (socket_id == RTE_DIM(cpu_map.sockets))
+                       return true;
+
+               cpu_map.sockets[socket_id].node_id = node_id;
+               cpu_map.socket_count++;
+       }
+
+       for (i = 0; i < EAL_PROCESSOR_GROUP_SIZE; i++) {
+               if ((cores->Mask & ((KAFFINITY)1 << i)) == 0)
+                       continue;
+
+               if (cpu_map.lcore_count == RTE_DIM(cpu_map.lcores))
+                       return true;
+
+               lcore = &cpu_map.lcores[cpu_map.lcore_count];
+               lcore->socket_id = socket_id;
+               lcore->core_id = cores->Group * EAL_PROCESSOR_GROUP_SIZE + i;
+               cpu_map.lcore_count++;
+       }
+       return false;
+}
+
 int
 eal_create_cpu_map(void)
 {
        SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *infos, *info;
        DWORD infos_size;
        bool full = false;
+       int ret = 0;
 
+       infos = NULL;
        infos_size = 0;
        if (!GetLogicalProcessorInformationEx(
                        RelationNumaNode, NULL, &infos_size)) {
@@ -62,7 +166,8 @@ struct cpu_map {
                        log_early("Cannot get NUMA node info size, error %lu\n",
                                GetLastError());
                        rte_errno = ENOMEM;
-                       return -1;
+                       ret = -1;
+                       goto exit;
                }
        }
 
@@ -70,7 +175,8 @@ struct cpu_map {
        if (infos == NULL) {
                log_early("Cannot allocate memory for NUMA node information\n");
                rte_errno = ENOMEM;
-               return -1;
+               ret = -1;
+               goto exit;
        }
 
        if (!GetLogicalProcessorInformationEx(
@@ -78,57 +184,30 @@ struct cpu_map {
                log_early("Cannot get NUMA node information, error %lu\n",
                        GetLastError());
                rte_errno = EINVAL;
-               return -1;
+               ret = -1;
+               goto exit;
        }
 
        info = infos;
        while ((uint8_t *)info - (uint8_t *)infos < infos_size) {
-               unsigned int node_id = info->NumaNode.NodeNumber;
-               GROUP_AFFINITY *cores = &info->NumaNode.GroupMask;
-               struct lcore_map *lcore;
-               unsigned int i, socket_id;
-
-               /* NUMA node may be reported multiple times if it includes
-                * cores from different processor groups, e. g. 80 cores
-                * of a physical processor comprise one NUMA node, but two
-                * processor groups, because group size is limited by 32/64.
-                */
-               for (socket_id = 0; socket_id < cpu_map.socket_count;
-                   socket_id++) {
-                       if (cpu_map.sockets[socket_id].node_id == node_id)
-                               break;
-               }
-
-               if (socket_id == cpu_map.socket_count) {
-                       if (socket_id == RTE_DIM(cpu_map.sockets)) {
-                               full = true;
-                               goto exit;
-                       }
-
-                       cpu_map.sockets[socket_id].node_id = node_id;
-                       cpu_map.socket_count++;
-               }
-
-               for (i = 0; i < EAL_PROCESSOR_GROUP_SIZE; i++) {
-                       if ((cores->Mask & ((KAFFINITY)1 << i)) == 0)
-                               continue;
-
-                       if (cpu_map.lcore_count == RTE_DIM(cpu_map.lcores)) {
-                               full = true;
-                               goto exit;
-                       }
-
-                       lcore = &cpu_map.lcores[cpu_map.lcore_count];
-                       lcore->socket_id = socket_id;
-                       lcore->core_id =
-                               cores->Group * EAL_PROCESSOR_GROUP_SIZE + i;
-                       cpu_map.lcore_count++;
+               if (eal_create_lcore_map(info)) {
+                       full = true;
+                       break;
                }
 
                info = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)(
                        (uint8_t *)info + info->Size);
        }
 
+       if (eal_query_group_affinity()) {
+               /*
+                * No need to set rte_errno here.
+                * It is set by eal_query_group_affinity().
+                */
+               ret = -1;
+               goto exit;
+       }
+
 exit:
        if (full) {
                /* Not a fatal error, but important for troubleshooting. */
@@ -138,7 +217,7 @@ struct cpu_map {
 
        free(infos);
 
-       return 0;
+       return ret;
 }
 
 int
@@ -164,3 +243,11 @@ struct cpu_map {
 {
        return cpu_map.sockets[socket_id].node_id;
 }
+
+PGROUP_AFFINITY
+eal_get_cpu_affinity(size_t cpu_index)
+{
+       RTE_VERIFY(cpu_index < CPU_SETSIZE);
+
+       return &cpu_map.cpus[cpu_index];
+}
diff --git a/lib/eal/windows/eal_windows.h b/lib/eal/windows/eal_windows.h
index e4c4670..ab25814 100644
--- a/lib/eal/windows/eal_windows.h
+++ b/lib/eal/windows/eal_windows.h
@@ -58,6 +58,16 @@
 unsigned int eal_socket_numa_node(unsigned int socket_id);
 
 /**
+ * Get pointer to the group affinity for the cpu.
+ *
+ * @param cpu_index
+ *  Index of the cpu, as it comes from rte_cpuset_t.
+ * @return
+ *  Pointer to the group affinity for the cpu.
+ */
+PGROUP_AFFINITY eal_get_cpu_affinity(size_t cpu_index);
+
+/**
  * Schedule code for execution in the interrupt thread.
  *
  * @param func
diff --git a/lib/eal/windows/include/rte_os.h b/lib/eal/windows/include/rte_os.h
index a0a3114..1c33058 100644
--- a/lib/eal/windows/include/rte_os.h
+++ b/lib/eal/windows/include/rte_os.h
@@ -14,6 +14,8 @@
 #include <stdlib.h>
 #include <string.h>
 
+#include <sched.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
diff --git a/lib/eal/windows/rte_thread.c b/lib/eal/windows/rte_thread.c
index 59fed3c..a616703 100644
--- a/lib/eal/windows/rte_thread.c
+++ b/lib/eal/windows/rte_thread.c
@@ -1,16 +1,66 @@
 /* SPDX-License-Identifier: BSD-3-Clause
  * Copyright 2021 Mellanox Technologies, Ltd
+ * Copyright (C) 2022 Microsoft Corporation
  */
 
 #include <rte_common.h>
 #include <rte_errno.h>
 #include <rte_thread.h>
-#include <rte_windows.h>
+
+#include "eal_windows.h"
 
 struct eal_tls_key {
        DWORD thread_index;
 };
 
+/* Translates the most common error codes related to threads */
+static int
+thread_translate_win32_error(DWORD error)
+{
+       switch (error) {
+       case ERROR_SUCCESS:
+               return 0;
+
+       case ERROR_INVALID_PARAMETER:
+               return EINVAL;
+
+       case ERROR_INVALID_HANDLE:
+               return EFAULT;
+
+       case ERROR_NOT_ENOUGH_MEMORY:
+               /* FALLTHROUGH */
+       case ERROR_NO_SYSTEM_RESOURCES:
+               return ENOMEM;
+
+       case ERROR_PRIVILEGE_NOT_HELD:
+               /* FALLTHROUGH */
+       case ERROR_ACCESS_DENIED:
+               return EACCES;
+
+       case ERROR_ALREADY_EXISTS:
+               return EEXIST;
+
+       case ERROR_POSSIBLE_DEADLOCK:
+               return EDEADLK;
+
+       case ERROR_INVALID_FUNCTION:
+               /* FALLTHROUGH */
+       case ERROR_CALL_NOT_IMPLEMENTED:
+               return ENOSYS;
+       }
+
+       return EINVAL;
+}
+
+static int
+thread_log_last_error(const char *message)
+{
+       DWORD error = GetLastError();
+       RTE_LOG(DEBUG, EAL, "GetLastError()=%lu: %s\n", error, message);
+
+       return thread_translate_win32_error(error);
+}
+
 rte_thread_t
 rte_thread_self(void)
 {
@@ -97,3 +147,132 @@ struct eal_tls_key {
        }
        return output;
 }
+
+static int
+convert_cpuset_to_affinity(const rte_cpuset_t *cpuset,
+               PGROUP_AFFINITY affinity)
+{
+       int ret = 0;
+       PGROUP_AFFINITY cpu_affinity = NULL;
+       unsigned int cpu_idx;
+
+       memset(affinity, 0, sizeof(GROUP_AFFINITY));
+       affinity->Group = (USHORT)-1;
+
+       /* Check that all cpus of the set belong to the same processor group and
+        * accumulate thread affinity to be applied.
+        */
+       for (cpu_idx = 0; cpu_idx < CPU_SETSIZE; cpu_idx++) {
+               if (!CPU_ISSET(cpu_idx, cpuset))
+                       continue;
+
+               cpu_affinity = eal_get_cpu_affinity(cpu_idx);
+
+               if (affinity->Group == (USHORT)-1) {
+                       affinity->Group = cpu_affinity->Group;
+               } else if (affinity->Group != cpu_affinity->Group) {
+                       RTE_LOG(DEBUG, EAL, "All processors must belong to the same processor group\n");
+                       ret = ENOTSUP;
+                       goto cleanup;
+               }
+
+               affinity->Mask |= cpu_affinity->Mask;
+       }
+
+       if (affinity->Mask == 0) {
+               ret = EINVAL;
+               goto cleanup;
+       }
+
+cleanup:
+       return ret;
+}
+
+int
+rte_thread_set_affinity_by_id(rte_thread_t thread_id,
+               const rte_cpuset_t *cpuset)
+{
+       int ret = 0;
+       GROUP_AFFINITY thread_affinity;
+       HANDLE thread_handle = NULL;
+
+       if (cpuset == NULL) {
+               ret = EINVAL;
+               goto cleanup;
+       }
+
+       ret = convert_cpuset_to_affinity(cpuset, &thread_affinity);
+       if (ret != 0) {
+               RTE_LOG(DEBUG, EAL, "Unable to convert cpuset to thread affinity\n");
+               goto cleanup;
+       }
+
+       thread_handle = OpenThread(THREAD_ALL_ACCESS, FALSE,
+               thread_id.opaque_id);
+       if (thread_handle == NULL) {
+               ret = thread_log_last_error("OpenThread()");
+               goto cleanup;
+       }
+
+       if (!SetThreadGroupAffinity(thread_handle, &thread_affinity, NULL)) {
+               ret = thread_log_last_error("SetThreadGroupAffinity()");
+               goto cleanup;
+       }
+
+cleanup:
+       if (thread_handle != NULL) {
+               CloseHandle(thread_handle);
+               thread_handle = NULL;
+       }
+
+       return ret;
+}
+
+int
+rte_thread_get_affinity_by_id(rte_thread_t thread_id,
+               rte_cpuset_t *cpuset)
+{
+       HANDLE thread_handle = NULL;
+       PGROUP_AFFINITY cpu_affinity;
+       GROUP_AFFINITY thread_affinity;
+       unsigned int cpu_idx;
+       int ret = 0;
+
+       if (cpuset == NULL) {
+               ret = EINVAL;
+               goto cleanup;
+       }
+
+       thread_handle = OpenThread(THREAD_ALL_ACCESS, FALSE,
+               thread_id.opaque_id);
+       if (thread_handle == NULL) {
+               ret = thread_log_last_error("OpenThread()");
+               goto cleanup;
+       }
+
+       /* Obtain the thread's current group affinity. */
+       if (!GetThreadGroupAffinity(thread_handle, &thread_affinity)) {
+               ret = thread_log_last_error("GetThreadGroupAffinity()");
+               goto cleanup;
+       }
+
+       CPU_ZERO(cpuset);
+
+       /* Convert affinity to DPDK cpu set */
+       for (cpu_idx = 0; cpu_idx < CPU_SETSIZE; cpu_idx++) {
+
+               cpu_affinity = eal_get_cpu_affinity(cpu_idx);
+
+               if ((cpu_affinity->Group == thread_affinity.Group) &&
+                  ((cpu_affinity->Mask & thread_affinity.Mask) != 0)) {
+                       CPU_SET(cpu_idx, cpuset);
+               }
+       }
+
+cleanup:
+       if (thread_handle != NULL) {
+               CloseHandle(thread_handle);
+               thread_handle = NULL;
+       }
+       return ret;
+}
-- 
1.8.3.1
