https://gcc.gnu.org/g:85c55edc0959816f7f4f27ef76c87d60a21e0880
commit 85c55edc0959816f7f4f27ef76c87d60a21e0880 Author: Sandra Loosemore <sloosem...@baylibre.com> Date: Tue May 14 18:28:43 2024 +0200 libgomp: runtime support for target_device selector This patch implements the libgomp runtime support for the dynamic target_device selector via the GOMP_evaluate_target_device function. include/ChangeLog * cuda/cuda.h (CUdevice_attribute): Add definitions for CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR and CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR. libgomp/ChangeLog * Makefile.am (libgomp_la_SOURCES): Add selector.c. * Makefile.in: Regenerate. * config/gcn/selector.c: New. * config/linux/selector.c: New. * config/linux/x86/selector.c: New. * config/nvptx/selector.c: New. * libgomp-plugin.h (GOMP_OFFLOAD_evaluate_device): New. * libgomp.h (struct gomp_device_descr): Add evaluate_device_func field. * libgomp.map (GOMP_5.1.3): New, add GOMP_evaluate_target_device. * libgomp.texi (OpenMP Context Selectors): Document dynamic selector matching of kind/arch/isa. * libgomp_g.h (GOMP_evaluate_current_device): New. (GOMP_evaluate_target_device): New. * oacc-host.c (host_evaluate_device): New. (host_openacc_exec): Initialize evaluate_device_func field to host_evaluate_device. * plugin/plugin-gcn.c (gomp_match_selectors): New. (gomp_match_isa): New. (GOMP_OFFLOAD_evaluate_device): New. * plugin/plugin-nvptx.c (struct ptx_device): Add compute_major and compute_minor fields. (nvptx_open_device): Read compute capability information from device. (gomp_match_selectors): New. (gomp_match_selector): New. (CHECK_ISA): New macro. (GOMP_OFFLOAD_evaluate_device): New. * selector.c: New. * target.c (GOMP_evaluate_target_device): New. (gomp_load_plugin_for_device): Load evaluate_device plugin function. 
Co-Authored-By: Kwok Cheung Yeung <k...@codesourcery.com> Co-Authored-By: Sandra Loosemore <san...@codesourcery.com> Diff: --- include/ChangeLog.omp | 6 + libgomp/ChangeLog.omp | 32 +++ libgomp/Makefile.am | 2 +- libgomp/Makefile.in | 5 +- libgomp/config/gcn/selector.c | 102 +++++++++ libgomp/config/linux/selector.c | 65 ++++++ libgomp/config/linux/x86/selector.c | 406 ++++++++++++++++++++++++++++++++++++ libgomp/config/nvptx/selector.c | 77 +++++++ libgomp/libgomp-plugin.h | 2 + libgomp/libgomp.h | 1 + libgomp/libgomp.map | 5 + libgomp/libgomp.texi | 18 +- libgomp/libgomp_g.h | 8 + libgomp/oacc-host.c | 11 + libgomp/plugin/plugin-gcn.c | 52 +++++ libgomp/plugin/plugin-nvptx.c | 82 ++++++++ libgomp/selector.c | 64 ++++++ libgomp/target.c | 40 ++++ 18 files changed, 972 insertions(+), 6 deletions(-) diff --git a/include/ChangeLog.omp b/include/ChangeLog.omp index efbc3832ccc..88098aeb89b 100644 --- a/include/ChangeLog.omp +++ b/include/ChangeLog.omp @@ -1,3 +1,9 @@ +2024-05-04 Sandra Loosemore <sloosem...@baylibre.com> + + * cuda/cuda.h (CUdevice_attribute): Add definitions for + CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR and + CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR. + 2023-07-03 Julian Brown <jul...@codesourcery.com> * gomp-constants.h (gomp_map_kind): Add GOMP_MAP_TO_GRID, diff --git a/libgomp/ChangeLog.omp b/libgomp/ChangeLog.omp index fad929553cf..11d863b7013 100644 --- a/libgomp/ChangeLog.omp +++ b/libgomp/ChangeLog.omp @@ -1,3 +1,35 @@ +2024-05-04 Sandra Loosemore <sloosem...@baylibre.com> + + * Makefile.am (libgomp_la_SOURCES): Add selector.c. + * Makefile.in: Regenerate. + * config/gcn/selector.c: New. + * config/linux/selector.c: New. + * config/linux/x86/selector.c: New. + * config/nvptx/selector.c: New. + * libgomp-plugin.h (GOMP_OFFLOAD_evaluate_device): New. + * libgomp.h (struct gomp_device_descr): Add evaluate_device_func field. + * libgomp.map (GOMP_5.1.3): New, add GOMP_evaluate_target_device. 
+ * libgomp.texi (OpenMP Context Selectors): Document dynamic selector + matching of kind/arch/isa. + * libgomp_g.h (GOMP_evaluate_current_device): New. + (GOMP_evaluate_target_device): New. + * oacc-host.c (host_evaluate_device): New. + (host_openacc_exec): Initialize evaluate_device_func field to + host_evaluate_device. + * plugin/plugin-gcn.c (gomp_match_selectors): New. + (gomp_match_isa): New. + (GOMP_OFFLOAD_evaluate_device): New. + * plugin/plugin-nvptx.c (struct ptx_device): Add compute_major and + compute_minor fields. + (nvptx_open_device): Read compute capability information from device. + (gomp_match_selectors): New. + (gomp_match_selector): New. + (CHECK_ISA): New macro. + (GOMP_OFFLOAD_evaluate_device): New. + * selector.c: New. + * target.c (GOMP_evaluate_target_device): New. + (gomp_load_plugin_for_device): Load evaluate_device plugin function. + 2023-10-30 Tobias Burnus <tob...@codesourcery.com> * testsuite/libgomp.fortran/allocate-8a.f90: New test. diff --git a/libgomp/Makefile.am b/libgomp/Makefile.am index 5d355f20cc4..92179cd9031 100644 --- a/libgomp/Makefile.am +++ b/libgomp/Makefile.am @@ -73,7 +73,7 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c error.c \ oacc-init.c oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c \ priority_queue.c affinity-fmt.c teams.c allocator.c oacc-profiling.c \ oacc-target.c target-indirect.c oacc-profiling-acc_register_library.c \ - usmpin-allocator.c + usmpin-allocator.c selector.c include $(top_srcdir)/plugin/Makefrag.am diff --git a/libgomp/Makefile.in b/libgomp/Makefile.in index a63bd5b9dc6..57548cc342c 100644 --- a/libgomp/Makefile.in +++ b/libgomp/Makefile.in @@ -221,7 +221,7 @@ am_libgomp_la_OBJECTS = alloc.lo atomic.lo barrier.lo critical.lo \ affinity-fmt.lo teams.lo allocator.lo oacc-profiling.lo \ oacc-target.lo target-indirect.lo \ oacc-profiling-acc_register_library.lo \ - usmpin-allocator.lo $(am__objects_1) + usmpin-allocator.lo selector.lo $(am__objects_1) libgomp_la_OBJECTS = 
$(am_libgomp_la_OBJECTS) AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) @@ -556,7 +556,7 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c \ affinity-fmt.c teams.c allocator.c oacc-profiling.c \ oacc-target.c target-indirect.c \ oacc-profiling-acc_register_library.c \ - usmpin-allocator.c $(am__append_3) + usmpin-allocator.c selector.c $(am__append_3) # Nvidia PTX OpenACC plugin. @PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info $(libtool_VERSION) @@ -782,6 +782,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ptrlock.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/scope.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sections.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/selector.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sem.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/single.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/splay-tree.Plo@am__quote@ diff --git a/libgomp/config/gcn/selector.c b/libgomp/config/gcn/selector.c new file mode 100644 index 00000000000..7e099a00b97 --- /dev/null +++ b/libgomp/config/gcn/selector.c @@ -0,0 +1,102 @@ +/* Copyright (C) 2022 Free Software Foundation, Inc. + Contributed by Mentor, a Siemens Business. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. 
+ + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +/* This file contains an implementation of GOMP_evaluate_current_device for + an AMD GCN GPU. */ + +#include "libgomp.h" +#include <string.h> + +/* The selectors are passed as strings, but are actually sets of multiple + trait property names, separated by '\0' and with an extra '\0' at + the end. Match such a string SELECTORS against an array of strings + CHOICES, that is terminated by a null pointer. + matches. */ +static bool +gomp_match_selectors (const char *selectors, const char **choices) +{ + while (*selectors != '\0') + { + bool match = false; + for (int i = 0; !match && choices[i]; i++) + match = !strcmp (selectors, choices[i]); + if (!match) + return false; + selectors += strlen (selectors) + 1; + } + return true; +} + +bool +GOMP_evaluate_current_device (const char *kind, const char *arch, + const char *isa) +{ + static const char *kind_choices[] = { "gpu", "nohost", NULL }; + static const char *arch_choices[] = { "gcn", "amdgcn", NULL }; + static const char *isa_choices[] + = { +#ifdef __fiji__ + "fiji", "gfx803", +#endif +#ifdef __gfx900__ + "gfx900", +#endif +#ifdef __gfx906__ + "gfx906", +#endif +#ifdef __gfx908__ + "gfx908", +#endif +#ifdef __gfx90a__ + "gfx90a", +#endif +#ifdef __gfx90c__ + "gfx90c", +#endif +#ifdef __gfx1030__ + "gfx1030", +#endif +#ifdef __gfx1036__ + "gfx1036", +#endif +#ifdef __gfx1100__ + "gfx1100", +#endif +#ifdef __gfx1103__ + "gfx1103", +#endif + NULL }; + + if (kind && !gomp_match_selectors (kind, kind_choices)) + return false; + + if (arch && !gomp_match_selectors (arch, arch_choices)) 
+ return false; + + if (isa && !gomp_match_selectors (isa, isa_choices)) + return false; + + return true; +} diff --git a/libgomp/config/linux/selector.c b/libgomp/config/linux/selector.c new file mode 100644 index 00000000000..064cb937ecc --- /dev/null +++ b/libgomp/config/linux/selector.c @@ -0,0 +1,65 @@ +/* Copyright (C) 2022 Free Software Foundation, Inc. + Contributed by Mentor, a Siemens Business. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +/* This file contains a generic implementation of + GOMP_evaluate_current_device when run on a Linux host. */ + +#include <string.h> +#include "libgomp.h" + +/* The selectors are passed as strings, but are actually sets of multiple + trait property names, separated by '\0' and with an extra '\0' at + the end. Match such a string SELECTORS against an array of strings + CHOICES, that is terminated by a null pointer. + matches. 
*/ +static bool +gomp_match_selectors (const char *selectors, const char **choices) +{ + while (*selectors != '\0') + { + bool match = false; + for (int i = 0; !match && choices[i]; i++) + match = !strcmp (selectors, choices[i]); + if (!match) + return false; + selectors += strlen (selectors) + 1; + } + return true; +} + +bool +GOMP_evaluate_current_device (const char *kind, const char *arch, + const char *isa) +{ + static const char *kind_choices[] = { "cpu", "host", NULL }; + + if (kind && !gomp_match_selectors (kind, kind_choices)) + return false; + + if (arch || isa) + return false; + + return true; +} diff --git a/libgomp/config/linux/x86/selector.c b/libgomp/config/linux/x86/selector.c new file mode 100644 index 00000000000..13cd2e14389 --- /dev/null +++ b/libgomp/config/linux/x86/selector.c @@ -0,0 +1,406 @@ +/* Copyright (C) 2022 Free Software Foundation, Inc. + Contributed by Mentor, a Siemens Business. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +/* This file contains an implementation of GOMP_evaluate_current_device for + an x86/x64-based Linux host. */ + +#include <string.h> +#include "libgomp.h" + +/* The selectors are passed as strings, but are actually sets of multiple + trait property names, separated by '\0' and with an extra '\0' at + the end. Match such a string SELECTORS against an array of strings + CHOICES, that is terminated by a null pointer. + matches. */ +static bool +gomp_match_selectors (const char *selectors, const char **choices) +{ + while (*selectors != '\0') + { + bool match = false; + for (int i = 0; !match && choices[i]; i++) + match = !strcmp (selectors, choices[i]); + if (!match) + return false; + selectors += strlen (selectors) + 1; + } + return true; +} + +bool +GOMP_evaluate_current_device (const char *kind, const char *arch, + const char *isa) +{ + static const char *kind_choices[] = { "cpu", "host", NULL }; + + static const char *arch_choices[] + = { "x86", + "ia32", +#ifdef __x86_64__ + "x86_64", +#endif +#ifdef __ILP32__ + "x32", +#endif + "i386", +#ifdef __i486__ + "i486", +#endif +#ifdef __i586__ + "i586", +#endif +#ifdef __i686__ + "i686", +#endif + NULL }; + + static const char *isa_choices[] + = { +#ifdef __WBNOINVD__ + "wbnoinvd", +#endif +#ifdef __AVX512VP2INTERSECT__ + "avx512vp2intersect", +#endif +#ifdef __MMX__ + "mmx", +#endif +#ifdef __3dNOW__ + "3dnow", +#endif +#ifdef __3dNOW_A__ + "3dnowa", +#endif +#ifdef __SSE__ + "sse", +#endif +#ifdef __SSE2__ + "sse2", +#endif +#ifdef __SSE3__ + "sse3", +#endif +#ifdef __SSSE3__ + "ssse3", +#endif +#ifdef __SSE4_1__ + "sse4.1", +#endif +#ifdef __SSE4_2__ + "sse4", + "sse4.2", +#endif +#ifdef __AES__ + "aes", +#endif +#ifdef __SHA__ + "sha", +#endif +#ifdef __PCLMUL__ + "pclmul", +#endif +#ifdef __AVX__ + "avx", +#endif +#ifdef __AVX2__ + "avx2", +#endif +#ifdef __AVX512F__ + "avx512f", +#endif +#ifdef __AVX512ER__ + "avx512er", +#endif +#ifdef __AVX512CD__ + "avx512cd", +#endif +#ifdef __AVX512PF__ + "avx512pf", 
+#endif +#ifdef __AVX512DQ__ + "avx512dq", +#endif +#ifdef __AVX512BW__ + "avx512bw", +#endif +#ifdef __AVX512VL__ + "avx512vl", +#endif +#ifdef __AVX512VBMI__ + "avx512vbmi", +#endif +#ifdef __AVX512IFMA__ + "avx512ifma", +#endif +#ifdef __AVX5124VNNIW__ + "avx5124vnniw", +#endif +#ifdef __AVX512VBMI2__ + "avx512vbmi2", +#endif +#ifdef __AVX512VNNI__ + "avx512vnni", +#endif +#ifdef __PCONFIG__ + "pconfig", +#endif +#ifdef __SGX__ + "sgx", +#endif +#ifdef __AVX5124FMAPS__ + "avx5124fmaps", +#endif +#ifdef __AVX512BITALG__ + "avx512bitalg", +#endif +#ifdef __AVX512VPOPCNTDQ__ + "avx512vpopcntdq", +#endif +#ifdef __FMA__ + "fma", +#endif +#ifdef __RTM__ + "rtm", +#endif +#ifdef __SSE4A__ + "sse4a", +#endif +#ifdef __FMA4__ + "fma4", +#endif +#ifdef __XOP__ + "xop", +#endif +#ifdef __LWP__ + "lwp", +#endif +#ifdef __ABM__ + "abm", +#endif +#ifdef __BMI__ + "bmi", +#endif +#ifdef __BMI2__ + "bmi2", +#endif +#ifdef __LZCNT__ + "lzcnt", +#endif +#ifdef __TBM__ + "tbm", +#endif +#ifdef __CRC32__ + "crc32", +#endif +#ifdef __POPCNT__ + "popcnt", +#endif +#ifdef __FSGSBASE__ + "fsgsbase", +#endif +#ifdef __RDRND__ + "rdrnd", +#endif +#ifdef __F16C__ + "f16c", +#endif +#ifdef __RDSEED__ + "rdseed", +#endif +#ifdef __PRFCHW__ + "prfchw", +#endif +#ifdef __ADX__ + "adx", +#endif +#ifdef __FXSR__ + "fxsr", +#endif +#ifdef __XSAVE__ + "xsave", +#endif +#ifdef __XSAVEOPT__ + "xsaveopt", +#endif +#ifdef __PREFETCHWT1__ + "prefetchwt1", +#endif +#ifdef __CLFLUSHOPT__ + "clflushopt", +#endif +#ifdef __CLZERO__ + "clzero", +#endif +#ifdef __XSAVEC__ + "xsavec", +#endif +#ifdef __XSAVES__ + "xsaves", +#endif +#ifdef __CLWB__ + "clwb", +#endif +#ifdef __MWAITX__ + "mwaitx", +#endif +#ifdef __PKU__ + "pku", +#endif +#ifdef __RDPID__ + "rdpid", +#endif +#ifdef __GFNI__ + "gfni", +#endif +#ifdef __SHSTK__ + "shstk", +#endif +#ifdef __VAES__ + "vaes", +#endif +#ifdef __VPCLMULQDQ__ + "vpclmulqdq", +#endif +#ifdef __MOVDIRI__ + "movdiri", +#endif +#ifdef __MOVDIR64B__ + "movdir64b", +#endif 
+#ifdef __WAITPKG__ + "waitpkg", +#endif +#ifdef __CLDEMOTE__ + "cldemote", +#endif +#ifdef __SERIALIZE__ + "serialize", +#endif +#ifdef __PTWRITE__ + "ptwrite", +#endif +#ifdef __AVX512BF16__ + "avx512bf16", +#endif +#ifdef __AVX512FP16__ + "avx512fp16", +#endif +#ifdef __ENQCMD__ + "enqcmd", +#endif +#ifdef __TSXLDTRK__ + "tsxldtrk", +#endif +#ifdef __AMX_TILE__ + "amx-tile", +#endif +#ifdef __AMX_INT8__ + "amx-int8", +#endif +#ifdef __AMX_BF16__ + "amx-bf16", +#endif +#ifdef __LAHF_SAHF__ + "sahf", +#endif +#ifdef __MOVBE__ + "movbe", +#endif +#ifdef __UINTR__ + "uintr", +#endif +#ifdef __HRESET__ + "hreset", +#endif +#ifdef __KL__ + "kl", +#endif +#ifdef __WIDEKL__ + "widekl", +#endif +#ifdef __AVXVNNI__ + "avxvnni", +#endif +#ifdef __AVXIFMA__ + "avxifma", +#endif +#ifdef __AVXVNNIINT8__ + "avxvnniint8", +#endif +#ifdef __AVXNECONVERT__ + "avxneconvert", +#endif +#ifdef __CMPCCXADD__ + "cmpccxadd", +#endif +#ifdef __AMX_FP16__ + "amx-fp16", +#endif +#ifdef __PREFETCHI__ + "prefetchi", +#endif +#ifdef __RAOINT__ + "raoint", +#endif +#ifdef __AMX_COMPLEX__ + "amx-complex", +#endif +#ifdef __AVXVNNIINT16__ + "avxvnniint16", +#endif +#ifdef __SM3__ + "sm3", +#endif +#ifdef __SHA512__ + "sha512", +#endif +#ifdef __SM4__ + "sm4", +#endif +#ifdef __EVEX512__ + "evex512", +#endif +#ifdef __USER_MSR__ + "usermsr", +#endif +#ifdef __AVX10_1_256__ + "avx10.1-256", +#endif +#ifdef __AVX10_1_512__ + "avx10.1-512", +#endif +#ifdef __APX_F__ + "apxf", +#endif + NULL }; + + if (kind && !gomp_match_selectors (kind, kind_choices)) + return false; + if (arch && !gomp_match_selectors (arch, arch_choices)) + return false; + if (isa && !gomp_match_selectors (isa, isa_choices)) + return false; + return true; +} diff --git a/libgomp/config/nvptx/selector.c b/libgomp/config/nvptx/selector.c new file mode 100644 index 00000000000..c1e81efca28 --- /dev/null +++ b/libgomp/config/nvptx/selector.c @@ -0,0 +1,77 @@ +/* Copyright (C) 2022 Free Software Foundation, Inc. 
+ Contributed by Mentor, a Siemens Business. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +/* This file contains an implementation of GOMP_evaluate_current_device for + a Nvidia GPU. 
*/ + +#include "libgomp.h" +#include <string.h> + +static bool +gomp_match_selectors (const char *selectors, const char **choices) +{ + while (*selectors != '\0') + { + bool match = false; + for (int i = 0; !match && choices[i]; i++) + match = !strcmp (selectors, choices[i]); + if (!match) + return false; + selectors += strlen (selectors) + 1; + } + return true; +} + +bool +GOMP_evaluate_current_device (const char *kind, const char *arch, + const char *isa) +{ + static const char *kind_choices[] = { "gpu", "nohost", NULL }; + static const char *arch_choices[] = { "nvptx", NULL }; + static const char *isa_choices[] + = { + "sm_30", +#if __PTX_SM__ >= 350 + "sm_35", +#endif +#if __PTX_SM__ >= 530 + "sm_53", +#endif +#if __PTX_SM__ >= 750 + "sm_75", +#endif +#if __PTX_SM__ >= 800 + "sm_80", +#endif + NULL }; + + if (kind && !gomp_match_selectors (kind, kind_choices)) + return false; + if (arch && !gomp_match_selectors (arch, arch_choices)) + return false; + if (isa && !gomp_match_selectors (isa, isa_choices)) + return false; + return true; +} diff --git a/libgomp/libgomp-plugin.h b/libgomp/libgomp-plugin.h index 180740b62cd..96a08a5f4c7 100644 --- a/libgomp/libgomp-plugin.h +++ b/libgomp/libgomp-plugin.h @@ -154,6 +154,8 @@ extern int GOMP_OFFLOAD_memcpy3d (int, int, size_t, size_t, size_t, void *, extern bool GOMP_OFFLOAD_can_run (void *); extern void GOMP_OFFLOAD_run (int, void *, void *, void **); extern void GOMP_OFFLOAD_async_run (int, void *, void *, void **, void *); +extern bool GOMP_OFFLOAD_evaluate_device (int, const char *, const char *, + const char *); extern void GOMP_OFFLOAD_openacc_exec (void (*) (void *), size_t, void **, void **, unsigned *, void *); diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h index 88a690a4867..3383d4df45d 100644 --- a/libgomp/libgomp.h +++ b/libgomp/libgomp.h @@ -1437,6 +1437,7 @@ struct gomp_device_descr __typeof (GOMP_OFFLOAD_can_run) *can_run_func; __typeof (GOMP_OFFLOAD_run) *run_func; __typeof (GOMP_OFFLOAD_async_run) 
*async_run_func; + __typeof (GOMP_OFFLOAD_evaluate_device) *evaluate_device_func; /* Splay tree containing information about mapped memory regions. */ struct splay_tree_s mem_map; diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map index 0fee53046e4..fb9837e6243 100644 --- a/libgomp/libgomp.map +++ b/libgomp/libgomp.map @@ -429,6 +429,11 @@ GOMP_5.1.2 { GOMP_target_map_indirect_ptr; } GOMP_5.1.1; +GOMP_5.1.3 { + global: + GOMP_evaluate_target_device; +} GOMP_5.1.2; + OACC_2.0 { global: acc_get_num_devices; diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi index 409023b1abb..5da46f38eab 100644 --- a/libgomp/libgomp.texi +++ b/libgomp/libgomp.texi @@ -6193,9 +6193,10 @@ smaller number. On non-host devices, the value of the @c has to be implemented; cf. also PR target/105640. @c For offload devices, add *additionally* gcc/config/*/t-omp-device. -For the host compiler, @code{kind} always matches @code{host}; for the -offloading architectures AMD GCN and Nvidia PTX, @code{kind} always matches -@code{gpu}. For the x86 family of computers, AMD GCN and Nvidia PTX +For the host compiler, @code{kind} always matches @code{host} and @code{cpu}; +for the offloading architectures AMD GCN and Nvidia PTX, @code{kind} +always matches @code{gpu} and @code{nohost}. +For the x86 family of computers, AMD GCN and Nvidia PTX the following traits are supported in addition; while OpenMP is supported on more architectures, GCC currently does not match any @code{arch} or @code{isa} traits for those. @@ -6212,6 +6213,17 @@ on more architectures, GCC currently does not match any @code{arch} or @tab See @code{-march=} in ``Nvidia PTX Options'' @end multitable +For x86, note that the set of matching @code{arch} and @code{isa} +selectors is determined by command-line options rather than the actual +hardware. 
This is particularly true of dynamic selectors, which match +the options used to build libgomp rather than the options used to +build user programs (which may also differ between compilation units). + +For the @code{target_device} selector on AMD GCN and Nvidia PTX, +the actual hardware is checked at run time. On AMD GCN, an exact match +of the @code{isa} selector is required, while on Nvidia PTX lower-numbered +revisions also match. + @node Memory allocation @section Memory allocation diff --git a/libgomp/libgomp_g.h b/libgomp/libgomp_g.h index 9394f56406a..73c5fafb471 100644 --- a/libgomp/libgomp_g.h +++ b/libgomp/libgomp_g.h @@ -337,6 +337,11 @@ extern void GOMP_single_copy_end (void *); extern void GOMP_scope_start (uintptr_t *); +/* selector.c */ + +extern bool GOMP_evaluate_current_device (const char *, const char *, + const char *); + /* target.c */ extern void GOMP_target (int, void (*) (void *), const void *, @@ -359,6 +364,9 @@ extern void GOMP_teams (unsigned int, unsigned int); extern bool GOMP_teams4 (unsigned int, unsigned int, unsigned int, bool); extern void *GOMP_target_map_indirect_ptr (void *); +extern bool GOMP_evaluate_target_device (int, const char *, const char *, + const char *); + /* teams.c */ extern void GOMP_teams_reg (void (*) (void *), void *, unsigned, unsigned, diff --git a/libgomp/oacc-host.c b/libgomp/oacc-host.c index 5efdf7fb796..b6883850250 100644 --- a/libgomp/oacc-host.c +++ b/libgomp/oacc-host.c @@ -136,6 +136,16 @@ host_run (int n __attribute__ ((unused)), void *fn_ptr, void *vars, fn (vars); } +static bool +host_evaluate_device (int device_num __attribute__ ((unused)), + const char *kind __attribute__ ((unused)), + const char *arch __attribute__ ((unused)), + const char *isa __attribute__ ((unused))) +{ + __builtin_unreachable (); + return false; +} + static void host_openacc_exec (void (*fn) (void *), size_t mapnum __attribute__ ((unused)), @@ -285,6 +295,7 @@ static struct gomp_device_descr host_dispatch = .memcpy2d_func 
= NULL, .memcpy3d_func = NULL, .run_func = host_run, + .evaluate_device_func = host_evaluate_device, .mem_map = { NULL }, .mem_map_rev = { NULL }, diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c index 6d5d7d4335a..6fd22e19d37 100644 --- a/libgomp/plugin/plugin-gcn.c +++ b/libgomp/plugin/plugin-gcn.c @@ -4397,6 +4397,58 @@ GOMP_OFFLOAD_async_run (int device, void *tgt_fn, void *tgt_vars, GOMP_PLUGIN_target_task_completion, async_data); } +/* The selectors are passed as strings, but are actually sets of multiple + trait property names, separated by '\0' and with an extra '\0' at + the end. Match such a string SELECTORS against an array of strings + CHOICES, that is terminated by a null pointer. + matches. */ +static bool +gomp_match_selectors (const char *selectors, const char **choices) +{ + while (*selectors != '\0') + { + bool match = false; + for (int i = 0; !match && choices[i]; i++) + match = !strcmp (selectors, choices[i]); + if (!match) + return false; + selectors += strlen (selectors) + 1; + } + return true; +} + +/* Here we can only have one possible match and it must be + the only selector provided. 
*/ +static bool +gomp_match_isa (const char *selectors, gcn_isa isa) +{ + if (isa_code (selectors) != isa) + return false; + if (*(selectors + strlen (selectors) + 1) != '\0') + return false; + return true; +} + +bool +GOMP_OFFLOAD_evaluate_device (int device_num, const char *kind, + const char *arch, const char *isa) +{ + static const char *kind_choices[] = { "gpu", "nohost", NULL }; + static const char *arch_choices[] = { "gcn", "amdgcn", NULL }; + struct agent_info *agent = get_agent_info (device_num); + + if (kind && !gomp_match_selectors (kind, kind_choices)) + return false; + + if (arch && !gomp_match_selectors (arch, arch_choices)) + return false; + + if (isa && !gomp_match_isa (isa, agent->device_isa)) + return false; + + return true; +} + /* }}} */ /* {{{ OpenACC Plugin API */ diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c index e4672afe123..b3293b8ebec 100644 --- a/libgomp/plugin/plugin-nvptx.c +++ b/libgomp/plugin/plugin-nvptx.c @@ -319,6 +319,7 @@ struct ptx_device int max_threads_per_block; int max_threads_per_multiprocessor; int default_dims[GOMP_DIM_MAX]; + int compute_major, compute_minor; /* Length as used by the CUDA Runtime API ('struct cudaDeviceProp'). */ char name[256]; @@ -551,6 +552,14 @@ nvptx_open_device (int n) for (int i = 0; i != GOMP_DIM_MAX; i++) ptx_dev->default_dims[i] = 0; + CUDA_CALL_ERET (NULL, cuDeviceGetAttribute, &pi, + CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, dev); + ptx_dev->compute_major = pi; + + CUDA_CALL_ERET (NULL, cuDeviceGetAttribute, &pi, + CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, dev); + ptx_dev->compute_minor = pi; + CUDA_CALL_ERET (NULL, cuDeviceGetName, ptx_dev->name, sizeof ptx_dev->name, dev); @@ -2558,3 +2567,76 @@ poll_again: } /* TODO: Implement GOMP_OFFLOAD_async_run. */ + +/* The selectors are passed as strings, but are actually sets of multiple + trait property names, separated by '\0' and with an extra '\0' at + the end. 
Match such a string SELECTORS against an array of strings + CHOICES, that is terminated by a null pointer. + matches. */ +static bool +gomp_match_selectors (const char *selectors, const char **choices) +{ + while (*selectors != '\0') + { + bool match = false; + for (int i = 0; !match && choices[i]; i++) + match = !strcmp (selectors, choices[i]); + if (!match) + return false; + selectors += strlen (selectors) + 1; + } + return true; +} + +/* Here we can only have one possible match and it must be + the only selector provided. */ +static bool +gomp_match_selector (const char *selectors, const char *choice) +{ + if (strcmp (selectors, choice) != 0) + return false; + if (*(selectors + strlen (selectors) + 1) != '\0') + return false; + return true; +} + +#define CHECK_ISA(major, minor) \ + if ((device->compute_major > major \ + || (device->compute_major == major && device->compute_minor >= minor)) \ + && gomp_match_selector (isa, "sm_"#major#minor)) \ + return true + +bool +GOMP_OFFLOAD_evaluate_device (int device_num, const char *kind, + const char *arch, const char *isa) +{ + static const char *kind_choices[] = { "gpu", "nohost", NULL }; + static const char *arch_choices[] = { "nvptx", NULL }; + if (kind && !gomp_match_selectors (kind, kind_choices)) + return false; + + if (arch && !gomp_match_selectors (arch, arch_choices)) + return false; + + if (!isa) + return true; + + struct ptx_device *device = ptx_devices[device_num]; + + CHECK_ISA (3, 0); + CHECK_ISA (3, 5); + CHECK_ISA (3, 7); + CHECK_ISA (5, 0); + CHECK_ISA (5, 2); + CHECK_ISA (5, 3); + CHECK_ISA (6, 0); + CHECK_ISA (6, 1); + CHECK_ISA (6, 2); + CHECK_ISA (7, 0); + CHECK_ISA (7, 2); + CHECK_ISA (7, 5); + CHECK_ISA (8, 0); + CHECK_ISA (8, 6); + + return false; +} diff --git a/libgomp/selector.c b/libgomp/selector.c new file mode 100644 index 00000000000..5b21e582844 --- /dev/null +++ b/libgomp/selector.c @@ -0,0 +1,64 @@ +/* Copyright (C) 2022 Free Software Foundation, Inc. + Contributed by Mentor, a Siemens Business. 
+ + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +/* This file contains a placeholder implementation of + GOMP_evaluate_current_device. */ + +#include "libgomp.h" + +/* The selectors are passed as strings, but are actually sets of multiple + trait property names, separated by '\0' and with an extra '\0' at + the end. Match such a string SELECTORS against an array of strings + CHOICES, that is terminated by a null pointer. + matches. 
*/ +static bool +gomp_match_selectors (const char *selectors, const char **choices) +{ + while (*selectors != '\0') + { + bool match = false; + for (int i = 0; !match && choices[i]; i++) + match = !strcmp (selectors, choices[i]); + if (!match) + return false; + selectors += strlen (selectors) + 1; + } + return true; +} + +bool +GOMP_evaluate_current_device (const char *kind, const char *arch, + const char *isa) +{ + static const char *kind_choices[] = { "cpu", "host", NULL }; + + if (kind && !gomp_match_selectors (kind, kind_choices)) + return false; + + if (arch || isa) + return false; + + return true; +} diff --git a/libgomp/target.c b/libgomp/target.c index 23dc72476ec..806f9587d2b 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -5652,6 +5652,45 @@ omp_pause_resource_all (omp_pause_resource_t kind) ialias (omp_pause_resource) ialias (omp_pause_resource_all) +bool +GOMP_evaluate_target_device (int device_num, const char *kind, + const char *arch, const char *isa) +{ + bool result = true; + + /* -2 is a magic number to indicate the device number was not specified; + in that case it's supposed to use the default device. */ + if (device_num == -2) + device_num = omp_get_default_device (); + + if (kind && strcmp (kind, "any") == 0) + kind = NULL; + + gomp_debug (1, "%s: device_num = %u, kind=%s, arch=%s, isa=%s", + __FUNCTION__, device_num, kind, arch, isa); + + if (omp_get_device_num () == device_num) + result = GOMP_evaluate_current_device (kind, arch, isa); + else + { + if (!omp_is_initial_device ()) + /* Accelerators are not expected to know about other devices. */ + result = false; + else + { + struct gomp_device_descr *device = resolve_device (device_num, true); + if (device == NULL) + result = false; + else if (device->evaluate_device_func) + result = device->evaluate_device_func (device_num, kind, arch, + isa); + } + } + + gomp_debug (1, " -> %s\n", result ? 
"true" : "false"); + return result; +} + #ifdef PLUGIN_SUPPORT /* This function tries to load a plugin for DEVICE. Name of plugin is passed @@ -5706,6 +5745,7 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device, DLSYM_OPT (page_locked_host_free, page_locked_host_free); DLSYM (dev2host); DLSYM (host2dev); + DLSYM (evaluate_device); DLSYM_OPT (memcpy2d, memcpy2d); DLSYM_OPT (memcpy3d, memcpy3d); device->capabilities = device->get_caps_func ();