Re: [PATCH] selftests: make kselftest-clean remove libynl outputs
Hello: This patch was applied to netdev/net.git (main) by Jakub Kicinski : On Sat, 5 Oct 2024 14:56:00 -0700 you wrote: > Starting with 6.12 commit 85585b4bc8d8 ("selftests: add ncdevmem, netcat > for devmem TCP") kselftest-all creates additional outputs that > kselftest-clean does not cleanup: > $ make defconfig > $ make kselftest-all > $ make kselftest-clean > $ git clean -ndxf | grep tools/net > Would remove tools/net/ynl/lib/__pycache__/ > Would remove tools/net/ynl/lib/ynl.a > Would remove tools/net/ynl/lib/ynl.d > Would remove tools/net/ynl/lib/ynl.o > > [...] Here is the summary with links: - selftests: make kselftest-clean remove libynl outputs https://git.kernel.org/netdev/net/c/1fd9e4f25782 You are awesome, thank you! -- Deet-doot-dot, I am a bot. https://korg.docs.kernel.org/patchwork/pwbot.html
[PATCH v4 01/19] scripts: move genksyms crc32 implementation to a common include
To avoid duplication between host programs, move the crc32 code to a shared header file. Suggested-by: Petr Pavlu Signed-off-by: Sami Tolvanen Acked-by: Neal Gompa --- scripts/genksyms/genksyms.c | 77 +- scripts/include/crc32.h | 93 + 2 files changed, 94 insertions(+), 76 deletions(-) create mode 100644 scripts/include/crc32.h diff --git a/scripts/genksyms/genksyms.c b/scripts/genksyms/genksyms.c index f3901c55df23..2885bbcb9eec 100644 --- a/scripts/genksyms/genksyms.c +++ b/scripts/genksyms/genksyms.c @@ -18,6 +18,7 @@ #include #include +#include #include "genksyms.h" /*--*/ @@ -58,82 +59,6 @@ static struct string_list *mk_node(const char *string); static void print_location(void); static void print_type_name(enum symbol_type type, const char *name); -/*--*/ - -static const unsigned int crctab32[] = { - 0xU, 0x77073096U, 0xee0e612cU, 0x990951baU, 0x076dc419U, - 0x706af48fU, 0xe963a535U, 0x9e6495a3U, 0x0edb8832U, 0x79dcb8a4U, - 0xe0d5e91eU, 0x97d2d988U, 0x09b64c2bU, 0x7eb17cbdU, 0xe7b82d07U, - 0x90bf1d91U, 0x1db71064U, 0x6ab020f2U, 0xf3b97148U, 0x84be41deU, - 0x1adad47dU, 0x6ddde4ebU, 0xf4d4b551U, 0x83d385c7U, 0x136c9856U, - 0x646ba8c0U, 0xfd62f97aU, 0x8a65c9ecU, 0x14015c4fU, 0x63066cd9U, - 0xfa0f3d63U, 0x8d080df5U, 0x3b6e20c8U, 0x4c69105eU, 0xd56041e4U, - 0xa2677172U, 0x3c03e4d1U, 0x4b04d447U, 0xd20d85fdU, 0xa50ab56bU, - 0x35b5a8faU, 0x42b2986cU, 0xdbbbc9d6U, 0xacbcf940U, 0x32d86ce3U, - 0x45df5c75U, 0xdcd60dcfU, 0xabd13d59U, 0x26d930acU, 0x51de003aU, - 0xc8d75180U, 0xbfd06116U, 0x21b4f4b5U, 0x56b3c423U, 0xcfba9599U, - 0xb8bda50fU, 0x2802b89eU, 0x5f058808U, 0xc60cd9b2U, 0xb10be924U, - 0x2f6f7c87U, 0x58684c11U, 0xc1611dabU, 0xb6662d3dU, 0x76dc4190U, - 0x01db7106U, 0x98d220bcU, 0xefd5102aU, 0x71b18589U, 0x06b6b51fU, - 0x9fbfe4a5U, 0xe8b8d433U, 0x7807c9a2U, 0x0f00f934U, 0x9609a88eU, - 0xe10e9818U, 0x7f6a0dbbU, 0x086d3d2dU, 0x91646c97U, 0xe6635c01U, - 0x6b6b51f4U, 0x1c6c6162U, 0x856530d8U, 0xf262004eU, 0x6c0695edU, - 0x1b01a57bU, 0x8208f4c1U, 0xf50fc457U, 0x65b0d9c6U, 0x12b7e950U, - 0x8bbeb8eaU, 0xfcb9887cU, 0x62dd1ddfU, 0x15da2d49U, 0x8cd37cf3U, - 0xfbd44c65U, 0x4db26158U, 0x3ab551ceU, 0xa3bc0074U, 0xd4bb30e2U, - 0x4adfa541U, 0x3dd895d7U, 0xa4d1c46dU, 0xd3d6f4fbU, 0x4369e96aU, - 0x346ed9fcU, 0xad678846U, 0xda60b8d0U, 0x44042d73U, 0x33031de5U, - 0xaa0a4c5fU, 0xdd0d7cc9U, 0x5005713cU, 0x270241aaU, 0xbe0b1010U, - 0xc90c2086U, 0x5768b525U, 0x206f85b3U, 0xb966d409U, 0xce61e49fU, - 0x5edef90eU, 0x29d9c998U, 0xb0d09822U, 0xc7d7a8b4U, 0x59b33d17U, - 0x2eb40d81U, 0xb7bd5c3bU, 0xc0ba6cadU, 0xedb88320U, 0x9abfb3b6U, - 0x03b6e20cU, 0x74b1d29aU, 0xead54739U, 0x9dd277afU, 0x04db2615U, - 0x73dc1683U, 0xe3630b12U, 0x94643b84U, 0x0d6d6a3eU, 0x7a6a5aa8U, - 0xe40ecf0bU, 0x9309ff9dU, 0x0a00ae27U, 0x7d079eb1U, 0xf00f9344U, - 0x8708a3d2U, 0x1e01f268U, 0x6906c2feU, 0xf762575dU, 0x806567cbU, - 0x196c3671U, 0x6e6b06e7U, 0xfed41b76U, 0x89d32be0U, 0x10da7a5aU, - 0x67dd4accU, 0xf9b9df6fU, 0x8ebeeff9U, 0x17b7be43U, 0x60b08ed5U, - 0xd6d6a3e8U, 0xa1d1937eU, 0x38d8c2c4U, 0x4fdff252U, 0xd1bb67f1U, - 0xa6bc5767U, 0x3fb506ddU, 0x48b2364bU, 0xd80d2bdaU, 0xaf0a1b4cU, - 0x36034af6U, 0x41047a60U, 0xdf60efc3U, 0xa867df55U, 0x316e8eefU, - 0x4669be79U, 0xcb61b38cU, 0xbc66831aU, 0x256fd2a0U, 0x5268e236U, - 0xcc0c7795U, 0xbb0b4703U, 0x220216b9U, 0x5505262fU, 0xc5ba3bbeU, - 0xb2bd0b28U, 0x2bb45a92U, 0x5cb36a04U, 0xc2d7ffa7U, 0xb5d0cf31U, - 0x2cd99e8bU, 0x5bdeae1dU, 0x9b64c2b0U, 0xec63f226U, 0x756aa39cU, - 0x026d930aU, 0x9c0906a9U, 0xeb0e363fU, 0x72076785U, 0x05005713U, - 0x95bf4a82U, 0xe2b87a14U, 0x7bb12baeU, 0x0cb61b38U, 0x92d28e9bU, - 0xe5d5be0dU, 0x7cdcefb7U, 0x0bdbdf21U, 0x86d3d2d4U, 0xf1d4e242U, - 0x68ddb3f8U, 0x1fda836eU, 0x81be16cdU, 0xf6b9265bU, 0x6fb077e1U, - 0x18b74777U, 0x88085ae6U, 0xff0f6a70U, 0x66063bcaU, 0x11010b5cU, - 0x8f659effU, 0xf862ae69U, 0x616bffd3U, 0x166ccf45U, 0xa00ae278U, - 0xd70dd2eeU, 0x4e048354U, 0x3903b3c2U, 0xa7672661U, 0xd06016f7U, - 0x4969474dU, 0x3e6e77dbU, 0xaed16a4aU, 0xd9d65adcU, 0x40df0b66U, - 0x37d83bf0U, 0xa9bcae53U, 0xdebb9ec5U, 0x47b2cf7fU, 0x30b5ffe9U, - 0xbdbdf21cU, 0xcabac28aU, 0x53b39330U, 0x24b4a3a6U, 0xbad03605U, - 0xcdd70693U, 0x54de5729U, 0x23d967bfU, 0xb3667a2eU, 0xc4614ab8U, - 0x5d681b02U, 0x2a6f2b94U, 0xb40bbe37U, 0xc30c8ea1U, 0x5a05df1bU, - 0x2d02ef8dU -}; - -static unsigned long partial_crc32_one(unsigned char c, unsigned long crc) -{ - return crctab32[(crc ^ c) & 0xff] ^ (crc >> 8); -} - -static unsigned long partial_crc32(const char *s, unsigned long crc
[PATCH v4 02/19] tools: Add gendwarfksyms
Add a basic DWARF parser, which uses libdw to traverse the debugging information in an object file and looks for functions and variables. In follow-up patches, this will be expanded to produce symbol versions for CONFIG_MODVERSIONS from DWARF. Signed-off-by: Sami Tolvanen Acked-by: Neal Gompa Reviewed-by: Petr Pavlu --- kernel/module/Kconfig | 8 ++ scripts/Makefile | 1 + scripts/gendwarfksyms/.gitignore | 2 + scripts/gendwarfksyms/Makefile| 8 ++ scripts/gendwarfksyms/dwarf.c | 166 ++ scripts/gendwarfksyms/gendwarfksyms.c | 124 +++ scripts/gendwarfksyms/gendwarfksyms.h | 97 +++ scripts/gendwarfksyms/symbols.c | 83 + 8 files changed, 489 insertions(+) create mode 100644 scripts/gendwarfksyms/.gitignore create mode 100644 scripts/gendwarfksyms/Makefile create mode 100644 scripts/gendwarfksyms/dwarf.c create mode 100644 scripts/gendwarfksyms/gendwarfksyms.c create mode 100644 scripts/gendwarfksyms/gendwarfksyms.h create mode 100644 scripts/gendwarfksyms/symbols.c diff --git a/kernel/module/Kconfig b/kernel/module/Kconfig index 7c6588148d42..f9e5f82fa88b 100644 --- a/kernel/module/Kconfig +++ b/kernel/module/Kconfig @@ -169,6 +169,14 @@ config MODVERSIONS make them incompatible with the kernel you are running. If unsure, say N. +config GENDWARFKSYMS + bool + depends on DEBUG_INFO + # Requires full debugging information, split DWARF not supported. + depends on !DEBUG_INFO_REDUCED && !DEBUG_INFO_SPLIT + # Requires ELF object files. + depends on !LTO + config ASM_MODVERSIONS bool default HAVE_ASM_MODVERSIONS && MODVERSIONS diff --git a/scripts/Makefile b/scripts/Makefile index 6bcda4b9d054..d7fec46d38c0 100644 --- a/scripts/Makefile +++ b/scripts/Makefile @@ -54,6 +54,7 @@ targets += module.lds subdir-$(CONFIG_GCC_PLUGINS) += gcc-plugins subdir-$(CONFIG_MODVERSIONS) += genksyms +subdir-$(CONFIG_GENDWARFKSYMS) += gendwarfksyms subdir-$(CONFIG_SECURITY_SELINUX) += selinux subdir-$(CONFIG_SECURITY_IPE) += ipe diff --git a/scripts/gendwarfksyms/.gitignore b/scripts/gendwarfksyms/.gitignore new file mode 100644 index ..0927f8d3cd96 --- /dev/null +++ b/scripts/gendwarfksyms/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0 +/gendwarfksyms diff --git a/scripts/gendwarfksyms/Makefile b/scripts/gendwarfksyms/Makefile new file mode 100644 index ..9f8fec4fd39b --- /dev/null +++ b/scripts/gendwarfksyms/Makefile @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-2.0 +hostprogs-always-y += gendwarfksyms + +gendwarfksyms-objs += gendwarfksyms.o +gendwarfksyms-objs += dwarf.o +gendwarfksyms-objs += symbols.o + +HOSTLDLIBS_gendwarfksyms := -ldw -lelf diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c new file mode 100644 index ..81df3e2ad3ae --- /dev/null +++ b/scripts/gendwarfksyms/dwarf.c @@ -0,0 +1,166 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Google LLC + */ + +#include "gendwarfksyms.h" + +static bool get_ref_die_attr(Dwarf_Die *die, unsigned int id, Dwarf_Die *value) +{ + Dwarf_Attribute da; + + /* dwarf_formref_die returns a pointer instead of an error value. */ + return dwarf_attr(die, id, &da) && dwarf_formref_die(&da, value); +} + +#define DEFINE_GET_STRING_ATTR(attr) \ + static const char *get_##attr##_attr(Dwarf_Die *die) \ + {\ + Dwarf_Attribute da; \ + if (dwarf_attr(die, DW_AT_##attr, &da)) \ + return dwarf_formstring(&da);\ + return NULL; \ + } + +DEFINE_GET_STRING_ATTR(name) +DEFINE_GET_STRING_ATTR(linkage_name) + +static const char *get_symbol_name(Dwarf_Die *die) +{ + const char *name; + + /* rustc uses DW_AT_linkage_name for exported symbols */ + name = get_linkage_name_attr(die); + if (!name) + name = get_name_attr(die); + + return name; +} + +static bool match_export_symbol(struct state *state, Dwarf_Die *die) +{ + Dwarf_Die *source = die; + Dwarf_Die origin; + + /* If the DIE has an abstract origin, use it for type information. */ + if (get_ref_die_attr(die, DW_AT_abstract_origin, &origin)) + source = &origin; + + state->sym = symbol_get(get_symbol_name(die)); + + /* Look up using the origin name if there are no matches. */ + if (!state->sym && source != die) + state->sym = symbol_get(get_symbol_name(source)); + + state->die = *source; + return !!state->sym; +} + +/* + * Type string processing + */ +static void process(const char *s) +{ + s = s ?: ""; + + if (dump_dies) + fputs(s, stderr
[PATCH v4 00/19] Implement DWARF modversions
Hi, Here's v4 of the DWARF modversions series. The main motivation is modversions support for Rust, which is important for distributions like Android that are about to ship Rust kernel modules. Per Luis' request [1], v2 dropped the Rust specific bits from the series and instead added the feature as an option for the entire kernel. Matt is addressing Rust modversion_info compatibility issues in a separate series [2], and we'll follow up with a patch to actually allow CONFIG_MODVERSIONS with Rust once everything else has been sorted out. Short background: Unlike C, Rust source code doesn't have sufficient information about the final ABI, as the compiler has considerable freedom in adjusting structure layout, for example, which makes using a source code parser like genksyms a non-starter. Based on earlier feedback, this series uses DWARF debugging information for computing versions. DWARF is an established and a relatively stable format, which includes all the necessary ABI details, and adding a CONFIG_DEBUG_INFO dependency for Rust symbol versioning seems like a reasonable trade-off. The first patch moves the genksyms CRC32 implementation to a shared header file and the next 15 patches add gendwarfksyms, a tool for computing symbol versions from DWARF. When passed a list of exported symbols and object files, the tool generates an expanded type string for each symbol and computes symbol CRCs similarly to genksyms. gendwarfksyms is written in C and uses libdw to process DWARF. Patch 17 ensures that debugging information is present where we need it, patch 18 adds gendwarfksyms as an alternative to genksyms, and the last patch adds documentation. v4 is based on v6.12-rc2 and for your convenience the series is also available here: https://github.com/samitolvanen/linux/commits/gendwarfksyms-v4 If you also want to test the series with Rust modules, this branch adds Matt's latest modversion_info series and a small patch to enable Rust modversions: https://github.com/samitolvanen/linux/commits/rustmodversions-v4 Sami [1] https://lore.kernel.org/lkml/znizetkkqweig...@bombadil.infradead.org/ [2] https://lore.kernel.org/lkml/20240925233854.90072-1-mmau...@google.com/ --- v4: - Rebased on v6.12-rc2, which now includes all the prerequisites. - Dropped unnecessary name_only parameter for symbols.c::for_each and cleaned up error handling. (Patch 3) - Fixed anonymous scope handling to ensure unnamed DIEs don't get names. (Patch 4) - Added non-variant children to variant_type output, and included DW_AT_discr_value attributes for variants. (Patch 9) - Added another symbol pointer test case. (Patch 16) - Picked up (Acked|Reviewed)-by tags from v3. v3: https://lore.kernel.org/lkml/20240923181846.549877-22-samitolva...@google.com/ - Updated SPX license headers. - Squashed the first two patches in v2 and tried to reduce churn as much as reasonable. - Dropped patch 18 from v2 ("x86/asm-prototypes: Include ") as it's addressed by a separate patch. - Changed the error handling code to immediately terminate instead of propagating the errors back to main, which cleaned up the code quite a bit. - Switched to the list and hashtable implementations in scripts and dropped the remaining tools/include dependencies. Added a couple missing list macros. (patch 1) - Moved the genksyms CRC32 implementation to scripts/include and dropped the duplicate code. (patches 2 and 14) - Switched from ad-hoc command line parsing to getopt_long (patch 3). - Added structure member and function parameter names to the DIE output to match genksyms behavior, and tweaked the symtypes format to be more parser-friendly in general based on Petr's suggestions. - Replaced the declaration-only struct annotations with more generic kABI stability rules that allow source code annotations to be used where #ifndef __GENKSYMS__ was previously used. Added support for rules that can be used to exclude enumerators from versioning. (patch 16) - Per Miroslav's suggestion, added an option to hide structure members from versioning when they're added to existing alignment holes, for example. (patch 16) - Per Greg's request, added documentation and example macros for the --stable features, and a couple of test cases. (patches 15, 16, and 20) - Fixed making symtypes files, which need to depend on .o files with gendwarfksyms. (patch 19) - Addressed several other smaller issues that Petr and Masahiro kindly pointed out during the v2 review. v2: https://lore.kernel.org/lkml/20240815173903.4172139-21-samitolva...@google.com/ - Per Luis' request, dropped Rust-specific patches and added gendwarfksyms as an alternative to genksyms for the entire kernel. - Added support for missing DWARF features needed to handle also non-Rust code. - Changed symbol address matching to use the symbol table information instead of relying on addresses in DWARF. - Added __gendwarfksyms_ptr patches to ensure the compiler emi
[PATCH v4 05/19] gendwarfksyms: Add a cache for processed DIEs
Basic types in DWARF repeat frequently and traversing the DIEs using libdw is relatively slow. Add a simple hashtable based cache for the processed DIEs. Signed-off-by: Sami Tolvanen Acked-by: Neal Gompa --- scripts/gendwarfksyms/Makefile| 1 + scripts/gendwarfksyms/die.c | 143 ++ scripts/gendwarfksyms/dwarf.c | 136 +--- scripts/gendwarfksyms/gendwarfksyms.c | 6 ++ scripts/gendwarfksyms/gendwarfksyms.h | 63 +++- 5 files changed, 308 insertions(+), 41 deletions(-) create mode 100644 scripts/gendwarfksyms/die.c diff --git a/scripts/gendwarfksyms/Makefile b/scripts/gendwarfksyms/Makefile index 9f8fec4fd39b..c0d4ce50fc27 100644 --- a/scripts/gendwarfksyms/Makefile +++ b/scripts/gendwarfksyms/Makefile @@ -2,6 +2,7 @@ hostprogs-always-y += gendwarfksyms gendwarfksyms-objs += gendwarfksyms.o +gendwarfksyms-objs += die.o gendwarfksyms-objs += dwarf.o gendwarfksyms-objs += symbols.o diff --git a/scripts/gendwarfksyms/die.c b/scripts/gendwarfksyms/die.c new file mode 100644 index ..28d89fce89fc --- /dev/null +++ b/scripts/gendwarfksyms/die.c @@ -0,0 +1,143 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Google LLC + */ + +#include +#include "gendwarfksyms.h" + +#define DIE_HASH_BITS 20 + +/* {die->addr, state} -> struct die * */ +static HASHTABLE_DEFINE(die_map, 1 << DIE_HASH_BITS); + +static unsigned int map_hits; +static unsigned int map_misses; + +static inline unsigned int die_hash(uintptr_t addr, enum die_state state) +{ + return hash_32(addr_hash(addr) ^ (unsigned int)state); +} + +static void init_die(struct die *cd) +{ + cd->state = DIE_INCOMPLETE; + cd->fqn = NULL; + cd->tag = -1; + cd->addr = 0; + INIT_LIST_HEAD(&cd->fragments); +} + +static struct die *create_die(Dwarf_Die *die, enum die_state state) +{ + struct die *cd; + + cd = xmalloc(sizeof(struct die)); + init_die(cd); + cd->addr = (uintptr_t)die->addr; + + hash_add(die_map, &cd->hash, die_hash(cd->addr, state)); + return cd; +} + +int __die_map_get(uintptr_t addr, enum die_state state, struct die **res) +{ + struct die *cd; + + hash_for_each_possible(die_map, cd, hash, die_hash(addr, state)) { + if (cd->addr == addr && cd->state == state) { + *res = cd; + return 0; + } + } + + return -1; +} + +struct die *die_map_get(Dwarf_Die *die, enum die_state state) +{ + struct die *cd; + + if (__die_map_get((uintptr_t)die->addr, state, &cd) == 0) { + map_hits++; + return cd; + } + + map_misses++; + return create_die(die, state); +} + +static void reset_die(struct die *cd) +{ + struct die_fragment *tmp; + struct die_fragment *df; + + list_for_each_entry_safe(df, tmp, &cd->fragments, list) { + if (df->type == FRAGMENT_STRING) + free(df->data.str); + free(df); + } + + if (cd->fqn && *cd->fqn) + free(cd->fqn); + init_die(cd); +} + +void die_map_free(void) +{ + struct hlist_node *tmp; + unsigned int stats[DIE_LAST + 1]; + struct die *cd; + int i; + + memset(stats, 0, sizeof(stats)); + + hash_for_each_safe(die_map, cd, tmp, hash) { + stats[cd->state]++; + reset_die(cd); + free(cd); + } + hash_init(die_map); + + if (map_hits + map_misses > 0) + debug("hits %u, misses %u (hit rate %.02f%%)", map_hits, + map_misses, + (100.0f * map_hits) / (map_hits + map_misses)); + + for (i = 0; i <= DIE_LAST; i++) + debug("%s: %u entries", die_state_name(i), stats[i]); +} + +static struct die_fragment *append_item(struct die *cd) +{ + struct die_fragment *df; + + df = xmalloc(sizeof(struct die_fragment)); + df->type = FRAGMENT_EMPTY; + list_add_tail(&df->list, &cd->fragments); + return df; +} + +void die_map_add_string(struct die *cd, const char *str) +{ + struct die_fragment *df; + + if (!cd) + return; + + df = append_item(cd); + df->data.str = xstrdup(str); + df->type = FRAGMENT_STRING; +} + +void die_map_add_die(struct die *cd, struct die *child) +{ + struct die_fragment *df; + + if (!cd) + return; + + df = append_item(cd); + df->data.addr = child->addr; + df->type = FRAGMENT_DIE; +} diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c index 35fd1dfeeadc..c2cd4743515e 100644 --- a/scripts/gendwarfksyms/dwarf.c +++ b/scripts/gendwarfksyms/dwarf.c @@ -71,17 +71,19 @@ static bool match_export_symbol(struct state *state, Dwarf_Die *die) /* * Type string processing */ -static void process(const char *s) +static void process(
[PATCH v4 04/19] gendwarfksyms: Expand base_type
Start making gendwarfksyms more useful by adding support for expanding DW_TAG_base_type types and basic DWARF attributes. Example: $ echo loops_per_jiffy | \ scripts/gendwarfksyms/gendwarfksyms \ --debug --dump-dies vmlinux.o ... gendwarfksyms: process_symbol: loops_per_jiffy variable base_type unsigned long byte_size(8) encoding(7) ... Signed-off-by: Sami Tolvanen Acked-by: Neal Gompa Reviewed-by: Petr Pavlu --- scripts/gendwarfksyms/dwarf.c | 159 ++ 1 file changed, 159 insertions(+) diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c index 81df3e2ad3ae..35fd1dfeeadc 100644 --- a/scripts/gendwarfksyms/dwarf.c +++ b/scripts/gendwarfksyms/dwarf.c @@ -3,8 +3,20 @@ * Copyright (C) 2024 Google LLC */ +#include #include "gendwarfksyms.h" +#define DEFINE_GET_ATTR(attr, type)\ + static bool get_##attr##_attr(Dwarf_Die *die, unsigned int id, \ + type *value) \ + { \ + Dwarf_Attribute da;\ + return dwarf_attr(die, id, &da) && \ + !dwarf_form##attr(&da, value); \ + } + +DEFINE_GET_ATTR(udata, Dwarf_Word) + static bool get_ref_die_attr(Dwarf_Die *die, unsigned int id, Dwarf_Die *value) { Dwarf_Attribute da; @@ -67,6 +79,109 @@ static void process(const char *s) fputs(s, stderr); } +#define MAX_FMT_BUFFER_SIZE 128 + +static void process_fmt(const char *fmt, ...) +{ + char buf[MAX_FMT_BUFFER_SIZE]; + va_list args; + + va_start(args, fmt); + + if (checkp(vsnprintf(buf, sizeof(buf), fmt, args)) >= sizeof(buf)) + error("vsnprintf overflow: increase MAX_FMT_BUFFER_SIZE"); + + process(buf); + va_end(args); +} + +#define MAX_FQN_SIZE 64 + +/* Get a fully qualified name from DWARF scopes */ +static char *get_fqn(Dwarf_Die *die) +{ + const char *list[MAX_FQN_SIZE]; + Dwarf_Die *scopes = NULL; + bool has_name = false; + char *fqn = NULL; + char *p; + int count = 0; + int len = 0; + int res; + int i; + + res = checkp(dwarf_getscopes_die(die, &scopes)); + if (!res) { + list[count] = get_name_attr(die); + + if (!list[count]) + return NULL; + + len += strlen(list[count]); + count++; + + goto done; + } + + for (i = res - 1; i >= 0 && count < MAX_FQN_SIZE; i--) { + if (dwarf_tag(&scopes[i]) == DW_TAG_compile_unit) + continue; + + list[count] = get_name_attr(&scopes[i]); + + if (list[count]) { + has_name = true; + } else { + list[count] = ""; + has_name = false; + } + + len += strlen(list[count]); + count++; + + if (i > 0) { + list[count++] = "::"; + len += 2; + } + } + + free(scopes); + + if (count == MAX_FQN_SIZE) + warn("increase MAX_FQN_SIZE: reached the maximum"); + + /* Consider the DIE unnamed if the last scope doesn't have a name */ + if (!has_name) + return NULL; +done: + fqn = xmalloc(len + 1); + *fqn = '\0'; + + p = fqn; + for (i = 0; i < count; i++) + p = stpcpy(p, list[i]); + + return fqn; +} + +static void process_fqn(Dwarf_Die *die) +{ + process(" "); + process(get_fqn(die) ?: ""); +} + +#define DEFINE_PROCESS_UDATA_ATTRIBUTE(attribute) \ + static void process_##attribute##_attr(Dwarf_Die *die) \ + { \ + Dwarf_Word value; \ + if (get_udata_attr(die, DW_AT_##attribute, &value)) \ + process_fmt(" " #attribute "(%" PRIu64 ")", value); \ + } + +DEFINE_PROCESS_UDATA_ATTRIBUTE(alignment) +DEFINE_PROCESS_UDATA_ATTRIBUTE(byte_size) +DEFINE_PROCESS_UDATA_ATTRIBUTE(encoding) + bool match_all(Dwarf_Die *die) { return true; @@ -93,6 +208,49 @@ int process_die_container(struct state *state, Dwarf_Die *die, return 0; } +static int process_type(struct state *state, Dwarf_Die *die); + +static void process_type_attr(struct state *state, Dwarf_Die *die) +{ + Dwarf_Die type; + + if (get_ref_die_attr(die, DW_AT_type, &type)) { + check(process_type(state, &type)); + return; + } + + /* Compilers can omit DW_AT_type -- print out 'void' to clarify */ + p
[PATCH v4 06/19] gendwarfksyms: Expand type modifiers and typedefs
Add support for expanding DWARF type modifiers, such as pointers, const values etc., and typedefs. These types all have DW_AT_type attribute pointing to the underlying type, and thus produce similar output. Also add linebreaks and indentation to debugging output to make it more readable. Signed-off-by: Sami Tolvanen Acked-by: Neal Gompa Reviewed-by: Petr Pavlu --- scripts/gendwarfksyms/die.c | 12 + scripts/gendwarfksyms/dwarf.c | 67 +++ scripts/gendwarfksyms/gendwarfksyms.h | 5 ++ 3 files changed, 84 insertions(+) diff --git a/scripts/gendwarfksyms/die.c b/scripts/gendwarfksyms/die.c index 28d89fce89fc..2829387fd815 100644 --- a/scripts/gendwarfksyms/die.c +++ b/scripts/gendwarfksyms/die.c @@ -130,6 +130,18 @@ void die_map_add_string(struct die *cd, const char *str) df->type = FRAGMENT_STRING; } +void die_map_add_linebreak(struct die *cd, int linebreak) +{ + struct die_fragment *df; + + if (!cd) + return; + + df = append_item(cd); + df->data.linebreak = linebreak; + df->type = FRAGMENT_LINEBREAK; +} + void die_map_add_die(struct die *cd, struct die *child) { struct die_fragment *df; diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c index c2cd4743515e..1d67ee18a388 100644 --- a/scripts/gendwarfksyms/dwarf.c +++ b/scripts/gendwarfksyms/dwarf.c @@ -6,6 +6,17 @@ #include #include "gendwarfksyms.h" +static bool do_linebreak; +static int indentation_level; + +/* Line breaks and indentation for pretty-printing */ +static void process_linebreak(struct die *cache, int n) +{ + indentation_level += n; + do_linebreak = true; + die_map_add_linebreak(cache, n); +} + #define DEFINE_GET_ATTR(attr, type)\ static bool get_##attr##_attr(Dwarf_Die *die, unsigned int id, \ type *value) \ @@ -75,6 +86,12 @@ static void process(struct die *cache, const char *s) { s = s ?: ""; + if (dump_dies && do_linebreak) { + fputs("\n", stderr); + for (int i = 0; i < indentation_level; i++) + fputs(" ", stderr); + do_linebreak = false; + } if (dump_dies) fputs(s, stderr); @@ -238,6 +255,40 @@ static void process_type_attr(struct state *state, struct die *cache, process(cache, "base_type void"); } +/* Container types with DW_AT_type */ +static void __process_type(struct state *state, struct die *cache, + Dwarf_Die *die, const char *type) +{ + process(cache, type); + process_fqn(cache, die); + process(cache, " {"); + process_linebreak(cache, 1); + process_type_attr(state, cache, die); + process_linebreak(cache, -1); + process(cache, "}"); + process_byte_size_attr(cache, die); + process_alignment_attr(cache, die); +} + +#define DEFINE_PROCESS_TYPE(type)\ + static void process_##type##_type(struct state *state, \ + struct die *cache, Dwarf_Die *die) \ + {\ + __process_type(state, cache, die, #type "_type");\ + } + +DEFINE_PROCESS_TYPE(atomic) +DEFINE_PROCESS_TYPE(const) +DEFINE_PROCESS_TYPE(immutable) +DEFINE_PROCESS_TYPE(packed) +DEFINE_PROCESS_TYPE(pointer) +DEFINE_PROCESS_TYPE(reference) +DEFINE_PROCESS_TYPE(restrict) +DEFINE_PROCESS_TYPE(rvalue_reference) +DEFINE_PROCESS_TYPE(shared) +DEFINE_PROCESS_TYPE(volatile) +DEFINE_PROCESS_TYPE(typedef) + static void process_base_type(struct state *state, struct die *cache, Dwarf_Die *die) { @@ -259,6 +310,9 @@ static void process_cached(struct state *state, struct die *cache, case FRAGMENT_STRING: process(NULL, df->data.str); break; + case FRAGMENT_LINEBREAK: + process_linebreak(NULL, df->data.linebreak); + break; case FRAGMENT_DIE: if (!dwarf_die_addr_die(dwarf_cu_getdwarf(die->cu), (void *)df->data.addr, &child)) @@ -294,7 +348,20 @@ static int process_type(struct state *state, struct die *parent, Dwarf_Die *die) } switch (tag) { + /* Type modifiers */ + PROCESS_TYPE(atomic) + PROCESS_TYPE(const) + PROCESS_TYPE(immutable) + PROCESS_TYPE(packed) + PROCESS_TYPE(pointer) + PROCESS_TYPE(reference) + PROCESS_TYPE(restrict) + PROCESS_TYPE(rvalue_reference) + PROCESS_TYPE(shared) + PROCESS_TYPE(volatile) + /* Other types */ PROCESS_TYPE(base) + PROCESS_TYPE(typedef) default:
[PATCH v4 03/19] gendwarfksyms: Add address matching
The compiler may choose not to emit type information in DWARF for all aliases, but it's possible for each alias to be exported separately. To ensure we find type information for the aliases as well, read {section, address} tuples from the symbol table and match symbols also by address. Signed-off-by: Sami Tolvanen Acked-by: Neal Gompa --- scripts/gendwarfksyms/gendwarfksyms.c | 2 + scripts/gendwarfksyms/gendwarfksyms.h | 13 +++ scripts/gendwarfksyms/symbols.c | 148 ++ 3 files changed, 163 insertions(+) diff --git a/scripts/gendwarfksyms/gendwarfksyms.c b/scripts/gendwarfksyms/gendwarfksyms.c index 1a9be8fa18c8..6fb12f9f6023 100644 --- a/scripts/gendwarfksyms/gendwarfksyms.c +++ b/scripts/gendwarfksyms/gendwarfksyms.c @@ -103,6 +103,8 @@ int main(int argc, char **argv) error("open failed for '%s': %s", argv[n], strerror(errno)); + symbol_read_symtab(fd); + dwfl = dwfl_begin(&callbacks); if (!dwfl) error("dwfl_begin failed for '%s': %s", argv[n], diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h index 1a10d18f178e..a058647e2361 100644 --- a/scripts/gendwarfksyms/gendwarfksyms.h +++ b/scripts/gendwarfksyms/gendwarfksyms.h @@ -66,14 +66,27 @@ extern int dump_dies; * symbols.c */ +static inline unsigned int addr_hash(uintptr_t addr) +{ + return hash_ptr((const void *)addr); +} + +struct symbol_addr { + uint32_t section; + Elf64_Addr address; +}; + struct symbol { const char *name; + struct symbol_addr addr; + struct hlist_node addr_hash; struct hlist_node name_hash; }; typedef void (*symbol_callback_t)(struct symbol *, void *arg); void symbol_read_exports(FILE *file); +void symbol_read_symtab(int fd); struct symbol *symbol_get(const char *name); /* diff --git a/scripts/gendwarfksyms/symbols.c b/scripts/gendwarfksyms/symbols.c index 4df685deb9e0..6cb99b8769ea 100644 --- a/scripts/gendwarfksyms/symbols.c +++ b/scripts/gendwarfksyms/symbols.c @@ -6,8 +6,39 @@ #include "gendwarfksyms.h" #define SYMBOL_HASH_BITS 15 + +/* struct symbol_addr -> struct symbol */ +static HASHTABLE_DEFINE(symbol_addrs, 1 << SYMBOL_HASH_BITS); +/* name -> struct symbol */ static HASHTABLE_DEFINE(symbol_names, 1 << SYMBOL_HASH_BITS); +static inline unsigned int symbol_addr_hash(const struct symbol_addr *addr) +{ + return hash_32(addr->section ^ addr_hash(addr->address)); +} + +static unsigned int __for_each_addr(struct symbol *sym, symbol_callback_t func, + void *data) +{ + struct hlist_node *tmp; + struct symbol *match = NULL; + unsigned int processed = 0; + + hash_for_each_possible_safe(symbol_addrs, match, tmp, addr_hash, + symbol_addr_hash(&sym->addr)) { + if (match == sym) + continue; /* Already processed */ + + if (match->addr.section == sym->addr.section && + match->addr.address == sym->addr.address) { + func(match, data); + ++processed; + } + } + + return processed; +} + static unsigned int for_each(const char *name, symbol_callback_t func, void *data) { @@ -22,9 +53,13 @@ static unsigned int for_each(const char *name, symbol_callback_t func, if (strcmp(match->name, name)) continue; + /* Call func for the match, and all address matches */ if (func) func(match, data); + if (match->addr.section != SHN_UNDEF) + return __for_each_addr(match, func, data) + 1; + return 1; } @@ -56,6 +91,7 @@ void symbol_read_exports(FILE *file) sym = xcalloc(1, sizeof(struct symbol)); sym->name = name; + sym->addr.section = SHN_UNDEF; hash_add(symbol_names, &sym->name_hash, hash_str(sym->name)); ++nsym; @@ -81,3 +117,115 @@ struct symbol *symbol_get(const char *name) for_each(name, get_symbol, &sym); return sym; } + +typedef void (*elf_symbol_callback_t)(const char *name, GElf_Sym *sym, + Elf32_Word xndx, void *arg); + +static void elf_for_each_global(int fd, elf_symbol_callback_t func, void *arg) +{ + size_t sym_size; + GElf_Shdr shdr_mem; + GElf_Shdr *shdr; + Elf_Data *xndx_data = NULL; + Elf_Scn *scn; + Elf *elf; + + if (elf_version(EV_CURRENT) != EV_CURRENT) + error("elf_version failed: %s", elf_errmsg(-1)); + + elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); + if (!elf) + error("elf_begin failed: %s", elf_errmsg(-1)); + + scn = elf_nextscn
[PATCH v4 09/19] gendwarfksyms: Expand structure types
Recursively expand DWARF structure types, i.e. structs, unions, and enums. Also include relevant DWARF attributes in type strings to encode structure layout, for example. Example output with --dump-dies: subprogram ( formal_parameter structure_type &str { member pointer_type { base_type u8 byte_size(1) encoding(7) } data_ptr data_member_location(0) , member base_type usize byte_size(8) encoding(7) length data_member_location(8) } byte_size(16) alignment(8) msg ) -> base_type void Signed-off-by: Sami Tolvanen Acked-by: Neal Gompa --- scripts/gendwarfksyms/dwarf.c | 138 +- scripts/gendwarfksyms/gendwarfksyms.h | 5 + 2 files changed, 141 insertions(+), 2 deletions(-) diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c index ade9b3b7b119..f5cebbdcc212 100644 --- a/scripts/gendwarfksyms/dwarf.c +++ b/scripts/gendwarfksyms/dwarf.c @@ -207,9 +207,14 @@ static void process_fqn(struct die *cache, Dwarf_Die *die) value);\ } +DEFINE_PROCESS_UDATA_ATTRIBUTE(accessibility) DEFINE_PROCESS_UDATA_ATTRIBUTE(alignment) +DEFINE_PROCESS_UDATA_ATTRIBUTE(bit_size) DEFINE_PROCESS_UDATA_ATTRIBUTE(byte_size) DEFINE_PROCESS_UDATA_ATTRIBUTE(encoding) +DEFINE_PROCESS_UDATA_ATTRIBUTE(data_bit_offset) +DEFINE_PROCESS_UDATA_ATTRIBUTE(data_member_location) +DEFINE_PROCESS_UDATA_ATTRIBUTE(discr_value) /* Match functions -- die_match_callback_t */ #define DEFINE_MATCH(type) \ @@ -218,7 +223,9 @@ DEFINE_PROCESS_UDATA_ATTRIBUTE(encoding) return dwarf_tag(die) == DW_TAG_##type##_type; \ } +DEFINE_MATCH(enumerator) DEFINE_MATCH(formal_parameter) +DEFINE_MATCH(member) DEFINE_MATCH(subrange) bool match_all(Dwarf_Die *die) @@ -297,6 +304,10 @@ static void __process_list_type(struct state *state, struct die *cache, process(cache, " "); process(cache, name); } + process_accessibility_attr(cache, die); + process_bit_size_attr(cache, die); + process_data_bit_offset_attr(cache, die); + process_data_member_location_attr(cache, die); } #define DEFINE_PROCESS_LIST_TYPE(type) \ @@ -307,6 +318,7 @@ static void __process_list_type(struct state *state, struct die *cache, } DEFINE_PROCESS_LIST_TYPE(formal_parameter) +DEFINE_PROCESS_LIST_TYPE(member) /* Container types with DW_AT_type */ static void __process_type(struct state *state, struct die *cache, @@ -339,6 +351,7 @@ DEFINE_PROCESS_TYPE(reference) DEFINE_PROCESS_TYPE(restrict) DEFINE_PROCESS_TYPE(rvalue_reference) DEFINE_PROCESS_TYPE(shared) +DEFINE_PROCESS_TYPE(template_type_parameter) DEFINE_PROCESS_TYPE(volatile) DEFINE_PROCESS_TYPE(typedef) @@ -392,6 +405,107 @@ static void process_subroutine_type(struct state *state, struct die *cache, __process_subroutine_type(state, cache, die, "subroutine_type"); } +static void process_variant_type(struct state *state, struct die *cache, +Dwarf_Die *die) +{ + process_list_comma(state, cache); + process(cache, "variant {"); + process_linebreak(cache, 1); + check(process_die_container(state, cache, die, process_type, + match_member_type)); + process_linebreak(cache, -1); + process(cache, "}"); + process_discr_value_attr(cache, die); +} + +static void process_variant_part_type(struct state *state, struct die *cache, + Dwarf_Die *die) +{ + process_list_comma(state, cache); + process(cache, "variant_part {"); + process_linebreak(cache, 1); + check(process_die_container(state, cache, die, process_type, + match_all)); + process_linebreak(cache, -1); + process(cache, "}"); +} + +static int ___process_structure_type(struct state *state, struct die *cache, +Dwarf_Die *die) +{ + switch (dwarf_tag(die)) { + case DW_TAG_member: + case DW_TAG_variant_part: + return check(process_type(state, cache, die)); + case DW_TAG_class_type: + case DW_TAG_enumeration_type: + case DW_TAG_structure_type: + case DW_TAG_template_type_parameter: + case DW_TAG_union_type: + case DW_TAG_subprogram: + /* Skip non-member types, including member functions */ + return 0; + default: + error("unexpected structure_type child: %x", dwarf_tag(die)); + } +} + +static void __process_structure_type(struct state *state, struct die *cache, +Dwarf_Die *die, const char *type, +die_callback_t process_func, +die_match_callback_t match_func) +{ +
[PATCH v4 07/19] gendwarfksyms: Expand subroutine_type
Add support for expanding DW_TAG_subroutine_type and the parameters in DW_TAG_formal_parameter. Use this to also expand subprograms. Example output with --dump-dies: subprogram ( formal_parameter pointer_type { const_type { base_type char byte_size(1) encoding(6) } } ) -> base_type unsigned long byte_size(8) encoding(7) Signed-off-by: Sami Tolvanen Acked-by: Neal Gompa Reviewed-by: Petr Pavlu --- scripts/gendwarfksyms/dwarf.c | 84 ++- scripts/gendwarfksyms/gendwarfksyms.h | 4 ++ 2 files changed, 85 insertions(+), 3 deletions(-) diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c index 1d67ee18a388..7e6b477d7c12 100644 --- a/scripts/gendwarfksyms/dwarf.c +++ b/scripts/gendwarfksyms/dwarf.c @@ -211,6 +211,15 @@ DEFINE_PROCESS_UDATA_ATTRIBUTE(alignment) DEFINE_PROCESS_UDATA_ATTRIBUTE(byte_size) DEFINE_PROCESS_UDATA_ATTRIBUTE(encoding) +/* Match functions -- die_match_callback_t */ +#define DEFINE_MATCH(type) \ + static bool match_##type##_type(Dwarf_Die *die)\ + { \ + return dwarf_tag(die) == DW_TAG_##type##_type; \ + } + +DEFINE_MATCH(formal_parameter) + bool match_all(Dwarf_Die *die) { return true; @@ -223,19 +232,28 @@ int process_die_container(struct state *state, struct die *cache, Dwarf_Die current; int res; + /* Track the first item in lists. */ + if (state) + state->first_list_item = true; + res = checkp(dwarf_child(die, ¤t)); while (!res) { if (match(¤t)) { /* <0 = error, 0 = continue, >0 = stop */ res = checkp(func(state, cache, ¤t)); if (res) - return res; + goto out; } res = checkp(dwarf_siblingof(¤t, ¤t)); } - return 0; + res = 0; +out: + if (state) + state->first_list_item = false; + + return res; } static int process_type(struct state *state, struct die *parent, @@ -255,6 +273,40 @@ static void process_type_attr(struct state *state, struct die *cache, process(cache, "base_type void"); } +static void process_list_comma(struct state *state, struct die *cache) +{ + if (state->first_list_item) { + state->first_list_item = false; + } else { + process(cache, " ,"); + process_linebreak(cache, 0); + } +} + +/* Comma-separated with DW_AT_type */ +static void __process_list_type(struct state *state, struct die *cache, + Dwarf_Die *die, const char *type) +{ + const char *name = get_name_attr(die); + + process_list_comma(state, cache); + process(cache, type); + process_type_attr(state, cache, die); + if (name) { + process(cache, " "); + process(cache, name); + } +} + +#define DEFINE_PROCESS_LIST_TYPE(type) \ + static void process_##type##_type(struct state *state, \ + struct die *cache, Dwarf_Die *die) \ + {\ + __process_list_type(state, cache, die, #type " "); \ + } + +DEFINE_PROCESS_LIST_TYPE(formal_parameter) + /* Container types with DW_AT_type */ static void __process_type(struct state *state, struct die *cache, Dwarf_Die *die, const char *type) @@ -289,6 +341,29 @@ DEFINE_PROCESS_TYPE(shared) DEFINE_PROCESS_TYPE(volatile) DEFINE_PROCESS_TYPE(typedef) +static void __process_subroutine_type(struct state *state, struct die *cache, + Dwarf_Die *die, const char *type) +{ + process(cache, type); + process(cache, " ("); + process_linebreak(cache, 1); + /* Parameters */ + check(process_die_container(state, cache, die, process_type, + match_formal_parameter_type)); + process_linebreak(cache, -1); + process(cache, ")"); + process_linebreak(cache, 0); + /* Return type */ + process(cache, "-> "); + process_type_attr(state, cache, die); +} + +static void process_subroutine_type(struct state *state, struct die *cache, + Dwarf_Die *die) +{ + __process_subroutine_type(state, cache, die, "subroutine_type"); +} + static void process_base_type(struct state *state, struct die *cache, Dwarf_Die *die) { @@ -359,8 +434,11 @@ static int process_type(struct state *state, struct die *parent, Dwarf_Die *die) PROCESS_TYPE(rvalue_reference) PROCESS_TYPE(shared) PROCESS_TYPE(volat
Re: [PATCH v2] selftests: vDSO: Explicitly include sched.h
On 10/7/24 20:33, Yu Liao wrote: The previous commit introduced the use of CLONE_NEWTIME without including which contains its definition. Add an explicit include of to ensure that CLONE_NEWTIME is correctly defined before it is used. Fixes: 2aec90036dcd ("selftests: vDSO: ensure vgetrandom works in a time namespace") Signed-off-by: Yu Liao --- Changes in v2: - Include instead of v1: https://lore.kernel.org/all/20240919111841.20226-1-liaoy...@huawei.com/ tools/testing/selftests/vDSO/vdso_test_getrandom.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/vDSO/vdso_test_getrandom.c b/tools/testing/selftests/vDSO/vdso_test_getrandom.c index 72a1d9b43a84..ddf37e3ab18b 100644 --- a/tools/testing/selftests/vDSO/vdso_test_getrandom.c +++ b/tools/testing/selftests/vDSO/vdso_test_getrandom.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include Thank you. Applied to linux-kselftest fixes for next rc. thanks, -- Shuah
Re: [PATCH net-next v20 01/14] mm: page_frag: add a test module for page_frag
On 10/8/24 05:20, Yunsheng Lin wrote: The testing is done by ensuring that the fragment allocated from a frag_frag_cache instance is pushed into a ptr_ring instance in a kthread binded to a specified cpu, and a kthread binded to a specified cpu will pop the fragment from the ptr_ring and free the fragment. CC: Alexander Duyck Signed-off-by: Yunsheng Lin Reviewed-by: Alexander Duyck Signed-off-by should be last. Same comment on all the other patches in this series. When you have 4 patches, it is a good practice to add cover-letter. --- tools/testing/selftests/mm/Makefile | 3 + tools/testing/selftests/mm/page_frag/Makefile | 18 ++ .../selftests/mm/page_frag/page_frag_test.c | 173 ++ tools/testing/selftests/mm/run_vmtests.sh | 8 + tools/testing/selftests/mm/test_page_frag.sh | 171 + 5 files changed, 373 insertions(+) create mode 100644 tools/testing/selftests/mm/page_frag/Makefile create mode 100644 tools/testing/selftests/mm/page_frag/page_frag_test.c create mode 100755 tools/testing/selftests/mm/test_page_frag.sh diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile index 02e1204971b0..acec529baaca 100644 --- a/tools/testing/selftests/mm/Makefile +++ b/tools/testing/selftests/mm/Makefile @@ -36,6 +36,8 @@ MAKEFLAGS += --no-builtin-rules CFLAGS = -Wall -I $(top_srcdir) $(EXTRA_CFLAGS) $(KHDR_INCLUDES) $(TOOLS_INCLUDES) LDLIBS = -lrt -lpthread -lm +TEST_GEN_MODS_DIR := page_frag + TEST_GEN_FILES = cow TEST_GEN_FILES += compaction_test TEST_GEN_FILES += gup_longterm @@ -126,6 +128,7 @@ TEST_FILES += test_hmm.sh TEST_FILES += va_high_addr_switch.sh TEST_FILES += charge_reserved_hugetlb.sh TEST_FILES += hugetlb_reparenting_test.sh +TEST_FILES += test_page_frag.sh # required by charge_reserved_hugetlb.sh TEST_FILES += write_hugetlb_memory.sh diff --git a/tools/testing/selftests/mm/page_frag/Makefile b/tools/testing/selftests/mm/page_frag/Makefile new file mode 100644 index ..58dda74d50a3 --- /dev/null +++ b/tools/testing/selftests/mm/page_frag/Makefile @@ -0,0 +1,18 @@ +PAGE_FRAG_TEST_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST) +KDIR ?= $(abspath $(PAGE_FRAG_TEST_DIR)/../../../../..) + +ifeq ($(V),1) +Q = +else +Q = @ +endif + +MODULES = page_frag_test.ko + +obj-m += page_frag_test.o + +all: + +$(Q)make -C $(KDIR) M=$(PAGE_FRAG_TEST_DIR) modules + +clean: + +$(Q)make -C $(KDIR) M=$(PAGE_FRAG_TEST_DIR) clean diff --git a/tools/testing/selftests/mm/page_frag/page_frag_test.c b/tools/testing/selftests/mm/page_frag/page_frag_test.c new file mode 100644 index ..eeb2b6bc681a --- /dev/null +++ b/tools/testing/selftests/mm/page_frag/page_frag_test.c @@ -0,0 +1,173 @@ +// SPDX-License-Identifier: GPL-2.0 I think this would throw a checkpatch warning about comment should be "/*" and not "//" + +/* + * Test module for page_frag cache + * + * Copyright (C) 2024 Yunsheng Lin + */ + +#include +#include +#include +#include +#include +#include + +static struct ptr_ring ptr_ring; +static int nr_objs = 512; +static atomic_t nthreads; +static struct completion wait; +static struct page_frag_cache test_nc; +static int test_popped; +static int test_pushed; + +static int nr_test = 200; +module_param(nr_test, int, 0); +MODULE_PARM_DESC(nr_test, "number of iterations to test"); + +static bool test_align; +module_param(test_align, bool, 0); +MODULE_PARM_DESC(test_align, "use align API for testing"); + +static int test_alloc_len = 2048; +module_param(test_alloc_len, int, 0); +MODULE_PARM_DESC(test_alloc_len, "alloc len for testing"); + +static int test_push_cpu; +module_param(test_push_cpu, int, 0); +MODULE_PARM_DESC(test_push_cpu, "test cpu for pushing fragment"); + +static int test_pop_cpu; +module_param(test_pop_cpu, int, 0); +MODULE_PARM_DESC(test_pop_cpu, "test cpu for popping fragment"); + +static int page_frag_pop_thread(void *arg) +{ + struct ptr_ring *ring = arg; + + pr_info("page_frag pop test thread begins on cpu %d\n", + smp_processor_id()); + + while (test_popped < nr_test) { + void *obj = __ptr_ring_consume(ring); + + if (obj) { + test_popped++; + page_frag_free(obj); + } else { + cond_resched(); + } + } + + if (atomic_dec_and_test(&nthreads)) + complete(&wait); + + pr_info("page_frag pop test thread exits on cpu %d\n", + smp_processor_id()); + + return 0; +} + +static int page_frag_push_thread(void *arg) +{ + struct ptr_ring *ring = arg; + + pr_info("page_frag push test thread begins on cpu %d\n", + smp_processor_id()); + + while (test_pushed < nr_test) { + void *va; + int ret; + + if (test_align) { + va = page
Re: [RFC PATCH 30/39] KVM: guest_memfd: Handle folio preparation for guest_memfd mmap
On Tue, Oct 08, 2024, Ackerley Tng wrote: > Patrick Roy writes: > > For the "non-CoCo with direct map entries removed" VMs that we at AWS > > are going for, we'd like a VM type with host-controlled in-place > > conversions which doesn't zero on transitions, Hmm, your use case shouldn't need conversions _for KVM_, as there's no need for KVM to care if userspace or the guest _wants_ a page to be shared vs. private. Userspace is fully trusted to manage things; KVM simply reacts to the current state of things. And more importantly, whether or not the direct map is zapped needs to be a property of the guest_memfd inode, i.e. can't be associated with a struct kvm. I forget who got volunteered to do the work, but we're going to need similar functionality for tracking the state of individual pages in a huge folio, as folio_mark_uptodate() is too coarse-grained. I.e. at some point, I expect that guest_memfd will make it easy-ish to determine whether or not the direct map has been obliterated. The shared vs. private attributes tracking in KVM is still needed (I think), as it communicates what userspace _wants_, whereas he guest_memfd machinery will track what the state _is_. > > so if KVM_X86_SW_PROTECTED_VM ends up zeroing, we'd need to add another new > > VM type for that. Maybe we should sneak in a s/KVM_X86_SW_PROTECTED_VM/KVM_X86_SW_HARDENED_VM rename? The original thought behind "software protected VM" was to do a slow build of something akin to pKVM, but realistically I don't think that idea is going anywhere. Alternatively, depending on how KVM accesses guest memory that's been removed from the direct map, another solution would be to allow "regular" VMs to bind memslots to guest_memfd, i.e. if the non-CoCo use case needs/wnats to bind all memory to guest_memfd, not just "private" mappings. That's probably the biggest topic of discussion: how do we want to allow mapping guest_memfd into the guest, without direct map entries, but while still allowing KVM to access guest memory as needed, e.g. for shadow paging. One approach is your RFC, where KVM maps guest_memfd pfns on-demand. Another (slightly crazy) approach would be use protection keys to provide the security properties that you want, while giving KVM (and userspace) a quick-and-easy override to access guest memory. 1. mmap() guest_memfd into userpace with RW protections 2. Configure PKRU to make guest_memfd memory inaccessible by default 3. Swizzle PKRU on-demand when intentionally accessing guest memory It's essentially the same idea as SMAP+STAC/CLAC, just applied to guest memory instead of to usersepace memory. The benefit of the PKRU approach is that there are no PTE modifications, and thus no TLB flushes, and only the CPU that is access guest memory gains temporary access. The big downside is that it would be limited to modern hardware, but that might be acceptable, especially if it simplifies KVM's implementation. > > Somewhat related sidenote: For VMs that allow inplace conversions and do > > not zero, we do not need to zap the stage-2 mappings on memory attribute > > changes, right? See above. I don't think conversions by toggling the shared/private flag in KVM's memory attributes is the right fit for your use case.
Re: [RFC PATCH v3 4/4] sched+mm: Use hazard pointers to track lazy active mm existence
On Tue, Oct 8, 2024 at 9:52 AM Mathieu Desnoyers wrote: > > Replace lazy active mm existence tracking with hazard pointers. This > removes the following implementations and their associated config > options: > > - MMU_LAZY_TLB_REFCOUNT > - MMU_LAZY_TLB_SHOOTDOWN > - This removes the call_rcu delayed mm drop for RT. > > It leverages the fact that each CPU only ever have at most one single > lazy active mm. This makes it a very good fit for a hazard pointer > domain implemented with one hazard pointer slot per CPU. > > -static void cleanup_lazy_tlbs(struct mm_struct *mm) > +static void remove_lazy_mm_hp(int cpu, struct hazptr_slot *slot, void *addr) > { > - if (!IS_ENABLED(CONFIG_MMU_LAZY_TLB_SHOOTDOWN)) { > - /* > -* In this case, lazy tlb mms are refounted and would not > reach > -* __mmdrop until all CPUs have switched away and mmdrop()ed. > -*/ > - return; > - } > + smp_call_function_single(cpu, do_shoot_lazy_tlb, addr, 1); > + smp_call_function_single(cpu, do_check_lazy_tlb, addr, 1); > +} > > +static void cleanup_lazy_tlbs(struct mm_struct *mm) > +{ [...] > - on_each_cpu_mask(mm_cpumask(mm), do_shoot_lazy_tlb, (void *)mm, 1); > - if (IS_ENABLED(CONFIG_DEBUG_VM_SHOOT_LAZIES)) > - on_each_cpu(do_check_lazy_tlb, (void *)mm, 1); > + hazptr_scan(&hazptr_domain_sched_lazy_mm, mm, remove_lazy_mm_hp); Hey Mathieu, Take comments with a grain of salt because I am digging into active_mm after a while. It seems to me IMO this seems a strange hazard pointer callback usecase. Because "scan" here immediately retires even though the reader has a "reference". Here it is more like, the callback is forcing all other readers holding a "reference" to switch immediately whether they like it or not and not wait until _they_ release the reference. There is no such precedent in RCU for instance, where a callback never runs before a reader even finishes. That might work for active_mm, but it sounds like a fringe usecase to me that it might probably be better to just force CONFIG_MMU_LAZY_TLB_SHOOTDOWN=y for everyone and use on_each_cpu() instead. That will give the same code simplification for this patch without requiring hazard pointers AFAICS? Or maybe start with that, and _then_ convert to HP if it makes sense to? These are just some thoughts and I am Ok with all the reviewer's consensus. And if I understand correctly, for this usecase - we are not even grabbing a "true reference" to the mm_struct object because direct access to mm_struct should require a proper mmgrab(), not a lazy_tlb flavored one? -- correct me if I'm wrong though. Also, isn't it that on x86, now with this patch there will be more IPIs, whereas previously the global refcount was not requiring that as the last kthread switching out would no longer access the old mm? Might it be worth checking the performance of fork/exit and if that scales? thanks, - Joel
Re: [PATCH v3] selftests: sched_ext: Add sched_ext as proper selftest target
On 10/8/24 09:35, Björn Töpel wrote: From: Björn Töpel The sched_ext selftests is missing proper cross-compilation support, a proper target entry, and out-of-tree build support. When building the kselftest suite, e.g.: make ARCH=riscv CROSS_COMPILE=riscv64-linux-gnu- \ TARGETS=sched_ext SKIP_TARGETS="" O=/output/foo \ -C tools/testing/selftests install or: make ARCH=arm64 LLVM=1 TARGETS=sched_ext SKIP_TARGETS="" \ O=/output/foo -C tools/testing/selftests install The expectation is that the sched_ext is included, cross-built, the correct toolchain is picked up, and placed into /output/foo. In contrast to the BPF selftests, the sched_ext suite does not use bpftool at test run-time, so it is sufficient to build bpftool for the build host only. Add ARCH, CROSS_COMPILE, OUTPUT, and TARGETS support to the sched_ext selftest. Also, remove some variables that were unused by the Makefile. Thank you for adding this to change log. Maybe we need to add this to kselftest.rst at some point. Signed-off-by: Björn Töpel --- v3: * Always build a build host version of bpftool (Mark) * Make sure LLVM style "ARCH only" cross-build works (Mark) v2: * Removed the duplicated LLVM prefix parsing (David) * Made sure make clean didn't do a complete mess (David) * Added sched_ext to default skip (Shuah) Thank you. Reviewed-by: Shuah Khan thanks, -- Shuah
Re: [PATCH v2 5/8] net: af_can: do not leave a dangling sk pointer in can_create()
On Tue, Oct 8, 2024 at 3:38 AM Vincent MAILHOL wrote: > > Hi Ignat, > > Thanks for the patch. > > On Tue. 8 Oct. 2024 at 06:37, Ignat Korchagin wrote: > > On error can_create() frees the allocated sk object, but sock_init_data() > > has already attached it to the provided sock object. This will leave a > > dangling sk pointer in the sock object and may cause use-after-free later. > > I was about to suggest that this should be backported to stable, but > after reading the cover letter, I now understand that this patch is > more an improvement to avoid false positives on kmemleak & Cie. Maybe > the description could be a bit more nuanced? After patch 1/8 of this > series, it seems that the use-after-free is not possible anymore. If we go with Kuniyuki's suggestion in the cover email to replace the explicit NULL with DEBUG_NET_WARN_ON_ONCE(sock->sk) in __sock_create() then use-after-free would be possible again except we will easily catch it. But I will review the description, when I respin the patches to net-next as requested by Jakub. > > Signed-off-by: Ignat Korchagin > > See above comment as notwithstanding. This said: > > Reviewed-by: Vincent Mailhol Thank you. > > > --- > > net/can/af_can.c | 1 + > > 1 file changed, 1 insertion(+) > > > > diff --git a/net/can/af_can.c b/net/can/af_can.c > > index 707576eeeb58..01f3fbb3b67d 100644 > > --- a/net/can/af_can.c > > +++ b/net/can/af_can.c > > @@ -171,6 +171,7 @@ static int can_create(struct net *net, struct socket > > *sock, int protocol, > > /* release sk on errors */ > > sock_orphan(sk); > > sock_put(sk); > > + sock->sk = NULL; > > } > > > > errout: > > -- > > 2.39.5 > > > >
[RESEND PATCH v4] list: test: Check the size of every lists for list_cut_position*()
Check the total number of elements in both resultant lists are correct within list_cut_position*(). Previously, only the first list's size was checked. wo additional elements in the second list would not have been caught. Signed-off-by: I Hsin Cheng --- change in v4: Amend the description of commit message, make it less confusing and focus on the correct check which is performed now. lib/list-test.c | 4 1 file changed, 4 insertions(+) diff --git a/lib/list-test.c b/lib/list-test.c index 37cbc33e9fdb..b4b3810c71d0 100644 --- a/lib/list-test.c +++ b/lib/list-test.c @@ -408,6 +408,8 @@ static void list_test_list_cut_position(struct kunit *test) KUNIT_EXPECT_PTR_EQ(test, cur, &entries[i]); i++; } + + KUNIT_EXPECT_EQ(test, i, 3); } static void list_test_list_cut_before(struct kunit *test) @@ -436,6 +438,8 @@ static void list_test_list_cut_before(struct kunit *test) KUNIT_EXPECT_PTR_EQ(test, cur, &entries[i]); i++; } + + KUNIT_EXPECT_EQ(test, i, 3); } static void list_test_list_splice(struct kunit *test) -- 2.43.0
[RESEND PATCH v4] list: test: Check the size of every lists for list_cut_position*()
Check the total number of elements in both resultant lists are correct within list_cut_position*(). Previously, only the first list's size was checked. so additional elements in the second list would not have been caught. Signed-off-by: I Hsin Cheng --- change in v4: Amend the description of commit message, make it less confusing and focus on the correct check which is performed now. lib/list-test.c | 4 1 file changed, 4 insertions(+) diff --git a/lib/list-test.c b/lib/list-test.c index 37cbc33e9fdb..b4b3810c71d0 100644 --- a/lib/list-test.c +++ b/lib/list-test.c @@ -408,6 +408,8 @@ static void list_test_list_cut_position(struct kunit *test) KUNIT_EXPECT_PTR_EQ(test, cur, &entries[i]); i++; } + + KUNIT_EXPECT_EQ(test, i, 3); } static void list_test_list_cut_before(struct kunit *test) @@ -436,6 +438,8 @@ static void list_test_list_cut_before(struct kunit *test) KUNIT_EXPECT_PTR_EQ(test, cur, &entries[i]); i++; } + + KUNIT_EXPECT_EQ(test, i, 3); } static void list_test_list_splice(struct kunit *test) -- 2.43.0
Re: [RESEND PATCH v4] list: test: Check the size of every lists for list_cut_position*()
Hi I Hsin, On Tue, Oct 08, 2024 at 02:51:23PM +0800, I Hsin Cheng wrote: > Check the total number of elements in both resultant lists are correct > within list_cut_position*(). Previously, only the first list's size was > checked. wo additional elements in the second list would not have been > caught. > > Signed-off-by: I Hsin Cheng > --- > change in v4: > Amend the description of commit message, make it less confusing > and focus on the correct check which is performed now. > Generally, we don't add the prefix "RESEND" to the subject line of the next version of a patch. "RESEND" is only used for resubmissions of a patch that hasn't been modified in any way from the previous submission. See: https://www.kernel.org/doc/html/v6.11/process/submitting-patches.html#don-t-get-discouraged-or-impatient Regards, Kuan-Wei
Re: [PATCH 1/1] s390/virtio_ccw: fix dma_parm pointer not set up
On Mon, Oct 07 2024, Halil Pasic wrote: > At least since commit 334304ac2bac ("dma-mapping: don't return errors > from dma_set_max_seg_size") setting up device.dma_parms is basically > mandated by the DMA API. As of now Channel (CCW) I/O in general does not > utilize the DMA API, except for virtio. For virtio-ccw however the > common virtio DMA infrastructure is such that most of the DMA stuff > hinges on the virtio parent device, which is a CCW device. > > So lets set up the dma_parms pointer for the CCW parent device and hope > for the best! > > Signed-off-by: Halil Pasic > Fixes: 334304ac2bac ("dma-mapping: don't return errors from > dma_set_max_seg_size") > Reported-by: "Marc Hartmayer" > Closes: https://bugzilla.linux.ibm.com/show_bug.cgi?id=209131 > Reviewed-by: Eric Farman > --- > > In the long run it may make sense to move dma_parms into struct > ccw_device, since layering-wise it is much cleaner. I decided > to put it in virtio_ccw_device because currently it is only used for > virtio. Yes, ccw_device would make more sense as a resting place; no idea what other devices (dasd, QDIO based, ...) would do with it ATM -- I agree that if adding it to virtio_ccw_device get things going again, we should do that and consider the possible generic case later. Reviewed-by: Cornelia Huck [I assume this one can be picked up together with other s390 patches?] > > --- > drivers/s390/virtio/virtio_ccw.c | 4 > 1 file changed, 4 insertions(+) > > diff --git a/drivers/s390/virtio/virtio_ccw.c > b/drivers/s390/virtio/virtio_ccw.c > index 62eca9419ad7..21fa7ac849e5 100644 > --- a/drivers/s390/virtio/virtio_ccw.c > +++ b/drivers/s390/virtio/virtio_ccw.c > @@ -58,6 +58,8 @@ struct virtio_ccw_device { > struct virtio_device vdev; > __u8 config[VIRTIO_CCW_CONFIG_SIZE]; > struct ccw_device *cdev; > + /* we make cdev->dev.dma_parms point to this */ > + struct device_dma_parameters dma_parms; > __u32 curr_io; > int err; > unsigned int revision; /* Transport revision */ > @@ -1303,6 +1305,7 @@ static int virtio_ccw_offline(struct ccw_device *cdev) > unregister_virtio_device(&vcdev->vdev); > spin_lock_irqsave(get_ccwdev_lock(cdev), flags); > dev_set_drvdata(&cdev->dev, NULL); > + cdev->dev.dma_parms = NULL; > spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags); > return 0; > } > @@ -1366,6 +1369,7 @@ static int virtio_ccw_online(struct ccw_device *cdev) > } > vcdev->vdev.dev.parent = &cdev->dev; > vcdev->cdev = cdev; > + cdev->dev.dma_parms = &vcdev->dma_parms; > vcdev->dma_area = ccw_device_dma_zalloc(vcdev->cdev, > sizeof(*vcdev->dma_area), > &vcdev->dma_area_addr); > > base-commit: 87d6aab2389e5ce0197d8257d5f8ee965a67c4cd
[PATCH 1/1] selftests: livepatch: add test cases of stack_order sysfs interface
Add selftest test cases to sysfs attribute 'stack_order'. Signed-off-by: Wardenjohn --- .../testing/selftests/livepatch/test-sysfs.sh | 71 +++ .../selftests/livepatch/test_modules/Makefile | 5 +- .../test_klp_livepatch_noreplace.c| 53 ++ .../test_klp_livepatch_noreplace2.c | 53 ++ .../test_klp_livepatch_noreplace3.c | 53 ++ 5 files changed, 234 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/livepatch/test_modules/test_klp_livepatch_noreplace.c create mode 100644 tools/testing/selftests/livepatch/test_modules/test_klp_livepatch_noreplace2.c create mode 100644 tools/testing/selftests/livepatch/test_modules/test_klp_livepatch_noreplace3.c diff --git a/tools/testing/selftests/livepatch/test-sysfs.sh b/tools/testing/selftests/livepatch/test-sysfs.sh index 05a14f5a7bfb..a086b62fb488 100755 --- a/tools/testing/selftests/livepatch/test-sysfs.sh +++ b/tools/testing/selftests/livepatch/test-sysfs.sh @@ -5,6 +5,9 @@ . $(dirname $0)/functions.sh MOD_LIVEPATCH=test_klp_livepatch +MOD_LIVEPATCH_NOREPLACE=test_klp_livepatch_noreplace +MOD_LIVEPATCH_NOREPLACE2=test_klp_livepatch_noreplace2 +MOD_LIVEPATCH_NOREPLACE3=test_klp_livepatch_noreplace3 setup_config @@ -131,4 +134,72 @@ livepatch: '$MOD_LIVEPATCH': completing unpatching transition livepatch: '$MOD_LIVEPATCH': unpatching complete % rmmod $MOD_LIVEPATCH" +start_test "sysfs test stack_order read" + +load_lp $MOD_LIVEPATCH_NOREPLACE + +check_sysfs_rights "$MOD_LIVEPATCH_NOREPLACE" "stack_order" "-r--r--r--" +check_sysfs_value "$MOD_LIVEPATCH_NOREPLACE" "stack_order" "1" + +load_lp $MOD_LIVEPATCH_NOREPLACE2 + +check_sysfs_rights "$MOD_LIVEPATCH_NOREPLACE2" "stack_order" "-r--r--r--" +check_sysfs_value "$MOD_LIVEPATCH_NOREPLACE2" "stack_order" "2" + +load_lp $MOD_LIVEPATCH_NOREPLACE3 + +check_sysfs_rights "$MOD_LIVEPATCH_NOREPLACE3" "stack_order" "-r--r--r--" +check_sysfs_value "$MOD_LIVEPATCH_NOREPLACE3" "stack_order" "3" + +disable_lp $MOD_LIVEPATCH_NOREPLACE2 +unload_lp $MOD_LIVEPATCH_NOREPLACE2 + +check_sysfs_rights "$MOD_LIVEPATCH_NOREPLACE" "stack_order" "-r--r--r--" +check_sysfs_value "$MOD_LIVEPATCH_NOREPLACE" "stack_order" "1" +check_sysfs_rights "$MOD_LIVEPATCH_NOREPLACE3" "stack_order" "-r--r--r--" +check_sysfs_value "$MOD_LIVEPATCH_NOREPLACE3" "stack_order" "2" + +disable_lp $MOD_LIVEPATCH_NOREPLACE3 +unload_lp $MOD_LIVEPATCH_NOREPLACE3 + +disable_lp $MOD_LIVEPATCH_NOREPLACE +unload_lp $MOD_LIVEPATCH_NOREPLACE + +check_result "% insmod test_modules/$MOD_LIVEPATCH_NOREPLACE.ko +livepatch: enabling patch '$MOD_LIVEPATCH_NOREPLACE' +livepatch: '$MOD_LIVEPATCH_NOREPLACE': initializing patching transition +livepatch: '$MOD_LIVEPATCH_NOREPLACE': starting patching transition +livepatch: '$MOD_LIVEPATCH_NOREPLACE': completing patching transition +livepatch: '$MOD_LIVEPATCH_NOREPLACE': patching complete +% insmod test_modules/$MOD_LIVEPATCH_NOREPLACE2.ko +livepatch: enabling patch '$MOD_LIVEPATCH_NOREPLACE2' +livepatch: '$MOD_LIVEPATCH_NOREPLACE2': initializing patching transition +livepatch: '$MOD_LIVEPATCH_NOREPLACE2': starting patching transition +livepatch: '$MOD_LIVEPATCH_NOREPLACE2': completing patching transition +livepatch: '$MOD_LIVEPATCH_NOREPLACE2': patching complete +% insmod test_modules/$MOD_LIVEPATCH_NOREPLACE3.ko +livepatch: enabling patch '$MOD_LIVEPATCH_NOREPLACE3' +livepatch: '$MOD_LIVEPATCH_NOREPLACE3': initializing patching transition +livepatch: '$MOD_LIVEPATCH_NOREPLACE3': starting patching transition +livepatch: '$MOD_LIVEPATCH_NOREPLACE3': completing patching transition +livepatch: '$MOD_LIVEPATCH_NOREPLACE3': patching complete +% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH_NOREPLACE2/enabled +livepatch: '$MOD_LIVEPATCH_NOREPLACE2': initializing unpatching transition +livepatch: '$MOD_LIVEPATCH_NOREPLACE2': starting unpatching transition +livepatch: '$MOD_LIVEPATCH_NOREPLACE2': completing unpatching transition +livepatch: '$MOD_LIVEPATCH_NOREPLACE2': unpatching complete +% rmmod $MOD_LIVEPATCH_NOREPLACE2 +% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH_NOREPLACE3/enabled +livepatch: '$MOD_LIVEPATCH_NOREPLACE3': initializing unpatching transition +livepatch: '$MOD_LIVEPATCH_NOREPLACE3': starting unpatching transition +livepatch: '$MOD_LIVEPATCH_NOREPLACE3': completing unpatching transition +livepatch: '$MOD_LIVEPATCH_NOREPLACE3': unpatching complete +% rmmod $MOD_LIVEPATCH_NOREPLACE3 +% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH_NOREPLACE/enabled +livepatch: '$MOD_LIVEPATCH_NOREPLACE': initializing unpatching transition +livepatch: '$MOD_LIVEPATCH_NOREPLACE': starting unpatching transition +livepatch: '$MOD_LIVEPATCH_NOREPLACE': completing unpatching transition +livepatch: '$MOD_LIVEPATCH_NOREPLACE': unpatching complete +% rmmod $MOD_LIVEPATCH_NOREPLACE" + exit 0 diff --git a/tools/testing/selftests/livepatch/test_modules/Makefile b/tools/testing/selftests/livepatch/test_modules/Makefile index
Re: [PATCH net-next v8 01/24] netlink: add NLA_POLICY_MAX_LEN macro
On 07/10/24 17:53, Jakub Kicinski wrote: On Mon, 7 Oct 2024 12:04:22 +0200 Antonio Quartulli wrote: Or we could check if len(self.checks) <= 1 early and throw our hands up if there is more, for now? We already perform the same check in the 'else' branch below. It'd be about moving it at the beginning of the function and bail out if true, right? Should I modify this patch and move the check above? I just sent the refactor patch, that seemed easier than explaining ;) Great, thanks :-) -- Antonio Quartulli OpenVPN Inc.
[PATCH 0/1] selftests: livepatch: add test cases of stack_order sysfs
This patch add self test cases to 'stack_order' sysfs interface. Reuse test module of 'test_klp_livepatch'. However, some module in test_module have '.replace' enable. So, I set the replace value of the stack_order test module to false. Regards. Wardenjohn.
Re: [PATCH net-next v8 03/24] ovpn: add basic netlink support
Hi, On 07/10/24 17:32, Jiri Pirko wrote: Wed, Oct 02, 2024 at 11:02:17AM CEST, anto...@openvpn.net wrote: [...] +operations: + list: +- + name: dev-new + attribute-set: ovpn + flags: [ admin-perm ] + doc: Create a new interface of type ovpn + do: +request: + attributes: +- ifname +- mode +reply: + attributes: +- ifname +- ifindex +- + name: dev-del Why you expose new and del here in ovn specific generic netlink iface? Why can't you use the exising RTNL api which is used for creation and destruction of other types of devices? That was my original approach in v1, but it was argued that an ovpn interface needs a userspace program to be configured and used in a meaningful way, therefore it was decided to concentrate all iface mgmt APIs along with the others in the netlink family and to not expose any RTNL ops. However, recently we decided to add a dellink implementation for better integration with network namespaces and to allow the user to wipe a dangling interface. In the future we are planning to also add the possibility to create a "persistent interface", that is an interface created before launching any userspace program and that survives when the latter is stopped. I can guess this functionality may be better suited for RTNL, but I am not sure yet. @Jiri: do you have any particular opinion why we should use RTNL ops and not netlink for creating/destroying interfaces? I feel this is mostly a matter of taste, but maybe there are technical reasons we should consider. Thanks a lot for your contribution. Regards, ip link add [link DEV | parentdev NAME] [ name ] NAME [ txqueuelen PACKETS ] [ address LLADDR ] [ broadcast LLADDR ] [ mtu MTU ] [index IDX ] [ numtxqueues QUEUE_COUNT ] [ numrxqueues QUEUE_COUNT ] [ netns { PID | NETNSNAME | NETNSFILE } ] type TYPE [ ARGS ] ip link delete { DEVICE | dev DEVICE | group DEVGROUP } type TYPE [ ARGS ] Lots of examples of existing types creation is for example here: https://developers.redhat.com/blog/2018/10/22/introduction-to-linux-interfaces-for-virtual-networking + attribute-set: ovpn + flags: [ admin-perm ] + doc: Delete existing interface of type ovpn + do: +pre: ovpn-nl-pre-doit +post: ovpn-nl-post-doit +request: + attributes: +- ifindex [...] -- Antonio Quartulli OpenVPN Inc.
Re: [RFC PATCH 30/39] KVM: guest_memfd: Handle folio preparation for guest_memfd mmap
On 2024-10-08 at 19:56+ Sean Christopherson wrote: > Another (slightly crazy) approach would be use protection keys to provide the > security properties that you want, while giving KVM (and userspace) a > quick-and-easy > override to access guest memory. > > 1. mmap() guest_memfd into userpace with RW protections > 2. Configure PKRU to make guest_memfd memory inaccessible by default > 3. Swizzle PKRU on-demand when intentionally accessing guest memory > > It's essentially the same idea as SMAP+STAC/CLAC, just applied to guest memory > instead of to usersepace memory. > > The benefit of the PKRU approach is that there are no PTE modifications, and > thus > no TLB flushes, and only the CPU that is access guest memory gains temporary > access. The big downside is that it would be limited to modern hardware, but > that might be acceptable, especially if it simplifies KVM's implementation. Yeah this might be worth it if it simplifies significantly. Jenkins et al. showed MPK worked for stopping in-process Spectre V1 [1]. While future hardware bugs are always possible, the host kernel would still offer better protection overall since discovery of additional Spectre approaches and gadgets in the kernel is more likely (I think it's a bigger surface area than hardware-specific MPK transient execution issues). Patrick, we talked about this a couple weeks ago and ended up focusing on within-userspace protection, but I see keys can also be used to stop kernel access like Andrew's project he mentioned during Dave's MPK session at LPC [2]. Andrew, could you share that here? It's not clear to me how reliably the kernel prevents its own access to such pages. I see a few papers that warrant more investigation: "we found multiple interfaces that Linux, by design, provides for accessing process memory that ignore PKU domains on a page." [3] "Though Connor et al. demonstrate that existing MPK protections can be bypassed by using the kernel as a confused deputy, compelling recent work indicates that MPK operations can be made secure." [4] Dave and others, if you're aware of resources clarifying how strong the boundaries are, that would be helpful. Derek [1] https://www.cs.dartmouth.edu/~sws/pubs/jas2020.pdf [2] https://www.youtube.com/watch?v=gEUeMfrNH94&t=1028s [3] https://www.usenix.org/system/files/sec20-connor.pdf [4] https://ics.uci.edu/~dabrowsa/kirth-eurosys22-pkru.pdf
Re: [PATCH net-next v20 01/14] mm: page_frag: add a test module for page_frag
On 2024/10/9 3:56, Shuah Khan wrote: > On 10/8/24 05:20, Yunsheng Lin wrote: >> The testing is done by ensuring that the fragment allocated >> from a frag_frag_cache instance is pushed into a ptr_ring >> instance in a kthread binded to a specified cpu, and a kthread >> binded to a specified cpu will pop the fragment from the >> ptr_ring and free the fragment. >> >> CC: Alexander Duyck >> Signed-off-by: Yunsheng Lin >> Reviewed-by: Alexander Duyck > > Signed-off-by should be last. Same comment on all the other Hi, Shuah I used 'git am' to collect those tag, it seems that is the order the tool applied, and I checking other applied commit, it seems only Signed-off-by from the committer is the last, like the below recent mm commit: 6901cf55de22 ff7f5ad7bce4 > patches in this series. When you have 4 patches, it is a good > practice to add cover-letter. I guess the cover-letter meant below? https://lore.kernel.org/all/20241008112049.2279307-1-linyunsh...@huawei.com/ > >> --- >> tools/testing/selftests/mm/Makefile | 3 + >> tools/testing/selftests/mm/page_frag/Makefile | 18 ++ >> .../selftests/mm/page_frag/page_frag_test.c | 173 ++ >> tools/testing/selftests/mm/run_vmtests.sh | 8 + >> tools/testing/selftests/mm/test_page_frag.sh | 171 + >> 5 files changed, 373 insertions(+) >> create mode 100644 tools/testing/selftests/mm/page_frag/Makefile >> create mode 100644 tools/testing/selftests/mm/page_frag/page_frag_test.c >> create mode 100755 tools/testing/selftests/mm/test_page_frag.sh >> >> diff --git a/tools/testing/selftests/mm/Makefile >> b/tools/testing/selftests/mm/Makefile >> index 02e1204971b0..acec529baaca 100644 >> --- a/tools/testing/selftests/mm/Makefile >> +++ b/tools/testing/selftests/mm/Makefile >> @@ -36,6 +36,8 @@ MAKEFLAGS += --no-builtin-rules >> CFLAGS = -Wall -I $(top_srcdir) $(EXTRA_CFLAGS) $(KHDR_INCLUDES) >> $(TOOLS_INCLUDES) >> LDLIBS = -lrt -lpthread -lm >> +TEST_GEN_MODS_DIR := page_frag >> + >> TEST_GEN_FILES = cow >> TEST_GEN_FILES += compaction_test >> TEST_GEN_FILES += gup_longterm >> @@ -126,6 +128,7 @@ TEST_FILES += test_hmm.sh >> TEST_FILES += va_high_addr_switch.sh >> TEST_FILES += charge_reserved_hugetlb.sh >> TEST_FILES += hugetlb_reparenting_test.sh >> +TEST_FILES += test_page_frag.sh >> # required by charge_reserved_hugetlb.sh >> TEST_FILES += write_hugetlb_memory.sh >> diff --git a/tools/testing/selftests/mm/page_frag/Makefile >> b/tools/testing/selftests/mm/page_frag/Makefile >> new file mode 100644 >> index ..58dda74d50a3 >> --- /dev/null >> +++ b/tools/testing/selftests/mm/page_frag/Makefile >> @@ -0,0 +1,18 @@ >> +PAGE_FRAG_TEST_DIR := $(realpath $(dir $(abspath $(lastword >> $(MAKEFILE_LIST) >> +KDIR ?= $(abspath $(PAGE_FRAG_TEST_DIR)/../../../../..) >> + >> +ifeq ($(V),1) >> +Q = >> +else >> +Q = @ >> +endif >> + >> +MODULES = page_frag_test.ko >> + >> +obj-m += page_frag_test.o >> + >> +all: >> + +$(Q)make -C $(KDIR) M=$(PAGE_FRAG_TEST_DIR) modules >> + >> +clean: >> + +$(Q)make -C $(KDIR) M=$(PAGE_FRAG_TEST_DIR) clean >> diff --git a/tools/testing/selftests/mm/page_frag/page_frag_test.c >> b/tools/testing/selftests/mm/page_frag/page_frag_test.c >> new file mode 100644 >> index ..eeb2b6bc681a >> --- /dev/null >> +++ b/tools/testing/selftests/mm/page_frag/page_frag_test.c >> @@ -0,0 +1,173 @@ >> +// SPDX-License-Identifier: GPL-2.0 > > I think this would throw a checkpatch warning about > comment should be "/*" and not "//" using "git grep 'SPDX-License' mm", "//" seems like a more common case. And I did using './scripts/checkpatch.pl --strict --codespell', and it does not throw a checkpatch warning. >> + >> +/* >> + * Test module for page_frag cache >> + * ... >> +function run_manual_check() >> +{ >> + # >> + # Validate passed parameters. If there is wrong one, >> + # the script exists and does not execute further. >> + # >> + validate_passed_args $@ >> + >> + echo "Run the test with following parameters: $@" > > Is this marker good enough to isolate the test results in the > dmesg? Include the test name in the message. > > >> + insmod $DRIVER $@ > /dev/null 2>&1 >> + echo "Done." > > Is this marker good enough to isolate the test results in the > dmesg? Include the test name in the message. > >> + echo "Check the kernel ring buffer to see the summary." > > Usually the test would run dmesg and filter out the test results > from the dmesg and include them in the test script output. > > You can refer to other tests that do that: powerpc/scripts/hmi.sh > is one example. Thanks, will check that.
[RESEND] [PATCH v6 0/2] Add test to distinguish between thread's signal mask and ucontext_t
This patch series is motivated by the following observation: Raise a signal, jump to signal handler. The ucontext_t structure dumped by kernel to userspace has a uc_sigmask field having the mask of blocked signals. If you run a fresh minimalistic program doing this, this field is empty, even if you block some signals while registering the handler with sigaction(). Here is what the man-pages have to say: sigaction(2): "sa_mask specifies a mask of signals which should be blocked (i.e., added to the signal mask of the thread in which the signal handler is invoked) during execution of the signal handler. In addition, the signal which triggered the handler will be blocked, unless the SA_NODEFER flag is used." signal(7): Under "Execution of signal handlers", (1.3) implies: "The thread's current signal mask is accessible via the ucontext_t object that is pointed to by the third argument of the signal handler." But, (1.4) states: "Any signals specified in act->sa_mask when registering the handler with sigprocmask(2) are added to the thread's signal mask. The signal being delivered is also added to the signal mask, unless SA_NODEFER was specified when registering the handler. These signals are thus blocked while the handler executes." There clearly is no distinction being made in the man pages between "Thread's signal mask" and ucontext_t; this logically should imply that a signal blocked by populating struct sigaction should be visible in ucontext_t. Here is what the kernel code does (for Aarch64): do_signal() -> handle_signal() -> sigmask_to_save(), which returns ¤t->blocked, is passed to setup_rt_frame() -> setup_sigframe() -> __copy_to_user(). Hence, ¤t->blocked is copied to ucontext_t exposed to userspace. Returning back to handle_signal(), signal_setup_done() -> signal_delivered() -> sigorsets() and set_current_blocked() are responsible for using information from struct ksignal ksig, which was populated through the sigaction() system call in kernel/signal.c: copy_from_user(&new_sa.sa, act, sizeof(new_sa.sa)), to update ¤t->blocked; hence, the set of blocked signals for the current thread is updated AFTER the kernel dumps ucontext_t to userspace. Assuming that the above is indeed the intended behaviour, because it semantically makes sense, since the signals blocked using sigaction() remain blocked only till the execution of the handler, and not in the context present before jumping to the handler (but nothing can be confirmed from the man-pages), the series introduces a test for mangling with uc_sigmask. I will send a separate series to fix the man-pages. The proposed selftest has been tested out on Aarch32, Aarch64 and x86_64. v5->v6: - Drop renaming of sas.c - Include the explanation from the cover letter in the changelog for the second patch v4->v5: - Remove a redundant print statement v3->v4: - Allocate sigsets as automatic variables to avoid malloc() v2->v3: - ucontext describes current state -> ucontext describes interrupted context - Add a comment for blockage of USR2 even after return from handler - Describe blockage of signals in a better way v1->v2: - Replace all occurrences of SIGPIPE with SIGSEGV - Fixed a mismatch between code comment and ksft log - Add a testcase: Raise the same signal again; it must not be queued - Remove unneeded , - Give a detailed test description in the comments; also describe the exact meaning of delivered and blocked - Handle errors for all libc functions/syscalls - Mention tests in Makefile and .gitignore in alphabetical order v1: - https://lore.kernel.org/all/20240607122319.768640-1-dev.j...@arm.com/ Dev Jain (2): selftests: Rename sigaltstack to generic signal selftests: Add a test mangling with uc_sigmask tools/testing/selftests/Makefile | 2 +- .../{sigaltstack => signal}/.gitignore| 1 + .../{sigaltstack => signal}/Makefile | 3 +- .../current_stack_pointer.h | 0 .../selftests/signal/mangle_uc_sigmask.c | 184 ++ .../selftests/{sigaltstack => signal}/sas.c | 0 6 files changed, 188 insertions(+), 2 deletions(-) rename tools/testing/selftests/{sigaltstack => signal}/.gitignore (70%) rename tools/testing/selftests/{sigaltstack => signal}/Makefile (56%) rename tools/testing/selftests/{sigaltstack => signal}/current_stack_pointer.h (100%) create mode 100644 tools/testing/selftests/signal/mangle_uc_sigmask.c rename tools/testing/selftests/{sigaltstack => signal}/sas.c (100%) -- 2.30.2
[RESEND] [PATCH v6 1/2] selftests: Rename sigaltstack to generic signal
Rename sigaltstack to generic signal directory, to allow adding more signal tests in the future. Signed-off-by: Dev Jain Reviewed-by: Mark Brown Acked-by: Shuah Khan --- tools/testing/selftests/Makefile| 2 +- tools/testing/selftests/{sigaltstack => signal}/.gitignore | 0 tools/testing/selftests/{sigaltstack => signal}/Makefile| 0 .../selftests/{sigaltstack => signal}/current_stack_pointer.h | 0 tools/testing/selftests/{sigaltstack => signal}/sas.c | 0 5 files changed, 1 insertion(+), 1 deletion(-) rename tools/testing/selftests/{sigaltstack => signal}/.gitignore (100%) rename tools/testing/selftests/{sigaltstack => signal}/Makefile (100%) rename tools/testing/selftests/{sigaltstack => signal}/current_stack_pointer.h (100%) rename tools/testing/selftests/{sigaltstack => signal}/sas.c (100%) diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index bc8fe9e8f7f2..edbe30fb3304 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -87,7 +87,7 @@ TARGETS += rtc TARGETS += rust TARGETS += seccomp TARGETS += sgx -TARGETS += sigaltstack +TARGETS += signal TARGETS += size TARGETS += sparc64 TARGETS += splice diff --git a/tools/testing/selftests/sigaltstack/.gitignore b/tools/testing/selftests/signal/.gitignore similarity index 100% rename from tools/testing/selftests/sigaltstack/.gitignore rename to tools/testing/selftests/signal/.gitignore diff --git a/tools/testing/selftests/sigaltstack/Makefile b/tools/testing/selftests/signal/Makefile similarity index 100% rename from tools/testing/selftests/sigaltstack/Makefile rename to tools/testing/selftests/signal/Makefile diff --git a/tools/testing/selftests/sigaltstack/current_stack_pointer.h b/tools/testing/selftests/signal/current_stack_pointer.h similarity index 100% rename from tools/testing/selftests/sigaltstack/current_stack_pointer.h rename to tools/testing/selftests/signal/current_stack_pointer.h diff --git a/tools/testing/selftests/sigaltstack/sas.c b/tools/testing/selftests/signal/sas.c similarity index 100% rename from tools/testing/selftests/sigaltstack/sas.c rename to tools/testing/selftests/signal/sas.c -- 2.30.2
[RESEND] [PATCH v6 2/2] selftests: Add a test mangling with uc_sigmask
The test is motivated by the following observation: Raise a signal, jump to signal handler. The ucontext_t structure dumped by kernel to userspace has a uc_sigmask field having the mask of blocked signals. If you run a fresh minimalistic program doing this, this field is empty, even if you block some signals while registering the handler with sigaction(). Here is what the man-pages have to say: sigaction(2): "sa_mask specifies a mask of signals which should be blocked (i.e., added to the signal mask of the thread in which the signal handler is invoked) during execution of the signal handler. In addition, the signal which triggered the handler will be blocked, unless the SA_NODEFER flag is used." signal(7): Under "Execution of signal handlers", (1.3) implies: "The thread's current signal mask is accessible via the ucontext_t object that is pointed to by the third argument of the signal handler." But, (1.4) states: "Any signals specified in act->sa_mask when registering the handler with sigprocmask(2) are added to the thread's signal mask. The signal being delivered is also added to the signal mask, unless SA_NODEFER was specified when registering the handler. These signals are thus blocked while the handler executes." There clearly is no distinction being made in the man pages between "Thread's signal mask" and ucontext_t; this logically should imply that a signal blocked by populating struct sigaction should be visible in ucontext_t. Here is what the kernel code does (for Aarch64): do_signal() -> handle_signal() -> sigmask_to_save(), which returns ¤t->blocked, is passed to setup_rt_frame() -> setup_sigframe() -> __copy_to_user(). Hence, ¤t->blocked is copied to ucontext_t exposed to userspace. Returning back to handle_signal(), signal_setup_done() -> signal_delivered() -> sigorsets() and set_current_blocked() are responsible for using information from struct ksignal ksig, which was populated through the sigaction() system call in kernel/signal.c: copy_from_user(&new_sa.sa, act, sizeof(new_sa.sa)), to update ¤t->blocked; hence, the set of blocked signals for the current thread is updated AFTER the kernel dumps ucontext_t to userspace. Assuming that the above is indeed the intended behaviour, because it semantically makes sense, since the signals blocked using sigaction() remain blocked only till the execution of the handler, and not in the context present before jumping to the handler (but nothing can be confirmed from the man-pages), this patch introduces a test for mangling with uc_sigmask. The test asserts the relation between blocked signal, delivered signal, and ucontext. The ucontext is mangled with, by adding a signal mask to it; on return from the handler, the thread must block the corresponding signal. In the test description, I have also described signal delivery and blockage, for ease of understanding what the test does. Signed-off-by: Dev Jain Reviewed-by: Mark Brown --- tools/testing/selftests/signal/.gitignore | 1 + tools/testing/selftests/signal/Makefile | 3 +- .../selftests/signal/mangle_uc_sigmask.c | 184 ++ 3 files changed, 187 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/signal/mangle_uc_sigmask.c diff --git a/tools/testing/selftests/signal/.gitignore b/tools/testing/selftests/signal/.gitignore index 50a19ace..3f339865a3b6 100644 --- a/tools/testing/selftests/signal/.gitignore +++ b/tools/testing/selftests/signal/.gitignore @@ -1,2 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only +mangle_uc_sigmask sas diff --git a/tools/testing/selftests/signal/Makefile b/tools/testing/selftests/signal/Makefile index 3e96d5d47036..e0bf7058d19c 100644 --- a/tools/testing/selftests/signal/Makefile +++ b/tools/testing/selftests/signal/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only CFLAGS = -Wall -TEST_GEN_PROGS = sas +TEST_GEN_PROGS = mangle_uc_sigmask +TEST_GEN_PROGS += sas include ../lib.mk diff --git a/tools/testing/selftests/signal/mangle_uc_sigmask.c b/tools/testing/selftests/signal/mangle_uc_sigmask.c new file mode 100644 index ..b79ab92178a8 --- /dev/null +++ b/tools/testing/selftests/signal/mangle_uc_sigmask.c @@ -0,0 +1,184 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2024 ARM Ltd. + * + * Author: Dev Jain + * + * Test describing a clear distinction between signal states - delivered and + * blocked, and their relation with ucontext. + * + * A process can request blocking of a signal by masking it into its set of + * blocked signals; such a signal, when sent to the process by the kernel, + * will get blocked by the process and it may later unblock it and take an + * action. At that point, the signal will be delivered. + * + * We test the following functionalities of the kernel: + * + * ucontext_t describes the interrupted context of the thread; this implies + * that, in case of registering a handler and catching the corresponding + * signal, that s
[PATCH] selftests: drivers: net: fix name not defined
This fix solves this error, when calling kselftest with targets "drivers/net": File "tools/testing/selftests/net/lib/py/nsim.py", line 64, in __init__ if e.errno == errno.ENOSPC: NameError: name 'errno' is not defined The module errno makes available standard error system symbols. Signed-off-by: Alessandro Zanni --- tools/testing/selftests/net/lib/py/nsim.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/lib/py/nsim.py b/tools/testing/selftests/net/lib/py/nsim.py index f571a8b3139b..1a8cbe9acc48 100644 --- a/tools/testing/selftests/net/lib/py/nsim.py +++ b/tools/testing/selftests/net/lib/py/nsim.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 +import errno import json import os import random -- 2.43.0
[PATCH 1/1] selftests/rseq: Fix mm_cid test failure
Adapt the rseq.c/rseq.h code to follow GNU C library changes introduced by: glibc commit 2e456ccf0c34 ("Linux: Make __rseq_size useful for feature detection (bug 31965)") Without this fix, rseq selftests for mm_cid fail: ./run_param_test.sh Default parameters Running test spinlock Running compare-twice test spinlock Running mm_cid test spinlock Error: cpu id getter unavailable [ This is based on the following branch: https://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git branch: fixes ] Fixes: 18c2355838e7 ("selftests/rseq: Implement rseq mm_cid field support") Signed-off-by: Mathieu Desnoyers Cc: Peter Zijlstra CC: Boqun Feng CC: "Paul E. McKenney" Cc: Shuah Khan CC: Carlos O'Donell CC: Florian Weimer CC: linux-kselft...@vger.kernel.org CC: sta...@vger.kernel.org --- tools/testing/selftests/rseq/rseq.c | 110 +++- tools/testing/selftests/rseq/rseq.h | 10 +-- 2 files changed, 77 insertions(+), 43 deletions(-) diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c index 96e812bdf8a4..5b9772cdf265 100644 --- a/tools/testing/selftests/rseq/rseq.c +++ b/tools/testing/selftests/rseq/rseq.c @@ -60,12 +60,6 @@ unsigned int rseq_size = -1U; /* Flags used during rseq registration. */ unsigned int rseq_flags; -/* - * rseq feature size supported by the kernel. 0 if the registration was - * unsuccessful. - */ -unsigned int rseq_feature_size = -1U; - static int rseq_ownership; static int rseq_reg_success; /* At least one rseq registration has succeded. */ @@ -111,6 +105,43 @@ int rseq_available(void) } } +/* The rseq areas need to be at least 32 bytes. */ +static +unsigned int get_rseq_min_alloc_size(void) +{ + unsigned int alloc_size = rseq_size; + + if (alloc_size < ORIG_RSEQ_ALLOC_SIZE) + alloc_size = ORIG_RSEQ_ALLOC_SIZE; + return alloc_size; +} + +/* + * Return the feature size supported by the kernel. + * + * Depending on the value returned by getauxval(AT_RSEQ_FEATURE_SIZE): + * + * 0: Return ORIG_RSEQ_FEATURE_SIZE (20) + * > 0: Return the value from getauxval(AT_RSEQ_FEATURE_SIZE). + * + * It should never return a value below ORIG_RSEQ_FEATURE_SIZE. + */ +static +unsigned int get_rseq_kernel_feature_size(void) +{ + unsigned long auxv_rseq_feature_size, auxv_rseq_align; + + auxv_rseq_align = getauxval(AT_RSEQ_ALIGN); + assert(!auxv_rseq_align || auxv_rseq_align <= RSEQ_THREAD_AREA_ALLOC_SIZE); + + auxv_rseq_feature_size = getauxval(AT_RSEQ_FEATURE_SIZE); + assert(!auxv_rseq_feature_size || auxv_rseq_feature_size <= RSEQ_THREAD_AREA_ALLOC_SIZE); + if (auxv_rseq_feature_size) + return auxv_rseq_feature_size; + else + return ORIG_RSEQ_FEATURE_SIZE; +} + int rseq_register_current_thread(void) { int rc; @@ -119,7 +150,7 @@ int rseq_register_current_thread(void) /* Treat libc's ownership as a successful registration. */ return 0; } - rc = sys_rseq(&__rseq_abi, rseq_size, 0, RSEQ_SIG); + rc = sys_rseq(&__rseq_abi, get_rseq_min_alloc_size(), 0, RSEQ_SIG); if (rc) { if (RSEQ_READ_ONCE(rseq_reg_success)) { /* Incoherent success/failure within process. */ @@ -140,28 +171,12 @@ int rseq_unregister_current_thread(void) /* Treat libc's ownership as a successful unregistration. */ return 0; } - rc = sys_rseq(&__rseq_abi, rseq_size, RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG); + rc = sys_rseq(&__rseq_abi, get_rseq_min_alloc_size(), RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG); if (rc) return -1; return 0; } -static -unsigned int get_rseq_feature_size(void) -{ - unsigned long auxv_rseq_feature_size, auxv_rseq_align; - - auxv_rseq_align = getauxval(AT_RSEQ_ALIGN); - assert(!auxv_rseq_align || auxv_rseq_align <= RSEQ_THREAD_AREA_ALLOC_SIZE); - - auxv_rseq_feature_size = getauxval(AT_RSEQ_FEATURE_SIZE); - assert(!auxv_rseq_feature_size || auxv_rseq_feature_size <= RSEQ_THREAD_AREA_ALLOC_SIZE); - if (auxv_rseq_feature_size) - return auxv_rseq_feature_size; - else - return ORIG_RSEQ_FEATURE_SIZE; -} - static __attribute__((constructor)) void rseq_init(void) { @@ -178,28 +193,54 @@ void rseq_init(void) } if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p && *libc_rseq_size_p != 0) { + unsigned int libc_rseq_size; + /* rseq registration owned by glibc */ rseq_offset = *libc_rseq_offset_p; - rseq_size = *libc_rseq_size_p; + libc_rseq_size = *libc_rseq_size_p; rseq_flags = *libc_rseq_flags_p; - rseq_feature_size = get_rseq_feature_size(); - if (rseq_feature_size > rseq_size) -
Re: [PATCH] selftest: hid: add the missing tests directory
在 2024/10/9 03:33, Shuah Khan 写道: On 10/8/24 03:31, Yun Lu wrote: Commit 160c826b4dd0 ("selftest: hid: add missing run-hid-tools-tests.sh") has added the run-hid-tools-tests.sh script for it to be installed, but I forgot to add the tests directory together. In fact, the run-hid-tools-tests.sh script uses the scripts in the tests directory to run tests. The tests directory also needs to be added to be installed Include the error you are seeing in here. If running the test case without the tests directory, the error message will like this: cd $KSFT_INSTALL_PATH ./run_kselftest.sh -t hid:hid-core.sh # /usr/lib/python3.11/site-packages/_pytest/config/__init__.py:331: PluggyTeardownRaisedWarning: A plugin raised an exception during an old-style hookwrapper teardown. # Plugin: helpconfig, Hook: pytest_cmdline_parse # UsageError: usage: __main__.py [options] [file_or_dir] [file_or_dir] [...] # __main__.py: error: unrecognized arguments: --udevd # inifile: None # rootdir: /root/linux/kselftest_install/hid Fixes: ffb85d5c9e80 ("selftests: hid: import hid-tools hid-core tests") Cc: sta...@vger.kernel.org Signed-off-by: Yun Lu --- tools/testing/selftests/hid/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/hid/Makefile b/tools/testing/selftests/hid/Makefile index 38ae31bb07b5..662209f5fabc 100644 --- a/tools/testing/selftests/hid/Makefile +++ b/tools/testing/selftests/hid/Makefile @@ -18,6 +18,7 @@ TEST_PROGS += hid-usb_crash.sh TEST_PROGS += hid-wacom.sh TEST_FILES := run-hid-tools-tests.sh +TEST_FILES += tests What about the files if any under the tests directory? The install rule would handle the case, however, did you verify that those are copied as well? Yes, the install rule will copy the entire directory (including all files under the directory), and I have confirmed it. Thanks and best regards. --Yun Lu CXX ?= $(CROSS_COMPILE)g++ thanks, -- Shuah
Re: [PATCH 3/3] rcu: Report callbacks enqueued on offline CPU blind spot
On 10/2/2024 8:30 PM, Frederic Weisbecker wrote: > Le Wed, Oct 02, 2024 at 04:57:38PM +0200, Frederic Weisbecker a écrit : >> Callbacks enqueued after rcutree_report_cpu_dead() fall into RCU barrier >> blind spot. Report any potential misuse. >> >> Reported-by: Paul E. McKenney >> Signed-off-by: Frederic Weisbecker >> --- >> kernel/rcu/tree.c | 3 +++ >> 1 file changed, 3 insertions(+) >> >> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c >> index a60616e69b66..36070b6bf4a1 100644 >> --- a/kernel/rcu/tree.c >> +++ b/kernel/rcu/tree.c >> @@ -3084,8 +3084,11 @@ __call_rcu_common(struct rcu_head *head, >> rcu_callback_t func, bool lazy_in) >> head->func = func; >> head->next = NULL; >> kasan_record_aux_stack_noalloc(head); >> + >> local_irq_save(flags); >> rdp = this_cpu_ptr(&rcu_data); >> +RCU_LOCKDEP_WARN(rcu_rdp_cpu_online(rdp), "Callback enqueued on offline >> CPU!"); > > This should be !rcu_rdp_cpu_online(rdp) > With this patch series, 600 mins RCU torture overnight testing completed without failures at my end. - Neeraj > Sigh... > >> + >> lazy = lazy_in && !rcu_async_should_hurry(); >> >> /* Add the callback to our list. */ >> -- >> 2.46.0 >> >>
Re: [PATCH v1 04/10] iommufd/viommu: Allow drivers to control vdev_id lifecycle
Sorry for the late reply. Just sat down and started to look at this series. On Thu, Sep 05, 2024 at 03:01:19PM -0300, Jason Gunthorpe wrote: > On Tue, Aug 27, 2024 at 10:02:06AM -0700, Nicolin Chen wrote: > > The iommufd core provides a lookup helper for an IOMMU driver to find a > > device pointer by device's per-viommu virtual ID. Yet a driver may need > > an inverted lookup to find a device's per-viommu virtual ID by a device > > pointer, e.g. when reporting virtual IRQs/events back to the user space. > > In this case, it'd be unsafe for iommufd core to do an inverted lookup, > > as the driver can't track the lifecycle of a viommu object or a vdev_id > > object. > > > > Meanwhile, some HW can even support virtual device ID lookup by its HW- > > accelerated virtualization feature. E.g. Tegra241 CMDQV HW supports to > > execute vanilla guest-issued SMMU commands containing virtual Stream ID > > but requires software to configure a link between virtual Stream ID and > > physical Stream ID via HW registers. So not only the iommufd core needs > > a vdev_id lookup table, drivers will want one too. > > > > Given the two justifications above, it's the best practice to provide a > > a pair of set_vdev_id/unset_vdev_id ops in the viommu ops, so a driver > > can implement them to control a vdev_id's lifecycle, and configure the > > HW properly if required. > > I think the lifecycle rules should be much simpler. > > If a nested domain is attached to a STE/RID/device then the vIOMMU > affiliated with that nested domain is pinned while the STE is in place > > So the driver only need to provide locking around attach changing the > STE's vIOMMU vs async operations translating from a STE to a > vIOMMU. This can be a simple driver lock of some kind, ie a rwlock > across the STE table. I was worried about the async between a vdev link (idev<=>vIOMMU) and an regular attach link (idev<=>nested_domain). Though practically the vdev link wouldn't break until the regular attach link breaks first, it was still not safe from it happening. So, having a master->lock to protect master->vdev could ensure. Now, with the vdev being an object refcounting idev/vIOMMU objs, I think we can simply pin the vdev in iommufd_hw_pagetable_attach to ensure the vdev won't disappear. Then, a driver wouldn't need to worry about that. [1] Meanwhile, a nested_domain pins the vIOMMU object in the iommufd level upon allocation. [2] So, what's missing seems to be the attach between the master->dev and the nested_domain itself [3] idev <--- vdev [1] -> vIOMMU (dev) ---[3]--> nested_domain [2]-^ A lock, protecting the attach(), which is in parallel with iommu core's group->mutex could help I think? > Generally that is how all the async events should work, go from the > STE to the VIOMMU to a iommufd callback to the iommufd event > queue. iommufd will translate the struct device from the driver to an > idev_id (or maybe even a vdevid) the same basic way the PRI code works My first attempt was putting the vdev into the attach_handle that was created for PRI code. Yet later on I found the links were too long and some of them weren't safe (perhaps with the new vdev and those pins mentioned above, it worth another try). So letting the driver hold the vdev itself became much simpler. Thanks Nicolin
Re: [PATCH V5 1/1] livepatch: Add stack_order sysfs attribute
On Tue, Oct 08, 2024 at 09:48:56AM +0800, Wardenjohn wrote: > Add "stack_order" sysfs attribute which holds the order in which a live > patch module was loaded into the system. A user can then determine an > active live patched version of a function. > > cat /sys/kernel/livepatch/livepatch_1/stack_order -> 1 > > means that livepatch_1 is the first live patch applied > > cat /sys/kernel/livepatch/livepatch_module/stack_order -> N > > means that livepatch_module is the Nth live patch applied > > Suggested-by: Petr Mladek > Suggested-by: Miroslav Benes > Suggested-by: Josh Poimboeuf > Signed-off-by: Wardenjohn Acked-by: Josh Poimboeuf -- Josh
Re: [PATCH net-next v8 03/24] ovpn: add basic netlink support
Tue, Oct 08, 2024 at 11:16:01AM CEST, anto...@openvpn.net wrote: >On 08/10/2024 10:58, Jiri Pirko wrote: >> Tue, Oct 08, 2024 at 10:01:40AM CEST, anto...@openvpn.net wrote: >> > Hi, >> > >> > On 07/10/24 17:32, Jiri Pirko wrote: >> > > Wed, Oct 02, 2024 at 11:02:17AM CEST, anto...@openvpn.net wrote: >> > > >> > > [...] >> > > >> > > >> > > > +operations: >> > > > + list: >> > > > +- >> > > > + name: dev-new >> > > > + attribute-set: ovpn >> > > > + flags: [ admin-perm ] >> > > > + doc: Create a new interface of type ovpn >> > > > + do: >> > > > +request: >> > > > + attributes: >> > > > +- ifname >> > > > +- mode >> > > > +reply: >> > > > + attributes: >> > > > +- ifname >> > > > +- ifindex >> > > > +- >> > > > + name: dev-del >> > > >> > > Why you expose new and del here in ovn specific generic netlink iface? >> > > Why can't you use the exising RTNL api which is used for creation and >> > > destruction of other types of devices? >> > >> > That was my original approach in v1, but it was argued that an ovpn >> > interface >> > needs a userspace program to be configured and used in a meaningful way, >> > therefore it was decided to concentrate all iface mgmt APIs along with the >> > others in the netlink family and to not expose any RTNL ops. >> >> Can you please point me to the message id? > > from >Sergey and subsequent replies. >RTNL vs NL topic starts right after the definition of 'ovpn_link_ops' Yeah, does not make sense to me. All devices should implement common rtnl ops, the extra-config, if needed, could be on a separate channel. I don't find Sergey's argumentation valid. > >Recently Kuniyuki commented on this topic as well in: ><20240919055259.17622-1-kun...@amazon.com> >and that is why I added a default dellink implemetation. Having dellink without newlink implemented is just wrong. > >> >> >> > >> > However, recently we decided to add a dellink implementation for better >> > integration with network namespaces and to allow the user to wipe a >> > dangling >> > interface. >> >> Hmm, one more argument to have symmetric add/del impletentation in RTNL >> >> >> > >> > In the future we are planning to also add the possibility to create a >> > "persistent interface", that is an interface created before launching any >> > userspace program and that survives when the latter is stopped. >> > I can guess this functionality may be better suited for RTNL, but I am not >> > sure yet. >> >> That would be quite confusing to have RTNL and genetlink iface to >> add/del device. From what you described above, makes more sent to have >> it just in RTNL > >All in all I tend to agree. > >> >> > >> > @Jiri: do you have any particular opinion why we should use RTNL ops and >> > not >> > netlink for creating/destroying interfaces? I feel this is mostly a matter >> > of >> > taste, but maybe there are technical reasons we should consider. >> >> Well. technically, you can probabaly do both. But it is quite common >> that you can add/delete these kind of devices over RTNL. Lots of >> examples. People are used to it, aligns with existing flows. > >The only counterargument I see is the one brought by Sergey: "the ovpn >interface is not usable after creation, if no openvpn process is running". > >However, allowing to create "persistent interfaces" will define a use-case >for having an ovpn device without any userspace process. > >@Sergey what is your opinion here? I am not sure persistent interfaces were >discussed at the time you brought your point about RTNL vs NL. > > >Regards, > > >> >> > >> > Thanks a lot for your contribution. >> > >> > Regards, >> > >> > >> > > >> > > >> > > ip link add [link DEV | parentdev NAME] [ name ] NAME >> > > [ txqueuelen PACKETS ] >> > > [ address LLADDR ] >> > > [ broadcast LLADDR ] >> > > [ mtu MTU ] [index IDX ] >> > > [ numtxqueues QUEUE_COUNT ] >> > > [ numrxqueues QUEUE_COUNT ] >> > > [ netns { PID | NETNSNAME | NETNSFILE } ] >> > > type TYPE [ ARGS ] >> > > >> > > ip link delete { DEVICE | dev DEVICE | group DEVGROUP } type TYPE [ ARGS >> > > ] >> > > >> > > Lots of examples of existing types creation is for example here: >> > > https://developers.redhat.com/blog/2018/10/22/introduction-to-linux-interfaces-for-virtual-networking >> > > >> > > >> > > >> > > > + attribute-set: ovpn >> > > > + flags: [ admin-perm ] >> > > > + doc: Delete existing interface of type ovpn >> > > > + do: >> > > > +pre: ovpn-nl-pre-doit >> > > > +post: ovpn-nl-post-doit >> > > > +request: >> > > > + attributes: >> > > > +- ifindex >> > > >> > > [...] >> > >> > -- >> > Antonio Quartulli >> > OpenVPN Inc. > >-- >Antonio Quartulli >OpenVPN Inc.
Re: [PATCH v2 6/8] net: ieee802154: do not leave a dangling sk pointer in ieee802154_create()
Hi Ignat, ig...@cloudflare.com wrote on Mon, 7 Oct 2024 22:35:00 +0100: > sock_init_data() attaches the allocated sk object to the provided sock > object. If ieee802154_create() fails later, the allocated sk object is > freed, but the dangling pointer remains in the provided sock object, which > may allow use-after-free. > > Clear the sk pointer in the sock object on error. > > Signed-off-by: Ignat Korchagin Reviewed-by: Miquel Raynal Thanks, Miquèl
[PATCH] selftests: net: rds: fix module not found
This fix solves this error, when calling kselftest with targets "net/rds": selftests: net/rds: test.py Traceback (most recent call last): File "tools/testing/selftests/net/rds/./test.py", line 17, in from lib.py import ip ModuleNotFoundError: No module named 'lib' Signed-off-by: Alessandro Zanni --- tools/testing/selftests/net/rds/test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/rds/test.py b/tools/testing/selftests/net/rds/test.py index e6bb109bcead..112a8059c030 100755 --- a/tools/testing/selftests/net/rds/test.py +++ b/tools/testing/selftests/net/rds/test.py @@ -14,8 +14,9 @@ import sys import atexit from pwd import getpwuid from os import stat -from lib.py import ip +sys.path.append("..") +from lib.py.utils import ip libc = ctypes.cdll.LoadLibrary('libc.so.6') setns = libc.setns -- 2.43.0
[PATCH] vdpa: solidrun: Fix UB bug with devres
In psnet_open_pf_bar() and snet_open_vf_bar() a string later passed to pcim_iomap_regions() is placed on the stack. Neither pcim_iomap_regions() nor the functions it calls copy that string. Should the string later ever be used, this, consequently, causes undefined behavior since the stack frame will by then have disappeared. Fix the bug by allocating the strings on the heap through devm_kasprintf(). Cc: sta...@vger.kernel.org # v6.3 Fixes: 51a8f9d7f587 ("virtio: vdpa: new SolidNET DPU driver.") Reported-by: Christophe JAILLET Closes: https://lore.kernel.org/all/74e9109a-ac59-49e2-9b1d-d825c9c9f...@wanadoo.fr/ Suggested-by: Andy Shevchenko Signed-off-by: Philipp Stanner --- drivers/vdpa/solidrun/snet_main.c | 14 ++ 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/vdpa/solidrun/snet_main.c b/drivers/vdpa/solidrun/snet_main.c index 99428a04068d..c8b74980dbd1 100644 --- a/drivers/vdpa/solidrun/snet_main.c +++ b/drivers/vdpa/solidrun/snet_main.c @@ -555,7 +555,7 @@ static const struct vdpa_config_ops snet_config_ops = { static int psnet_open_pf_bar(struct pci_dev *pdev, struct psnet *psnet) { - char name[50]; + char *name; int ret, i, mask = 0; /* We don't know which BAR will be used to communicate.. * We will map every bar with len > 0. @@ -573,7 +573,10 @@ static int psnet_open_pf_bar(struct pci_dev *pdev, struct psnet *psnet) return -ENODEV; } - snprintf(name, sizeof(name), "psnet[%s]-bars", pci_name(pdev)); + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "psnet[%s]-bars", pci_name(pdev)); + if (!name) + return -ENOMEM; + ret = pcim_iomap_regions(pdev, mask, name); if (ret) { SNET_ERR(pdev, "Failed to request and map PCI BARs\n"); @@ -590,10 +593,13 @@ static int psnet_open_pf_bar(struct pci_dev *pdev, struct psnet *psnet) static int snet_open_vf_bar(struct pci_dev *pdev, struct snet *snet) { - char name[50]; + char *name; int ret; - snprintf(name, sizeof(name), "snet[%s]-bar", pci_name(pdev)); + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "snet[%s]-bars", pci_name(pdev)); + if (!name) + return -ENOMEM; + /* Request and map BAR */ ret = pcim_iomap_regions(pdev, BIT(snet->psnet->cfg.vf_bar), name); if (ret) { -- 2.46.1
[RFC PATCH v3 4/4] sched+mm: Use hazard pointers to track lazy active mm existence
Replace lazy active mm existence tracking with hazard pointers. This removes the following implementations and their associated config options: - MMU_LAZY_TLB_REFCOUNT - MMU_LAZY_TLB_SHOOTDOWN - This removes the call_rcu delayed mm drop for RT. It leverages the fact that each CPU only ever have at most one single lazy active mm. This makes it a very good fit for a hazard pointer domain implemented with one hazard pointer slot per CPU. * Benchmarks: will-it-scale context_switch1_threads nr threads (-t) speedup 1-0.2% 2+0.4% 3+0.2% 6+0.6% 12+0.8% 24+3% 48 +12% 96 +21% 192 +28% 384+4% 768-0.6% Methodology: Each test is the average of 20 iterations. Use median result of 3 test runs. Test hardware: CPU(s): 384 On-line CPU(s) list:0-383 Vendor ID:AuthenticAMD Model name: AMD EPYC 9654 96-Core Processor CPU family: 25 Model:17 Thread(s) per core: 2 Core(s) per socket: 96 Socket(s):2 Stepping: 1 Frequency boost: enabled CPU(s) scaling MHz: 100% CPU max MHz: 3709. CPU min MHz: 400. BogoMIPS: 4799.75 Memory: 768 GB ram. Signed-off-by: Mathieu Desnoyers Cc: Nicholas Piggin Cc: Michael Ellerman Cc: Greg Kroah-Hartman Cc: Sebastian Andrzej Siewior Cc: "Paul E. McKenney" Cc: Will Deacon Cc: Peter Zijlstra Cc: Boqun Feng Cc: Alan Stern Cc: John Stultz Cc: Neeraj Upadhyay Cc: Linus Torvalds Cc: Andrew Morton Cc: Boqun Feng Cc: Frederic Weisbecker Cc: Joel Fernandes Cc: Josh Triplett Cc: Uladzislau Rezki Cc: Steven Rostedt Cc: Lai Jiangshan Cc: Zqiang Cc: Ingo Molnar Cc: Waiman Long Cc: Mark Rutland Cc: Thomas Gleixner Cc: Vlastimil Babka Cc: maged.mich...@gmail.com Cc: Mateusz Guzik Cc: Jonas Oberhauser Cc: r...@vger.kernel.org Cc: linux...@kvack.org Cc: l...@lists.linux.dev --- Documentation/mm/active_mm.rst | 9 ++-- arch/Kconfig | 32 - arch/powerpc/Kconfig | 1 - arch/powerpc/mm/book3s64/radix_tlb.c | 23 +- include/linux/mm_types.h | 3 -- include/linux/sched/mm.h | 68 ++-- kernel/exit.c| 4 +- kernel/fork.c| 47 +-- kernel/sched/sched.h | 8 +--- lib/Kconfig.debug| 10 10 files changed, 45 insertions(+), 160 deletions(-) diff --git a/Documentation/mm/active_mm.rst b/Documentation/mm/active_mm.rst index d096fc091e23..c225cac49c30 100644 --- a/Documentation/mm/active_mm.rst +++ b/Documentation/mm/active_mm.rst @@ -2,11 +2,10 @@ Active MM = -Note, the mm_count refcount may no longer include the "lazy" users -(running tasks with ->active_mm == mm && ->mm == NULL) on kernels -with CONFIG_MMU_LAZY_TLB_REFCOUNT=n. Taking and releasing these lazy -references must be done with mmgrab_lazy_tlb() and mmdrop_lazy_tlb() -helpers, which abstract this config option. +Note, the mm_count refcount no longer include the "lazy" users (running +tasks with ->active_mm == mm && ->mm == NULL) Taking and releasing these +lazy references must be done with mmgrab_lazy_tlb() and mmdrop_lazy_tlb() +helpers, which are implemented with hazard pointers. :: diff --git a/arch/Kconfig b/arch/Kconfig index 975dd22a2dbd..d4261935f8dc 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -475,38 +475,6 @@ config ARCH_WANT_IRQS_OFF_ACTIVATE_MM irqs disabled over activate_mm. Architectures that do IPI based TLB shootdowns should enable this. -# Use normal mm refcounting for MMU_LAZY_TLB kernel thread references. -# MMU_LAZY_TLB_REFCOUNT=n can improve the scalability of context switching -# to/from kernel threads when the same mm is running on a lot of CPUs (a large -# multi-threaded application), by reducing contention on the mm refcount. -# -# This can be disabled if the architecture ensures no CPUs are using an mm as a -# "lazy tlb" beyond its final refcount (i.e., by the time __mmdrop frees the mm -# or its kernel page tables). This could be arranged by arch_exit_mmap(), or -# final exit(2) TLB flush, for example. -# -# To implement this, an arch *must*: -# Ensure the _lazy_tlb variants of mmgrab/mmdrop are used when manipulating -# the lazy tlb reference of a kthread's ->active_mm (non-arch code has been -# converted already). -config MMU_LAZY_TLB_REFCOUNT - def_bool y - depends on !MMU_LAZY_TLB_SHOOTDOWN - -# This option allows MMU_LAZY_TLB_REFCOUNT=n. It ensures no CPUs are using an -# mm as a lazy tlb beyond its last reference count, by shooting down these -# users before the mm is deallocated. __mmdrop() first IPIs all CPUs that may -
[RFC PATCH v3 1/4] compiler.h: Introduce ptr_eq() to preserve address dependency
Compiler CSE and SSA GVN optimizations can cause the address dependency of addresses returned by rcu_dereference to be lost when comparing those pointers with either constants or previously loaded pointers. Introduce ptr_eq() to compare two addresses while preserving the address dependencies for later use of the address. It should be used when comparing an address returned by rcu_dereference(). This is needed to prevent the compiler CSE and SSA GVN optimizations from using @a (or @b) in places where the source refers to @b (or @a) based on the fact that after the comparison, the two are known to be equal, which does not preserve address dependencies and allows the following misordering speculations: - If @b is a constant, the compiler can issue the loads which depend on @a before loading @a. - If @b is a register populated by a prior load, weakly-ordered CPUs can speculate loads which depend on @a before loading @a. The same logic applies with @a and @b swapped. Suggested-by: Linus Torvalds Suggested-by: Boqun Feng Signed-off-by: Mathieu Desnoyers Reviewed-by: Boqun Feng Reviewed-by: Joel Fernandes (Google) Tested-by: Joel Fernandes (Google) Acked-by: "Paul E. McKenney" Acked-by: Alan Stern Cc: Greg Kroah-Hartman Cc: Sebastian Andrzej Siewior Cc: "Paul E. McKenney" Cc: Will Deacon Cc: Peter Zijlstra Cc: Boqun Feng Cc: Alan Stern Cc: John Stultz Cc: Neeraj Upadhyay Cc: Linus Torvalds Cc: Boqun Feng Cc: Frederic Weisbecker Cc: Joel Fernandes Cc: Josh Triplett Cc: Uladzislau Rezki Cc: Steven Rostedt Cc: Lai Jiangshan Cc: Zqiang Cc: Ingo Molnar Cc: Waiman Long Cc: Mark Rutland Cc: Thomas Gleixner Cc: Vlastimil Babka Cc: maged.mich...@gmail.com Cc: Mateusz Guzik Cc: Gary Guo Cc: Jonas Oberhauser Cc: r...@vger.kernel.org Cc: linux...@kvack.org Cc: l...@lists.linux.dev Cc: Nikita Popov Cc: l...@lists.linux.dev --- Changes since v0: - Include feedback from Alan Stern. --- include/linux/compiler.h | 63 1 file changed, 63 insertions(+) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 2df665fa2964..75a378ae7af1 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -186,6 +186,69 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, __asm__ ("" : "=r" (var) : "0" (var)) #endif +/* + * Compare two addresses while preserving the address dependencies for + * later use of the address. It should be used when comparing an address + * returned by rcu_dereference(). + * + * This is needed to prevent the compiler CSE and SSA GVN optimizations + * from using @a (or @b) in places where the source refers to @b (or @a) + * based on the fact that after the comparison, the two are known to be + * equal, which does not preserve address dependencies and allows the + * following misordering speculations: + * + * - If @b is a constant, the compiler can issue the loads which depend + * on @a before loading @a. + * - If @b is a register populated by a prior load, weakly-ordered + * CPUs can speculate loads which depend on @a before loading @a. + * + * The same logic applies with @a and @b swapped. + * + * Return value: true if pointers are equal, false otherwise. + * + * The compiler barrier() is ineffective at fixing this issue. It does + * not prevent the compiler CSE from losing the address dependency: + * + * int fct_2_volatile_barriers(void) + * { + * int *a, *b; + * + * do { + * a = READ_ONCE(p); + * asm volatile ("" : : : "memory"); + * b = READ_ONCE(p); + * } while (a != b); + * asm volatile ("" : : : "memory"); <-- barrier() + * return *b; + * } + * + * With gcc 14.2 (arm64): + * + * fct_2_volatile_barriers: + * adrpx0, .LANCHOR0 + * add x0, x0, :lo12:.LANCHOR0 + * .L2: + * ldr x1, [x0] <-- x1 populated by first load. + * ldr x2, [x0] + * cmp x1, x2 + * bne .L2 + * ldr w0, [x1] <-- x1 is used for access which should depend on b. + * ret + * + * On weakly-ordered architectures, this lets CPU speculation use the + * result from the first load to speculate "ldr w0, [x1]" before + * "ldr x2, [x0]". + * Based on the RCU documentation, the control dependency does not + * prevent the CPU from speculating loads. + */ +static __always_inline +int ptr_eq(const volatile void *a, const volatile void *b) +{ + OPTIMIZER_HIDE_VAR(a); + OPTIMIZER_HIDE_VAR(b); + return a == b; +} + #define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__) /** -- 2.39.2
[RFC PATCH v3 2/4] Documentation: RCU: Refer to ptr_eq()
Refer to ptr_eq() in the rcu_dereference() documentation. ptr_eq() is a mechanism that preserves address dependencies when comparing pointers, and should be favored when comparing a pointer obtained from rcu_dereference() against another pointer. Signed-off-by: Mathieu Desnoyers Acked-by: Alan Stern Acked-by: Paul E. McKenney Reviewed-by: Joel Fernandes (Google) Cc: Greg Kroah-Hartman Cc: Sebastian Andrzej Siewior Cc: "Paul E. McKenney" Cc: Will Deacon Cc: Peter Zijlstra Cc: Boqun Feng Cc: Alan Stern Cc: John Stultz Cc: Neeraj Upadhyay Cc: Linus Torvalds Cc: Boqun Feng Cc: Frederic Weisbecker Cc: Joel Fernandes Cc: Josh Triplett Cc: Uladzislau Rezki Cc: Steven Rostedt Cc: Lai Jiangshan Cc: Zqiang Cc: Ingo Molnar Cc: Waiman Long Cc: Mark Rutland Cc: Thomas Gleixner Cc: Vlastimil Babka Cc: maged.mich...@gmail.com Cc: Mateusz Guzik Cc: Gary Guo Cc: Jonas Oberhauser Cc: r...@vger.kernel.org Cc: linux...@kvack.org Cc: l...@lists.linux.dev Cc: Nikita Popov Cc: l...@lists.linux.dev --- Changes since v0: - Include feedback from Alan Stern. Changes since v1: - Include feedback from Paul E. McKenney. --- Documentation/RCU/rcu_dereference.rst | 38 +++ 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/Documentation/RCU/rcu_dereference.rst b/Documentation/RCU/rcu_dereference.rst index 2524dcdadde2..de6175bf430f 100644 --- a/Documentation/RCU/rcu_dereference.rst +++ b/Documentation/RCU/rcu_dereference.rst @@ -104,11 +104,12 @@ readers working properly: after such branches, but can speculate loads, which can again result in misordering bugs. -- Be very careful about comparing pointers obtained from - rcu_dereference() against non-NULL values. As Linus Torvalds - explained, if the two pointers are equal, the compiler could - substitute the pointer you are comparing against for the pointer - obtained from rcu_dereference(). For example:: +- Use operations that preserve address dependencies (such as + "ptr_eq()") to compare pointers obtained from rcu_dereference() + against non-NULL pointers. As Linus Torvalds explained, if the + two pointers are equal, the compiler could substitute the + pointer you are comparing against for the pointer obtained from + rcu_dereference(). For example:: p = rcu_dereference(gp); if (p == &default_struct) @@ -125,6 +126,29 @@ readers working properly: On ARM and Power hardware, the load from "default_struct.a" can now be speculated, such that it might happen before the rcu_dereference(). This could result in bugs due to misordering. + Performing the comparison with "ptr_eq()" ensures the compiler + does not perform such transformation. + + If the comparison is against another pointer, the compiler is + allowed to use either pointer for the following accesses, which + loses the address dependency and allows weakly-ordered + architectures such as ARM and PowerPC to speculate the + address-dependent load before rcu_dereference(). For example:: + + p1 = READ_ONCE(gp); + p2 = rcu_dereference(gp); + if (p1 == p2) /* BUGGY!!! */ + do_default(p2->a); + + The compiler can use p1->a rather than p2->a, destroying the + address dependency. Performing the comparison with "ptr_eq()" + ensures the compiler preserves the address dependencies. + Corrected code:: + + p1 = READ_ONCE(gp); + p2 = rcu_dereference(gp); + if (ptr_eq(p1, p2)) + do_default(p2->a); However, comparisons are OK in the following cases: @@ -204,6 +228,10 @@ readers working properly: comparison will provide exactly the information that the compiler needs to deduce the value of the pointer. + When in doubt, use operations that preserve address dependencies + (such as "ptr_eq()") to compare pointers obtained from + rcu_dereference() against non-NULL pointers. + - Disable any value-speculation optimizations that your compiler might provide, especially if you are making use of feedback-based optimizations that take data collected from prior runs. Such -- 2.39.2
[RFC PATCH v3 0/4] sched+mm: Track lazy active mm existence with hazard pointers
[ I'm posting a v3 taking care of feedback from Peter Zijlstra and Paul E. McKenney in case it can be useful to try hazard pointers with other use-cases, or for further benchmarking of active mm tracking impact. ] Hazard pointers appear to be a good fit for replacing refcount based lazy active mm tracking. Highlight: will-it-scale context_switch1_threads nr threads (-t) speedup 1-0.2% 2+0.4% 3+0.2% 6+0.6% 12+0.8% 24+3% 48 +12% 96 +21% 192 +28% 384+4% 768-0.6% This series applies on top of v6.11.1. Signed-off-by: Mathieu Desnoyers Cc: Linus Torvalds Cc: Andrew Morton Cc: Peter Zijlstra Cc: Nicholas Piggin Cc: Michael Ellerman Cc: Greg Kroah-Hartman Cc: Sebastian Andrzej Siewior Cc: "Paul E. McKenney" Cc: Will Deacon Cc: Alan Stern Cc: John Stultz Cc: Neeraj Upadhyay Cc: Boqun Feng Cc: Frederic Weisbecker Cc: Joel Fernandes Cc: Josh Triplett Cc: Uladzislau Rezki Cc: Steven Rostedt Cc: Lai Jiangshan Cc: Zqiang Cc: Ingo Molnar Cc: Waiman Long Cc: Mark Rutland Cc: Thomas Gleixner Cc: Vlastimil Babka Cc: maged.mich...@gmail.com Cc: Mateusz Guzik Cc: Jonas Oberhauser Cc: r...@vger.kernel.org Cc: linux...@kvack.org Cc: l...@lists.linux.dev Mathieu Desnoyers (4): compiler.h: Introduce ptr_eq() to preserve address dependency Documentation: RCU: Refer to ptr_eq() hazptr: Implement Hazard Pointers sched+mm: Use hazard pointers to track lazy active mm existence Documentation/RCU/rcu_dereference.rst | 38 +- Documentation/mm/active_mm.rst| 9 +- arch/Kconfig | 32 - arch/powerpc/Kconfig | 1 - arch/powerpc/mm/book3s64/radix_tlb.c | 23 +--- include/linux/compiler.h | 63 ++ include/linux/hazptr.h| 165 ++ include/linux/mm_types.h | 3 - include/linux/sched/mm.h | 68 --- kernel/Makefile | 2 +- kernel/exit.c | 4 +- kernel/fork.c | 47 ++-- kernel/hazptr.c | 51 kernel/sched/sched.h | 8 +- lib/Kconfig.debug | 10 -- 15 files changed, 358 insertions(+), 166 deletions(-) create mode 100644 include/linux/hazptr.h create mode 100644 kernel/hazptr.c -- 2.39.2
[RFC PATCH v3 3/4] hazptr: Implement Hazard Pointers
This API provides existence guarantees of objects through Hazard Pointers (hazptr). This minimalist implementation is specific to use with preemption disabled, but can be extended further as needed. Each hazptr domain defines a fixed number of hazard pointer slots (nr_cpus) across the entire system. Its main benefit over RCU is that it allows fast reclaim of HP-protected pointers without needing to wait for a grace period. It also allows the hazard pointer scan to call a user-defined callback to retire a hazard pointer slot immediately if needed. This callback may, for instance, issue an IPI to the relevant CPU. There are a few possible use-cases for this in the Linux kernel: - Improve performance of mm_count by replacing lazy active mm by hazptr. - Guarantee object existence on pointer dereference to use refcount: - replace locking used for that purpose in some drivers, - replace RCU + inc_not_zero pattern, - rtmutex: Improve situations where locks need to be taken in reverse dependency chain order by guaranteeing existence of first and second locks in traversal order, allowing them to be locked in the correct order (which is reverse from traversal order) rather than try-lock+retry on nested lock. References: [1]: M. M. Michael, "Hazard pointers: safe memory reclamation for lock-free objects," in IEEE Transactions on Parallel and Distributed Systems, vol. 15, no. 6, pp. 491-504, June 2004 Link: https://lore.kernel.org/lkml/j3scdl5iymjlxavomgc6u5ndg3svhab6ga23dr36o4f5mt333w@7xslvq6b6hmv/ Link: https://lpc.events/event/18/contributions/1731/ Signed-off-by: Mathieu Desnoyers Cc: Nicholas Piggin Cc: Michael Ellerman Cc: Greg Kroah-Hartman Cc: Sebastian Andrzej Siewior Cc: "Paul E. McKenney" Cc: Will Deacon Cc: Peter Zijlstra Cc: Boqun Feng Cc: Alan Stern Cc: John Stultz Cc: Neeraj Upadhyay Cc: Linus Torvalds Cc: Andrew Morton Cc: Boqun Feng Cc: Frederic Weisbecker Cc: Joel Fernandes Cc: Josh Triplett Cc: Uladzislau Rezki Cc: Steven Rostedt Cc: Lai Jiangshan Cc: Zqiang Cc: Ingo Molnar Cc: Waiman Long Cc: Mark Rutland Cc: Thomas Gleixner Cc: Vlastimil Babka Cc: maged.mich...@gmail.com Cc: Mateusz Guzik Cc: Jonas Oberhauser Cc: r...@vger.kernel.org Cc: linux...@kvack.org Cc: l...@lists.linux.dev --- Changes since v0: - Remove slot variable from hp_dereference_allocate(). Changes since v2: - Address Peter Zijlstra's comments. - Address Paul E. McKenney's comments. --- include/linux/hazptr.h | 165 + kernel/Makefile| 2 +- kernel/hazptr.c| 51 + 3 files changed, 217 insertions(+), 1 deletion(-) create mode 100644 include/linux/hazptr.h create mode 100644 kernel/hazptr.c diff --git a/include/linux/hazptr.h b/include/linux/hazptr.h new file mode 100644 index ..f8e36d2bdc58 --- /dev/null +++ b/include/linux/hazptr.h @@ -0,0 +1,165 @@ +// SPDX-FileCopyrightText: 2024 Mathieu Desnoyers +// +// SPDX-License-Identifier: LGPL-2.1-or-later + +#ifndef _LINUX_HAZPTR_H +#define _LINUX_HAZPTR_H + +/* + * HP: Hazard Pointers + * + * This API provides existence guarantees of objects through hazard + * pointers. + * + * It uses a fixed number of hazard pointer slots (nr_cpus) across the + * entire system for each hazard pointer domain. + * + * Its main benefit over RCU is that it allows fast reclaim of + * HP-protected pointers without needing to wait for a grace period. + * + * It also allows the hazard pointer scan to call a user-defined callback + * to retire a hazard pointer slot immediately if needed. This callback + * may, for instance, issue an IPI to the relevant CPU. + * + * References: + * + * [1]: M. M. Michael, "Hazard pointers: safe memory reclamation for + * lock-free objects," in IEEE Transactions on Parallel and + * Distributed Systems, vol. 15, no. 6, pp. 491-504, June 2004 + */ + +#include +#include + +/* + * Hazard pointer slot. + */ +struct hazptr_slot { + void *addr; +}; + +struct hazptr_domain { + struct hazptr_slot __percpu *percpu_slots; +}; + +#define DECLARE_HAZPTR_DOMAIN(domain) \ + extern struct hazptr_domain domain + +#define DEFINE_HAZPTR_DOMAIN(domain) \ + static DEFINE_PER_CPU(struct hazptr_slot, __ ## domain ## _slots); \ + struct hazptr_domain domain = { \ + .percpu_slots = &__## domain ## _slots, \ + } + +/* + * hazptr_scan: Scan hazard pointer domain for @addr. + * + * Scan hazard pointer domain for @addr. + * If @on_match_cb is NULL, wait to observe that each slot contains a value + * that differs from @addr. + * If @on_match_cb is non-NULL, invoke @on_match_cb for each slot containing + * @addr. + */ +void hazptr_scan(struct hazptr_domain *domain, void *addr, +void (*on_match_cb)(int cpu, struct hazptr_slot *slot, void *addr)); + +/* + *
Re: [PATCH 1/1] s390/virtio_ccw: fix dma_parm pointer not set up
On Mon, Oct 07, 2024 at 10:10 PM +0200, Halil Pasic wrote: > At least since commit 334304ac2bac ("dma-mapping: don't return errors > from dma_set_max_seg_size") setting up device.dma_parms is basically > mandated by the DMA API. As of now Channel (CCW) I/O in general does not > utilize the DMA API, except for virtio. For virtio-ccw however the > common virtio DMA infrastructure is such that most of the DMA stuff > hinges on the virtio parent device, which is a CCW device. > > So lets set up the dma_parms pointer for the CCW parent device and hope > for the best! > > Signed-off-by: Halil Pasic > Fixes: 334304ac2bac ("dma-mapping: don't return errors from > dma_set_max_seg_size") > Reported-by: "Marc Hartmayer" > Closes: https://bugzilla.linux.ibm.com/show_bug.cgi?id=209131 I guess, this line can be removed as it’s internal only. > Reviewed-by: Eric Farman > --- > > In the long run it may make sense to move dma_parms into struct > ccw_device, since layering-wise it is much cleaner. I decided > to put it in virtio_ccw_device because currently it is only used for > virtio. > > --- […snip…] Thanks for fixing this! Tested-by: Marc Hartmayer -- Kind regards / Beste Grüße Marc Hartmayer IBM Deutschland Research & Development GmbH Vorsitzender des Aufsichtsrats: Wolfgang Wendt Geschäftsführung: David Faller Sitz der Gesellschaft: Böblingen Registergericht: Amtsgericht Stuttgart, HRB 243294
[PATCH net-next v02 0/2] net: af_packet: allow joining a fanout when link is down
PACKET socket can retain its fanout membership through link down and up and leave a fanout while closed regardless of link state. However, socket was forbidden from joining a fanout while it was not RUNNING. This patch allows PACKET socket to join a fanout while not RUNNING. Selftest psock_fanout is extended to test this scenario. This is the only test that was performed. This scenario was identified while studying DPDK pmd_af_packet_drv. Since sockets are only created during initialization, there is no reason to fail the initialization if a single link is temporarily down. I hope it is not considered as breaking user space and that applications are not designed to expect this failure. Changes: V02: * psock_fanout: use explicit loopback up/down instead of toggle. * psock_fanout: don't try to restore loopback state on failure. * Rephrase commit message about "leaving a fanout". V01: https://lore.kernel.org/netdev/cover.1728303615.git.gur.st...@huawei.com/ Gur Stavi (2): af_packet: allow fanout_add when socket is not RUNNING selftests: net/psock_fanout: socket joins fanout when link is down net/packet/af_packet.c | 10 +++--- tools/testing/selftests/net/psock_fanout.c | 42 -- 2 files changed, 44 insertions(+), 8 deletions(-) base-commit: f95b4725e796b12e5f347a0d161e1d3843142aa8 -- 2.45.2
[PATCH net-next v02 2/2] selftests: net/psock_fanout: socket joins fanout when link is down
Modify test_control_group to have toggle parameter. When toggle is non-zero, loopback device will be set down for the initialization of fd[1] which is still expected to successfully join the fanout. Signed-off-by: Gur Stavi --- tools/testing/selftests/net/psock_fanout.c | 42 -- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c index 4f31e92ebd96..acdfae8f8a9a 100644 --- a/tools/testing/selftests/net/psock_fanout.c +++ b/tools/testing/selftests/net/psock_fanout.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include #include @@ -59,6 +60,33 @@ static uint32_t cfg_max_num_members; +static void loopback_set_up_down(int state_up) +{ + struct ifreq ifreq = {}; + int fd, err; + + fd = socket(AF_PACKET, SOCK_RAW, 0); + if (fd < 0) { + perror("socket loopback"); + exit(1); + } + strcpy(ifreq.ifr_name, "lo"); + err = ioctl(fd, SIOCGIFFLAGS, &ifreq); + if (err) { + perror("SIOCGIFFLAGS"); + exit(1); + } + if (state_up != !!(ifreq.ifr_flags & IFF_UP)) { + ifreq.ifr_flags ^= IFF_UP; + err = ioctl(fd, SIOCSIFFLAGS, &ifreq); + if (err) { + perror("SIOCSIFFLAGS"); + exit(1); + } + } + close(fd); +} + /* Open a socket in a given fanout mode. * @return -1 if mode is bad, a valid socket otherwise */ static int sock_fanout_open(uint16_t typeflags, uint16_t group_id) @@ -264,17 +292,22 @@ static void test_control_single(void) } /* Test illegal group with different modes or flags */ -static void test_control_group(void) +static void test_control_group(int toggle) { int fds[2]; - fprintf(stderr, "test: control multiple sockets\n"); + if (toggle) + fprintf(stderr, "test: control multiple sockets with link down toggle\n"); + else + fprintf(stderr, "test: control multiple sockets\n"); fds[0] = sock_fanout_open(PACKET_FANOUT_HASH, 0); if (fds[0] == -1) { fprintf(stderr, "ERROR: failed to open HASH socket\n"); exit(1); } + if (toggle) + loopback_set_up_down(0); if (sock_fanout_open(PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_DEFRAG, 0) != -1) { fprintf(stderr, "ERROR: joined group with wrong flag defrag\n"); @@ -294,6 +327,8 @@ static void test_control_group(void) fprintf(stderr, "ERROR: failed to join group\n"); exit(1); } + if (toggle) + loopback_set_up_down(1); if (close(fds[1]) || close(fds[0])) { fprintf(stderr, "ERROR: closing sockets\n"); exit(1); @@ -489,7 +524,8 @@ int main(int argc, char **argv) int port_off = 2, tries = 20, ret; test_control_single(); - test_control_group(); + test_control_group(0); + test_control_group(1); test_control_group_max_num_members(); test_unique_fanout_group_ids(); -- 2.45.2
[PATCH net-next v02 1/2] af_packet: allow fanout_add when socket is not RUNNING
PACKET socket can retain its fanout membership through link down and up and leave a fanout while closed regardless of link state. However, socket was forbidden from joining a fanout while it was not RUNNING. This patch allows PACKET socket to join fanout while not RUNNING. Signed-off-by: Gur Stavi --- net/packet/af_packet.c | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index f8942062f776..fb2cca73d953 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1846,21 +1846,21 @@ static int fanout_add(struct sock *sk, struct fanout_args *args) err = -EINVAL; spin_lock(&po->bind_lock); - if (packet_sock_flag(po, PACKET_SOCK_RUNNING) && - match->type == type && + if (match->type == type && match->prot_hook.type == po->prot_hook.type && match->prot_hook.dev == po->prot_hook.dev) { err = -ENOSPC; if (refcount_read(&match->sk_ref) < match->max_num_members) { - __dev_remove_pack(&po->prot_hook); - /* Paired with packet_setsockopt(PACKET_FANOUT_DATA) */ WRITE_ONCE(po->fanout, match); po->rollover = rollover; rollover = NULL; refcount_set(&match->sk_ref, refcount_read(&match->sk_ref) + 1); - __fanout_link(sk, po); + if (packet_sock_flag(po, PACKET_SOCK_RUNNING)) { + __dev_remove_pack(&po->prot_hook); + __fanout_link(sk, po); + } err = 0; } } -- 2.45.2
Re: [PATCH v10 7/7] remoteproc: stm32: Add support of an OP-TEE TA to load the firmware
Hi Arnaud, kernel test robot noticed the following build warnings: [auto build test WARNING on 9852d85ec9d492ebef56dc5f229416c925758edc] url: https://github.com/intel-lab-lkp/linux/commits/Arnaud-Pouliquen/remoteproc-core-Introduce-rproc_pa_to_va-helper/20241007-212358 base: 9852d85ec9d492ebef56dc5f229416c925758edc patch link: https://lore.kernel.org/r/20241007131620.2090104-8-arnaud.pouliquen%40foss.st.com patch subject: [PATCH v10 7/7] remoteproc: stm32: Add support of an OP-TEE TA to load the firmware config: alpha-kismet-CONFIG_REMOTEPROC_TEE-CONFIG_STM32_RPROC-0-0 (https://download.01.org/0day-ci/archive/20241008/202410081902.twqcmwjk-...@intel.com/config) reproduce: (https://download.01.org/0day-ci/archive/20241008/202410081902.twqcmwjk-...@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot | Closes: https://lore.kernel.org/oe-kbuild-all/202410081902.twqcmwjk-...@intel.com/ kismet warnings: (new ones prefixed by >>) >> kismet: WARNING: unmet direct dependencies detected for REMOTEPROC_TEE when >> selected by STM32_RPROC WARNING: unmet direct dependencies detected for REMOTEPROC_TEE Depends on [n]: REMOTEPROC [=y] && OPTEE [=n] Selected by [y]: - STM32_RPROC [=y] && (ARCH_STM32 || COMPILE_TEST [=y]) && REMOTEPROC [=y] -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki
[PATCH net-next v20 11/14] mm: page_frag: add testing for the newly added prepare API
Add testing for the newly added prepare API, for both aligned and non-aligned API, also probe API is also tested along with prepare API. CC: Alexander Duyck Signed-off-by: Yunsheng Lin --- .../selftests/mm/page_frag/page_frag_test.c | 66 +-- tools/testing/selftests/mm/run_vmtests.sh | 4 ++ tools/testing/selftests/mm/test_page_frag.sh | 31 + 3 files changed, 96 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/mm/page_frag/page_frag_test.c b/tools/testing/selftests/mm/page_frag/page_frag_test.c index 36543a129e40..567bcc6a181e 100644 --- a/tools/testing/selftests/mm/page_frag/page_frag_test.c +++ b/tools/testing/selftests/mm/page_frag/page_frag_test.c @@ -29,6 +29,10 @@ static bool test_align; module_param(test_align, bool, 0); MODULE_PARM_DESC(test_align, "use align API for testing"); +static bool test_prepare; +module_param(test_prepare, bool, 0); +MODULE_PARM_DESC(test_prepare, "use prepare API for testing"); + static int test_alloc_len = 2048; module_param(test_alloc_len, int, 0); MODULE_PARM_DESC(test_alloc_len, "alloc len for testing"); @@ -68,6 +72,18 @@ static int page_frag_pop_thread(void *arg) return 0; } +static void frag_frag_test_commit(struct page_frag_cache *nc, + struct page_frag *prepare_pfrag, + struct page_frag *probe_pfrag, + unsigned int used_sz) +{ + WARN_ON_ONCE(prepare_pfrag->page != probe_pfrag->page || +prepare_pfrag->offset != probe_pfrag->offset || +prepare_pfrag->size != probe_pfrag->size); + + page_frag_commit(nc, prepare_pfrag, used_sz); +} + static int page_frag_push_thread(void *arg) { struct ptr_ring *ring = arg; @@ -80,13 +96,52 @@ static int page_frag_push_thread(void *arg) int ret; if (test_align) { - va = page_frag_alloc_align(&test_nc, test_alloc_len, - GFP_KERNEL, SMP_CACHE_BYTES); + if (test_prepare) { + struct page_frag prepare_frag, probe_frag; + void *probe_va; + + va = page_frag_alloc_refill_prepare_align(&test_nc, + test_alloc_len, + &prepare_frag, + GFP_KERNEL, + SMP_CACHE_BYTES); + + probe_va = __page_frag_alloc_refill_probe_align(&test_nc, + test_alloc_len, + &probe_frag, + -SMP_CACHE_BYTES); + WARN_ON_ONCE(va != probe_va); + + if (likely(va)) + frag_frag_test_commit(&test_nc, &prepare_frag, + &probe_frag, test_alloc_len); + } else { + va = page_frag_alloc_align(&test_nc, + test_alloc_len, + GFP_KERNEL, + SMP_CACHE_BYTES); + } WARN_ONCE((unsigned long)va & (SMP_CACHE_BYTES - 1), "unaligned va returned\n"); } else { - va = page_frag_alloc(&test_nc, test_alloc_len, GFP_KERNEL); + if (test_prepare) { + struct page_frag prepare_frag, probe_frag; + void *probe_va; + + va = page_frag_alloc_refill_prepare(&test_nc, test_alloc_len, + &prepare_frag, GFP_KERNEL); + + probe_va = page_frag_alloc_refill_probe(&test_nc, test_alloc_len, + &probe_frag); + + WARN_ON_ONCE(va != probe_va); + if (likely(va)) + frag_frag_test_commit(&test_nc, &prepare_frag, + &probe_frag, test_alloc_len); + } else { + va = page_frag_alloc(&test_nc, test_alloc_len, GFP_KERNEL); + } }
[PATCH bpf] selftests/bpf: add missing header include for htons
Including the network_helpers.h header in tests can lead to the following build error: ./network_helpers.h: In function ‘csum_tcpudp_magic’: ./network_helpers.h:116:14: error: implicit declaration of function \ ‘htons’ [-Werror=implicit-function-declaration] 116 | s += htons(proto + len); The error is avoided in many cases thanks to some other headers included earlier and bringing in arpa/inet.h (ie: test_progs.h). Make sure that test_progs build success does not depend on header ordering by adding the missing header include in network_helpers.h Fixes: f6642de0c3e9 ("selftests/bpf: Add csum helpers") Signed-off-by: Alexis Lothoré (eBPF Foundation) --- tools/testing/selftests/bpf/network_helpers.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h index c72c16e1aff825439896b38e59962ffafe92dc71..5764155b6d25188ed38e828e1e4a8a08f8a83934 100644 --- a/tools/testing/selftests/bpf/network_helpers.h +++ b/tools/testing/selftests/bpf/network_helpers.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NETWORK_HELPERS_H #define __NETWORK_HELPERS_H +#include #include #include #include --- base-commit: 67a7c7b656cfc10a7280f71641fb9e88726e8a5d change-id: 20241008-network_helpers_fix-bbb7d1589930 Best regards, -- Alexis Lothoré, Bootlin Embedded Linux and Kernel engineering https://bootlin.com
[PATCH] Fix typo in vringh_test.c
Corrected minor typo in tools/virtio/vringh_test.c: - Fixed "retreives" to "retrieves" Signed-off-by: Shivam Chaudhary --- tools/virtio/vringh_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/virtio/vringh_test.c b/tools/virtio/vringh_test.c index 43d3a6aa1dcf..b9591223437a 100644 --- a/tools/virtio/vringh_test.c +++ b/tools/virtio/vringh_test.c @@ -519,7 +519,7 @@ int main(int argc, char *argv[]) errx(1, "virtqueue_add_sgs: %i", err); __kmalloc_fake = NULL; - /* Host retreives it. */ + /* Host retrieves it. */ vringh_iov_init(&riov, host_riov, ARRAY_SIZE(host_riov)); vringh_iov_init(&wiov, host_wiov, ARRAY_SIZE(host_wiov)); -- 2.34.1
[PATCH] selftest: hid: add the missing tests directory
Commit 160c826b4dd0 ("selftest: hid: add missing run-hid-tools-tests.sh") has added the run-hid-tools-tests.sh script for it to be installed, but I forgot to add the tests directory together. In fact, the run-hid-tools-tests.sh script uses the scripts in the tests directory to run tests. The tests directory also needs to be added to be installed. Fixes: ffb85d5c9e80 ("selftests: hid: import hid-tools hid-core tests") Cc: sta...@vger.kernel.org Signed-off-by: Yun Lu --- tools/testing/selftests/hid/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/hid/Makefile b/tools/testing/selftests/hid/Makefile index 38ae31bb07b5..662209f5fabc 100644 --- a/tools/testing/selftests/hid/Makefile +++ b/tools/testing/selftests/hid/Makefile @@ -18,6 +18,7 @@ TEST_PROGS += hid-usb_crash.sh TEST_PROGS += hid-wacom.sh TEST_FILES := run-hid-tools-tests.sh +TEST_FILES += tests CXX ?= $(CROSS_COMPILE)g++ -- 2.27.0
Re: [PATCH v10 7/7] remoteproc: stm32: Add support of an OP-TEE TA to load the firmware
Hi Arnaud, kernel test robot noticed the following build errors: [auto build test ERROR on 9852d85ec9d492ebef56dc5f229416c925758edc] url: https://github.com/intel-lab-lkp/linux/commits/Arnaud-Pouliquen/remoteproc-core-Introduce-rproc_pa_to_va-helper/20241007-212358 base: 9852d85ec9d492ebef56dc5f229416c925758edc patch link: https://lore.kernel.org/r/20241007131620.2090104-8-arnaud.pouliquen%40foss.st.com patch subject: [PATCH v10 7/7] remoteproc: stm32: Add support of an OP-TEE TA to load the firmware config: parisc-randconfig-001-20241008 (https://download.01.org/0day-ci/archive/20241008/202410081704.zo2k0szq-...@intel.com/config) compiler: hppa-linux-gcc (GCC) 14.1.0 reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20241008/202410081704.zo2k0szq-...@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot | Closes: https://lore.kernel.org/oe-kbuild-all/202410081704.zo2k0szq-...@intel.com/ All errors (new ones prefixed by >>): hppa-linux-ld: drivers/remoteproc/remoteproc_tee.o: in function `tee_rproc_load_fw': >> (.text+0xa8): undefined reference to `tee_shm_register_kernel_buf' >> hppa-linux-ld: (.text+0x160): undefined reference to `tee_client_invoke_func' >> hppa-linux-ld: (.text+0x178): undefined reference to `tee_shm_free' hppa-linux-ld: drivers/remoteproc/remoteproc_tee.o: in function `tee_rproc_register': >> (.text+0x2f4): undefined reference to `tee_client_open_session' hppa-linux-ld: drivers/remoteproc/remoteproc_tee.o: in function `tee_rproc_unregister': >> (.text+0x3d4): undefined reference to `tee_client_close_session' hppa-linux-ld: drivers/remoteproc/remoteproc_tee.o: in function `tee_rproc_probe': >> (.text+0x478): undefined reference to `tee_client_open_context' >> hppa-linux-ld: (.text+0x4f8): undefined reference to >> `tee_client_close_context' hppa-linux-ld: drivers/remoteproc/remoteproc_tee.o: in function `tee_rproc_remove': (.text+0x558): undefined reference to `tee_client_close_session' hppa-linux-ld: (.text+0x59c): undefined reference to `tee_client_close_context' hppa-linux-ld: drivers/remoteproc/remoteproc_tee.o: in function `tee_rproc_start': >> (.text+0x68c): undefined reference to `tee_client_invoke_func' hppa-linux-ld: drivers/remoteproc/remoteproc_tee.o: in function `tee_rproc_stop': (.text+0x7c8): undefined reference to `tee_client_invoke_func' hppa-linux-ld: drivers/remoteproc/remoteproc_tee.o: in function `tee_rproc_get_loaded_rsc_table': (.text+0x92c): undefined reference to `tee_client_invoke_func' hppa-linux-ld: drivers/remoteproc/remoteproc_tee.o: in function `tee_rproc_release_fw': (.text+0xb18): undefined reference to `tee_client_invoke_func' >> hppa-linux-ld: drivers/remoteproc/remoteproc_tee.o:(.data+0x8): undefined >> reference to `tee_bus_type' Kconfig warnings: (for reference only) WARNING: unmet direct dependencies detected for REMOTEPROC_TEE Depends on [n]: REMOTEPROC [=y] && OPTEE [=n] Selected by [y]: - STM32_RPROC [=y] && (ARCH_STM32 || COMPILE_TEST [=y]) && REMOTEPROC [=y] -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki
Re: [PATCH] mailbox, remoteproc: k3-m4+: fix compile testing
On Mon, Oct 07, 2024 at 01:23:57PM +, Arnd Bergmann wrote: > From: Arnd Bergmann > > The k3-m4 remoteproc driver was merged with incorrect dependencies. > Despite multiple people trying to fix this, the version 6.12-rc2 > remains broken and causes a build failure with CONFIG_TI_SCI_PROTOCOL=m > when the driver is built-in. > (Sigh) I will do my own testing and will get back to you. > arm-linux-gnueabi-ld: drivers/remoteproc/ti_k3_m4_remoteproc.o: in function > `k3_m4_rproc_probe': > ti_k3_m4_remoteproc.c:(.text.k3_m4_rproc_probe+0x76): undefined reference to > `devm_ti_sci_get_by_phandle' > > Fix the dependency again to make it work in all configurations. > The 'select OMAP2PLUS_MBOX' no longer matches what the other drivers > dependencies. The link failure can be avoided with a simple 'depends > do, so turn that into the same 'depends' to ensure we get no circular > on TI_SCI_PROTOCOL', but the extra COMPILE_TEST alternative is what > we use elsehwere. On the other hand, building for OMAP2PLUS makes > no sense since the hardware only exists on K3. > > Fixes: ebcf9008a895 ("remoteproc: k3-m4: Add a remoteproc driver for M4F > subsystem") > Fixes: ba0c0cb56f22 ("remoteproc: k3-m4: use the proper dependencies") > Fixes: 54595f2807d2 ("mailbox, remoteproc: omap2+: fix compile testing") > Signed-off-by: Arnd Bergmann > --- > drivers/remoteproc/Kconfig | 6 +++--- > 1 file changed, 3 insertions(+), 3 deletions(-) > > diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig > index 955e4e38477e..62f8548fb46a 100644 > --- a/drivers/remoteproc/Kconfig > +++ b/drivers/remoteproc/Kconfig > @@ -341,9 +341,9 @@ config TI_K3_DSP_REMOTEPROC > > config TI_K3_M4_REMOTEPROC > tristate "TI K3 M4 remoteproc support" > - depends on ARCH_OMAP2PLUS || ARCH_K3 > - select MAILBOX > - select OMAP2PLUS_MBOX > + depends on ARCH_K3 || COMPILE_TEST > + depends on TI_SCI_PROTOCOL || (COMPILE_TEST && TI_SCI_PROTOCOL=n) > + depends on OMAP2PLUS_MBOX > help > Say m here to support TI's M4 remote processor subsystems > on various TI K3 family of SoCs through the remote processor > -- > 2.39.2 >
Re: [PATCH v10 7/7] remoteproc: stm32: Add support of an OP-TEE TA to load the firmware
>From hereon and starting with this version, I will not review patchets that don't pass the compilation bots. Mathieu On Tue, Oct 08, 2024 at 07:07:40PM +0800, kernel test robot wrote: > Hi Arnaud, > > kernel test robot noticed the following build warnings: > > [auto build test WARNING on 9852d85ec9d492ebef56dc5f229416c925758edc] > > url: > https://github.com/intel-lab-lkp/linux/commits/Arnaud-Pouliquen/remoteproc-core-Introduce-rproc_pa_to_va-helper/20241007-212358 > base: 9852d85ec9d492ebef56dc5f229416c925758edc > patch link: > https://lore.kernel.org/r/20241007131620.2090104-8-arnaud.pouliquen%40foss.st.com > patch subject: [PATCH v10 7/7] remoteproc: stm32: Add support of an OP-TEE TA > to load the firmware > config: alpha-kismet-CONFIG_REMOTEPROC_TEE-CONFIG_STM32_RPROC-0-0 > (https://download.01.org/0day-ci/archive/20241008/202410081902.twqcmwjk-...@intel.com/config) > reproduce: > (https://download.01.org/0day-ci/archive/20241008/202410081902.twqcmwjk-...@intel.com/reproduce) > > If you fix the issue in a separate patch/commit (i.e. not just a new version > of > the same patch/commit), kindly add following tags > | Reported-by: kernel test robot > | Closes: > https://lore.kernel.org/oe-kbuild-all/202410081902.twqcmwjk-...@intel.com/ > > kismet warnings: (new ones prefixed by >>) > >> kismet: WARNING: unmet direct dependencies detected for REMOTEPROC_TEE > >> when selected by STM32_RPROC >WARNING: unmet direct dependencies detected for REMOTEPROC_TEE > Depends on [n]: REMOTEPROC [=y] && OPTEE [=n] > Selected by [y]: > - STM32_RPROC [=y] && (ARCH_STM32 || COMPILE_TEST [=y]) && REMOTEPROC > [=y] > > -- > 0-DAY CI Kernel Test Service > https://github.com/intel/lkp-tests/wiki
Re: [PATCH net-next v02 1/2] af_packet: allow fanout_add when socket is not RUNNING
Gur Stavi wrote: > PACKET socket can retain its fanout membership through link down and up > and leave a fanout while closed regardless of link state. > However, socket was forbidden from joining a fanout while it was not > RUNNING. > > This patch allows PACKET socket to join fanout while not RUNNING. > > Signed-off-by: Gur Stavi > --- > net/packet/af_packet.c | 10 +- > 1 file changed, 5 insertions(+), 5 deletions(-) > > diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c > index f8942062f776..fb2cca73d953 100644 > --- a/net/packet/af_packet.c > +++ b/net/packet/af_packet.c > @@ -1846,21 +1846,21 @@ static int fanout_add(struct sock *sk, struct > fanout_args *args) > err = -EINVAL; > > spin_lock(&po->bind_lock); > - if (packet_sock_flag(po, PACKET_SOCK_RUNNING) && > - match->type == type && > + if (match->type == type && > match->prot_hook.type == po->prot_hook.type && > match->prot_hook.dev == po->prot_hook.dev) { Remaining unaddressed issue is that the socket can now be added before being bound. See comment in v1. > err = -ENOSPC; > if (refcount_read(&match->sk_ref) < match->max_num_members) { > - __dev_remove_pack(&po->prot_hook); > - > /* Paired with packet_setsockopt(PACKET_FANOUT_DATA) */ > WRITE_ONCE(po->fanout, match); > > po->rollover = rollover; > rollover = NULL; > refcount_set(&match->sk_ref, > refcount_read(&match->sk_ref) + 1); > - __fanout_link(sk, po); > + if (packet_sock_flag(po, PACKET_SOCK_RUNNING)) { > + __dev_remove_pack(&po->prot_hook); > + __fanout_link(sk, po); > + } > err = 0; > } > } > -- > 2.45.2 >
Re: [PATCH net-next v8 03/24] ovpn: add basic netlink support
On 08/10/2024 14:52, Jiri Pirko wrote: Tue, Oct 08, 2024 at 11:16:01AM CEST, anto...@openvpn.net wrote: On 08/10/2024 10:58, Jiri Pirko wrote: Tue, Oct 08, 2024 at 10:01:40AM CEST, anto...@openvpn.net wrote: Hi, On 07/10/24 17:32, Jiri Pirko wrote: Wed, Oct 02, 2024 at 11:02:17AM CEST, anto...@openvpn.net wrote: [...] +operations: + list: +- + name: dev-new + attribute-set: ovpn + flags: [ admin-perm ] + doc: Create a new interface of type ovpn + do: +request: + attributes: +- ifname +- mode +reply: + attributes: +- ifname +- ifindex +- + name: dev-del Why you expose new and del here in ovn specific generic netlink iface? Why can't you use the exising RTNL api which is used for creation and destruction of other types of devices? That was my original approach in v1, but it was argued that an ovpn interface needs a userspace program to be configured and used in a meaningful way, therefore it was decided to concentrate all iface mgmt APIs along with the others in the netlink family and to not expose any RTNL ops. Can you please point me to the message id? from Sergey and subsequent replies. RTNL vs NL topic starts right after the definition of 'ovpn_link_ops' Yeah, does not make sense to me. All devices should implement common rtnl ops, the extra-config, if needed, could be on a separate channel. I don't find Sergey's argumentation valid. Thanks a lot for taking the time to read our conversation. Ok, considering all points we have discussed so far (including future developments already on the roadmap), I think it makes sense to go back to RTNL and drop the new/del-dev ops from the netlink family. Will do that in v9. Regards, Recently Kuniyuki commented on this topic as well in: <20240919055259.17622-1-kun...@amazon.com> and that is why I added a default dellink implemetation. Having dellink without newlink implemented is just wrong. However, recently we decided to add a dellink implementation for better integration with network namespaces and to allow the user to wipe a dangling interface. Hmm, one more argument to have symmetric add/del impletentation in RTNL In the future we are planning to also add the possibility to create a "persistent interface", that is an interface created before launching any userspace program and that survives when the latter is stopped. I can guess this functionality may be better suited for RTNL, but I am not sure yet. That would be quite confusing to have RTNL and genetlink iface to add/del device. From what you described above, makes more sent to have it just in RTNL All in all I tend to agree. @Jiri: do you have any particular opinion why we should use RTNL ops and not netlink for creating/destroying interfaces? I feel this is mostly a matter of taste, but maybe there are technical reasons we should consider. Well. technically, you can probabaly do both. But it is quite common that you can add/delete these kind of devices over RTNL. Lots of examples. People are used to it, aligns with existing flows. The only counterargument I see is the one brought by Sergey: "the ovpn interface is not usable after creation, if no openvpn process is running". However, allowing to create "persistent interfaces" will define a use-case for having an ovpn device without any userspace process. @Sergey what is your opinion here? I am not sure persistent interfaces were discussed at the time you brought your point about RTNL vs NL. Regards, Thanks a lot for your contribution. Regards, ip link add [link DEV | parentdev NAME] [ name ] NAME [ txqueuelen PACKETS ] [ address LLADDR ] [ broadcast LLADDR ] [ mtu MTU ] [index IDX ] [ numtxqueues QUEUE_COUNT ] [ numrxqueues QUEUE_COUNT ] [ netns { PID | NETNSNAME | NETNSFILE } ] type TYPE [ ARGS ] ip link delete { DEVICE | dev DEVICE | group DEVGROUP } type TYPE [ ARGS ] Lots of examples of existing types creation is for example here: https://developers.redhat.com/blog/2018/10/22/introduction-to-linux-interfaces-for-virtual-networking + attribute-set: ovpn + flags: [ admin-perm ] + doc: Delete existing interface of type ovpn + do: +pre: ovpn-nl-pre-doit +post: ovpn-nl-post-doit +request: + attributes: +- ifindex [...] -- Antonio Quartulli OpenVPN Inc. -- Antonio Quartulli OpenVPN Inc. -- Antonio Quartulli OpenVPN Inc.
Re: [PATCH v4 0/2] virtiofs: fix the warning for kernel direct IO
On Sat, 31 Aug 2024 at 11:38, Hou Tao wrote: > > From: Hou Tao > > Hi, > > The patch set aims to fix the warning related to an abnormal size > parameter of kmalloc() in virtiofs. Patch #1 fixes it by introducing > use_pages_for_kvec_io option in fuse_conn and enabling it in virtiofs. > Beside the abnormal size parameter for kmalloc, the gfp parameter is > also questionable: GFP_ATOMIC is used even when the allocation occurs > in a kworker context. Patch #2 fixes it by using GFP_NOFS when the > allocation is initiated by the kworker. For more details, please check > the individual patches. Applied, thanks. Miklos
[PATCH net-next v20 01/14] mm: page_frag: add a test module for page_frag
The testing is done by ensuring that the fragment allocated from a frag_frag_cache instance is pushed into a ptr_ring instance in a kthread binded to a specified cpu, and a kthread binded to a specified cpu will pop the fragment from the ptr_ring and free the fragment. CC: Alexander Duyck Signed-off-by: Yunsheng Lin Reviewed-by: Alexander Duyck --- tools/testing/selftests/mm/Makefile | 3 + tools/testing/selftests/mm/page_frag/Makefile | 18 ++ .../selftests/mm/page_frag/page_frag_test.c | 173 ++ tools/testing/selftests/mm/run_vmtests.sh | 8 + tools/testing/selftests/mm/test_page_frag.sh | 171 + 5 files changed, 373 insertions(+) create mode 100644 tools/testing/selftests/mm/page_frag/Makefile create mode 100644 tools/testing/selftests/mm/page_frag/page_frag_test.c create mode 100755 tools/testing/selftests/mm/test_page_frag.sh diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile index 02e1204971b0..acec529baaca 100644 --- a/tools/testing/selftests/mm/Makefile +++ b/tools/testing/selftests/mm/Makefile @@ -36,6 +36,8 @@ MAKEFLAGS += --no-builtin-rules CFLAGS = -Wall -I $(top_srcdir) $(EXTRA_CFLAGS) $(KHDR_INCLUDES) $(TOOLS_INCLUDES) LDLIBS = -lrt -lpthread -lm +TEST_GEN_MODS_DIR := page_frag + TEST_GEN_FILES = cow TEST_GEN_FILES += compaction_test TEST_GEN_FILES += gup_longterm @@ -126,6 +128,7 @@ TEST_FILES += test_hmm.sh TEST_FILES += va_high_addr_switch.sh TEST_FILES += charge_reserved_hugetlb.sh TEST_FILES += hugetlb_reparenting_test.sh +TEST_FILES += test_page_frag.sh # required by charge_reserved_hugetlb.sh TEST_FILES += write_hugetlb_memory.sh diff --git a/tools/testing/selftests/mm/page_frag/Makefile b/tools/testing/selftests/mm/page_frag/Makefile new file mode 100644 index ..58dda74d50a3 --- /dev/null +++ b/tools/testing/selftests/mm/page_frag/Makefile @@ -0,0 +1,18 @@ +PAGE_FRAG_TEST_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST) +KDIR ?= $(abspath $(PAGE_FRAG_TEST_DIR)/../../../../..) + +ifeq ($(V),1) +Q = +else +Q = @ +endif + +MODULES = page_frag_test.ko + +obj-m += page_frag_test.o + +all: + +$(Q)make -C $(KDIR) M=$(PAGE_FRAG_TEST_DIR) modules + +clean: + +$(Q)make -C $(KDIR) M=$(PAGE_FRAG_TEST_DIR) clean diff --git a/tools/testing/selftests/mm/page_frag/page_frag_test.c b/tools/testing/selftests/mm/page_frag/page_frag_test.c new file mode 100644 index ..eeb2b6bc681a --- /dev/null +++ b/tools/testing/selftests/mm/page_frag/page_frag_test.c @@ -0,0 +1,173 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Test module for page_frag cache + * + * Copyright (C) 2024 Yunsheng Lin + */ + +#include +#include +#include +#include +#include +#include + +static struct ptr_ring ptr_ring; +static int nr_objs = 512; +static atomic_t nthreads; +static struct completion wait; +static struct page_frag_cache test_nc; +static int test_popped; +static int test_pushed; + +static int nr_test = 200; +module_param(nr_test, int, 0); +MODULE_PARM_DESC(nr_test, "number of iterations to test"); + +static bool test_align; +module_param(test_align, bool, 0); +MODULE_PARM_DESC(test_align, "use align API for testing"); + +static int test_alloc_len = 2048; +module_param(test_alloc_len, int, 0); +MODULE_PARM_DESC(test_alloc_len, "alloc len for testing"); + +static int test_push_cpu; +module_param(test_push_cpu, int, 0); +MODULE_PARM_DESC(test_push_cpu, "test cpu for pushing fragment"); + +static int test_pop_cpu; +module_param(test_pop_cpu, int, 0); +MODULE_PARM_DESC(test_pop_cpu, "test cpu for popping fragment"); + +static int page_frag_pop_thread(void *arg) +{ + struct ptr_ring *ring = arg; + + pr_info("page_frag pop test thread begins on cpu %d\n", + smp_processor_id()); + + while (test_popped < nr_test) { + void *obj = __ptr_ring_consume(ring); + + if (obj) { + test_popped++; + page_frag_free(obj); + } else { + cond_resched(); + } + } + + if (atomic_dec_and_test(&nthreads)) + complete(&wait); + + pr_info("page_frag pop test thread exits on cpu %d\n", + smp_processor_id()); + + return 0; +} + +static int page_frag_push_thread(void *arg) +{ + struct ptr_ring *ring = arg; + + pr_info("page_frag push test thread begins on cpu %d\n", + smp_processor_id()); + + while (test_pushed < nr_test) { + void *va; + int ret; + + if (test_align) { + va = page_frag_alloc_align(&test_nc, test_alloc_len, + GFP_KERNEL, SMP_CACHE_BYTES); + + WARN_ONCE((unsigned long)va & (SMP_CACHE_BYTES - 1), + "unaligned va returned\n"); + } else { +
[PATCH net-next v20 02/14] mm: move the page fragment allocator from page_alloc into its own file
Inspired by [1], move the page fragment allocator from page_alloc into its own c file and header file, as we are about to make more change for it to replace another page_frag implementation in sock.c As this patchset is going to replace 'struct page_frag' with 'struct page_frag_cache' in sched.h, including page_frag_cache.h in sched.h has a compiler error caused by interdependence between mm_types.h and mm.h for asm-offsets.c, see [2]. So avoid the compiler error by moving 'struct page_frag_cache' to mm_types_task.h as suggested by Alexander, see [3]. 1. https://lore.kernel.org/all/20230411160902.4134381-3-dhowe...@redhat.com/ 2. https://lore.kernel.org/all/15623dac-9358-4597-b3ee-3694a5956...@gmail.com/ 3. https://lore.kernel.org/all/CAKgT0UdH1yD=LSCXFJ=ym_aia4oomd-2wxyko42bizawmt_...@mail.gmail.com/ CC: David Howells CC: Alexander Duyck Signed-off-by: Yunsheng Lin Acked-by: Andrew Morton Reviewed-by: Alexander Duyck --- include/linux/gfp.h | 22 --- include/linux/mm_types.h | 18 --- include/linux/mm_types_task.h | 18 +++ include/linux/page_frag_cache.h | 31 include/linux/skbuff.h| 1 + mm/Makefile | 1 + mm/page_alloc.c | 136 mm/page_frag_cache.c | 145 ++ .../selftests/mm/page_frag/page_frag_test.c | 2 +- 9 files changed, 197 insertions(+), 177 deletions(-) create mode 100644 include/linux/page_frag_cache.h create mode 100644 mm/page_frag_cache.c diff --git a/include/linux/gfp.h b/include/linux/gfp.h index a951de920e20..a0a6d25f883f 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -371,28 +371,6 @@ __meminit void *alloc_pages_exact_nid_noprof(int nid, size_t size, gfp_t gfp_mas extern void __free_pages(struct page *page, unsigned int order); extern void free_pages(unsigned long addr, unsigned int order); -struct page_frag_cache; -void page_frag_cache_drain(struct page_frag_cache *nc); -extern void __page_frag_cache_drain(struct page *page, unsigned int count); -void *__page_frag_alloc_align(struct page_frag_cache *nc, unsigned int fragsz, - gfp_t gfp_mask, unsigned int align_mask); - -static inline void *page_frag_alloc_align(struct page_frag_cache *nc, - unsigned int fragsz, gfp_t gfp_mask, - unsigned int align) -{ - WARN_ON_ONCE(!is_power_of_2(align)); - return __page_frag_alloc_align(nc, fragsz, gfp_mask, -align); -} - -static inline void *page_frag_alloc(struct page_frag_cache *nc, -unsigned int fragsz, gfp_t gfp_mask) -{ - return __page_frag_alloc_align(nc, fragsz, gfp_mask, ~0u); -} - -extern void page_frag_free(void *addr); - #define __free_page(page) __free_pages((page), 0) #define free_page(addr) free_pages((addr), 0) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 6e3bdf8e38bc..92314ef2d978 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -521,9 +521,6 @@ static_assert(sizeof(struct ptdesc) <= sizeof(struct page)); */ #define STRUCT_PAGE_MAX_SHIFT (order_base_2(sizeof(struct page))) -#define PAGE_FRAG_CACHE_MAX_SIZE __ALIGN_MASK(32768, ~PAGE_MASK) -#define PAGE_FRAG_CACHE_MAX_ORDER get_order(PAGE_FRAG_CACHE_MAX_SIZE) - /* * page_private can be used on tail pages. However, PagePrivate is only * checked by the VM on the head page. So page_private on the tail pages @@ -542,21 +539,6 @@ static inline void *folio_get_private(struct folio *folio) return folio->private; } -struct page_frag_cache { - void * va; -#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) - __u16 offset; - __u16 size; -#else - __u32 offset; -#endif - /* we maintain a pagecount bias, so that we dont dirty cache line -* containing page->_refcount every time we allocate a fragment. -*/ - unsigned intpagecnt_bias; - bool pfmemalloc; -}; - typedef unsigned long vm_flags_t; /* diff --git a/include/linux/mm_types_task.h b/include/linux/mm_types_task.h index bff5706b76e1..0ac6daebdd5c 100644 --- a/include/linux/mm_types_task.h +++ b/include/linux/mm_types_task.h @@ -8,6 +8,7 @@ * (These are defined separately to decouple sched.h from mm_types.h as much as possible.) */ +#include #include #include @@ -43,6 +44,23 @@ struct page_frag { #endif }; +#define PAGE_FRAG_CACHE_MAX_SIZE __ALIGN_MASK(32768, ~PAGE_MASK) +#define PAGE_FRAG_CACHE_MAX_ORDER get_order(PAGE_FRAG_CACHE_MAX_SIZE) +struct page_frag_cache { + void *va; +#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) + __u16 offset; + __u16 size; +#else + __u32 offset; +#endif + /* we maintain a pagecount bias, so that we dont dirty cache line +* containing pa
[PATCH net-next v20 04/14] mm: page_frag: avoid caller accessing 'page_frag_cache' directly
Use appropriate frag_page API instead of caller accessing 'page_frag_cache' directly. CC: Alexander Duyck Signed-off-by: Yunsheng Lin Reviewed-by: Alexander Duyck Acked-by: Chuck Lever --- drivers/vhost/net.c | 2 +- include/linux/page_frag_cache.h | 10 ++ net/core/skbuff.c | 6 +++--- net/rxrpc/conn_object.c | 4 +--- net/rxrpc/local_object.c | 4 +--- net/sunrpc/svcsock.c | 6 ++ tools/testing/selftests/mm/page_frag/page_frag_test.c | 2 +- 7 files changed, 19 insertions(+), 15 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index f16279351db5..9ad37c012189 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -1325,7 +1325,7 @@ static int vhost_net_open(struct inode *inode, struct file *f) vqs[VHOST_NET_VQ_RX]); f->private_data = n; - n->pf_cache.va = NULL; + page_frag_cache_init(&n->pf_cache); return 0; } diff --git a/include/linux/page_frag_cache.h b/include/linux/page_frag_cache.h index 67ac8626ed9b..0a52f7a179c8 100644 --- a/include/linux/page_frag_cache.h +++ b/include/linux/page_frag_cache.h @@ -7,6 +7,16 @@ #include #include +static inline void page_frag_cache_init(struct page_frag_cache *nc) +{ + nc->va = NULL; +} + +static inline bool page_frag_cache_is_pfmemalloc(struct page_frag_cache *nc) +{ + return !!nc->pfmemalloc; +} + void page_frag_cache_drain(struct page_frag_cache *nc); void __page_frag_cache_drain(struct page *page, unsigned int count); void *__page_frag_alloc_align(struct page_frag_cache *nc, unsigned int fragsz, diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 74149dc4ee31..ca01880c7ad0 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -753,14 +753,14 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len, if (in_hardirq() || irqs_disabled()) { nc = this_cpu_ptr(&netdev_alloc_cache); data = page_frag_alloc(nc, len, gfp_mask); - pfmemalloc = nc->pfmemalloc; + pfmemalloc = page_frag_cache_is_pfmemalloc(nc); } else { local_bh_disable(); local_lock_nested_bh(&napi_alloc_cache.bh_lock); nc = this_cpu_ptr(&napi_alloc_cache.page); data = page_frag_alloc(nc, len, gfp_mask); - pfmemalloc = nc->pfmemalloc; + pfmemalloc = page_frag_cache_is_pfmemalloc(nc); local_unlock_nested_bh(&napi_alloc_cache.bh_lock); local_bh_enable(); @@ -850,7 +850,7 @@ struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len) len = SKB_HEAD_ALIGN(len); data = page_frag_alloc(&nc->page, len, gfp_mask); - pfmemalloc = nc->page.pfmemalloc; + pfmemalloc = page_frag_cache_is_pfmemalloc(&nc->page); } local_unlock_nested_bh(&napi_alloc_cache.bh_lock); diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 1539d315afe7..694c4df7a1a3 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -337,9 +337,7 @@ static void rxrpc_clean_up_connection(struct work_struct *work) */ rxrpc_purge_queue(&conn->rx_queue); - if (conn->tx_data_alloc.va) - __page_frag_cache_drain(virt_to_page(conn->tx_data_alloc.va), - conn->tx_data_alloc.pagecnt_bias); + page_frag_cache_drain(&conn->tx_data_alloc); call_rcu(&conn->rcu, rxrpc_rcu_free_connection); } diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 504453c688d7..a8cffe47cf01 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -452,9 +452,7 @@ void rxrpc_destroy_local(struct rxrpc_local *local) #endif rxrpc_purge_queue(&local->rx_queue); rxrpc_purge_client_connections(local); - if (local->tx_alloc.va) - __page_frag_cache_drain(virt_to_page(local->tx_alloc.va), - local->tx_alloc.pagecnt_bias); + page_frag_cache_drain(&local->tx_alloc); } /* diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 825ec5357691..b785425c3315 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1608,7 +1608,6 @@ static void svc_tcp_sock_detach(struct svc_xprt *xprt) static void svc_sock_free(struct svc_xprt *xprt) { struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); - struct page_frag_cache *pfc = &svsk->sk_frag_cache; struct socket *sock = svsk->sk_sock; trace_svcsock_free(svsk, sock); @@ -1618,8 +1617,7 @@ static void svc_sock_free(struct svc_xprt *xprt) sockfd_put(sock); else sock_release(sock); - if (pfc-
Re: [PATCH net-next v8 03/24] ovpn: add basic netlink support
Tue, Oct 08, 2024 at 10:01:40AM CEST, anto...@openvpn.net wrote: >Hi, > >On 07/10/24 17:32, Jiri Pirko wrote: >> Wed, Oct 02, 2024 at 11:02:17AM CEST, anto...@openvpn.net wrote: >> >> [...] >> >> >> > +operations: >> > + list: >> > +- >> > + name: dev-new >> > + attribute-set: ovpn >> > + flags: [ admin-perm ] >> > + doc: Create a new interface of type ovpn >> > + do: >> > +request: >> > + attributes: >> > +- ifname >> > +- mode >> > +reply: >> > + attributes: >> > +- ifname >> > +- ifindex >> > +- >> > + name: dev-del >> >> Why you expose new and del here in ovn specific generic netlink iface? >> Why can't you use the exising RTNL api which is used for creation and >> destruction of other types of devices? > >That was my original approach in v1, but it was argued that an ovpn interface >needs a userspace program to be configured and used in a meaningful way, >therefore it was decided to concentrate all iface mgmt APIs along with the >others in the netlink family and to not expose any RTNL ops. Can you please point me to the message id? > >However, recently we decided to add a dellink implementation for better >integration with network namespaces and to allow the user to wipe a dangling >interface. Hmm, one more argument to have symmetric add/del impletentation in RTNL > >In the future we are planning to also add the possibility to create a >"persistent interface", that is an interface created before launching any >userspace program and that survives when the latter is stopped. >I can guess this functionality may be better suited for RTNL, but I am not >sure yet. That would be quite confusing to have RTNL and genetlink iface to add/del device. From what you described above, makes more sent to have it just in RTNL > >@Jiri: do you have any particular opinion why we should use RTNL ops and not >netlink for creating/destroying interfaces? I feel this is mostly a matter of >taste, but maybe there are technical reasons we should consider. Well. technically, you can probabaly do both. But it is quite common that you can add/delete these kind of devices over RTNL. Lots of examples. People are used to it, aligns with existing flows. > >Thanks a lot for your contribution. > >Regards, > > >> >> >> ip link add [link DEV | parentdev NAME] [ name ] NAME >> [ txqueuelen PACKETS ] >> [ address LLADDR ] >> [ broadcast LLADDR ] >> [ mtu MTU ] [index IDX ] >> [ numtxqueues QUEUE_COUNT ] >> [ numrxqueues QUEUE_COUNT ] >> [ netns { PID | NETNSNAME | NETNSFILE } ] >> type TYPE [ ARGS ] >> >> ip link delete { DEVICE | dev DEVICE | group DEVGROUP } type TYPE [ ARGS ] >> >> Lots of examples of existing types creation is for example here: >> https://developers.redhat.com/blog/2018/10/22/introduction-to-linux-interfaces-for-virtual-networking >> >> >> >> > + attribute-set: ovpn >> > + flags: [ admin-perm ] >> > + doc: Delete existing interface of type ovpn >> > + do: >> > +pre: ovpn-nl-pre-doit >> > +post: ovpn-nl-post-doit >> > +request: >> > + attributes: >> > +- ifindex >> >> [...] > >-- >Antonio Quartulli >OpenVPN Inc.
[PATCH bpf v7 1/2] bpf: fix unpopulated name_len field in perf_event link info
Previously when retrieving `bpf_link_info.perf_event` for kprobe/uprobe/tracepoint, the `name_len` field was not populated by the kernel, leaving it to reflect the value initially set by the user. This behavior was inconsistent with how other input/output string buffer fields function (e.g. `raw_tracepoint.tp_name_len`). This patch fills `name_len` with the actual size of the string name. Fixes: 1b715e1b0ec5 ("bpf: Support ->fill_link_info for perf_event") Signed-off-by: Tyrone Wu Acked-by: Jiri Olsa Acked-by: Yafang Shao --- V6 -> V7: - Use *ulenp suggestion from Jiri for user name_len - Set user name_len to 1 when buf returned from bpf_get_perf_event_info() is NULL V5 -> V6: - Use simpler buf check while keeping V4 - Fix netdev/checkpatch warning for 80 cols exceeded - Fix Signed-off-by to use real name instead of git username V4 -> V5: - Check that buf is not NULL before retrieving/using its length V3 -> V4: - Split patch into separate kernel and selftest change V2 -> V3: - Use clearer variable name for user set/inputted name len (name_len -> input_len) - Change (name_len -> input_len) type from size_t to u32 since it's only received and used as u32 V1 -> V2: - Use user set *ulen in bpf_copy_to_user before overwriting *ulen kernel/bpf/syscall.c | 29 ++--- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index a8f1808a1ca5..8cfa7183d2ef 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -3565,15 +3565,16 @@ static void bpf_perf_link_dealloc(struct bpf_link *link) } static int bpf_perf_link_fill_common(const struct perf_event *event, -char __user *uname, u32 ulen, +char __user *uname, u32 *ulenp, u64 *probe_offset, u64 *probe_addr, u32 *fd_type, unsigned long *missed) { const char *buf; - u32 prog_id; + u32 prog_id, ulen; size_t len; int err; + ulen = *ulenp; if (!ulen ^ !uname) return -EINVAL; @@ -3581,10 +3582,17 @@ static int bpf_perf_link_fill_common(const struct perf_event *event, probe_offset, probe_addr, missed); if (err) return err; + + if (buf) { + len = strlen(buf); + *ulenp = len + 1; + } else { + *ulenp = 1; + } if (!uname) return 0; + if (buf) { - len = strlen(buf); err = bpf_copy_to_user(uname, buf, ulen, len); if (err) return err; @@ -3609,7 +3617,7 @@ static int bpf_perf_link_fill_kprobe(const struct perf_event *event, uname = u64_to_user_ptr(info->perf_event.kprobe.func_name); ulen = info->perf_event.kprobe.name_len; - err = bpf_perf_link_fill_common(event, uname, ulen, &offset, &addr, + err = bpf_perf_link_fill_common(event, uname, &ulen, &offset, &addr, &type, &missed); if (err) return err; @@ -3617,7 +3625,7 @@ static int bpf_perf_link_fill_kprobe(const struct perf_event *event, info->perf_event.type = BPF_PERF_EVENT_KRETPROBE; else info->perf_event.type = BPF_PERF_EVENT_KPROBE; - + info->perf_event.kprobe.name_len = ulen; info->perf_event.kprobe.offset = offset; info->perf_event.kprobe.missed = missed; if (!kallsyms_show_value(current_cred())) @@ -3639,7 +3647,7 @@ static int bpf_perf_link_fill_uprobe(const struct perf_event *event, uname = u64_to_user_ptr(info->perf_event.uprobe.file_name); ulen = info->perf_event.uprobe.name_len; - err = bpf_perf_link_fill_common(event, uname, ulen, &offset, &addr, + err = bpf_perf_link_fill_common(event, uname, &ulen, &offset, &addr, &type, NULL); if (err) return err; @@ -3648,6 +3656,7 @@ static int bpf_perf_link_fill_uprobe(const struct perf_event *event, info->perf_event.type = BPF_PERF_EVENT_URETPROBE; else info->perf_event.type = BPF_PERF_EVENT_UPROBE; + info->perf_event.uprobe.name_len = ulen; info->perf_event.uprobe.offset = offset; info->perf_event.uprobe.cookie = event->bpf_cookie; return 0; @@ -3673,12 +3682,18 @@ static int bpf_perf_link_fill_tracepoint(const struct perf_event *event, { char __user *uname; u32 ulen; + int err; uname = u64_to_user_ptr(info->perf_event.tracepoint.tp_name); ulen = info->perf_event.tracepoint.name_len; + err = bpf_perf_link_fill_common(event, uname, &ulen, NULL, NULL, NULL, NULL); + if (err) + return err; + info->perf_event.type = BPF_PERF_EVENT_TRACEPOINT; +
[PATCH bpf v7 2/2] selftests/bpf: fix perf_event link info name_len assertion
Fix `name_len` field assertions in `bpf_link_info.perf_event` for kprobe/uprobe/tracepoint to validate correct name size instead of 0. Fixes: 23cf7aa539dc ("selftests/bpf: Add selftest for fill_link_info") Signed-off-by: Tyrone Wu Acked-by: Jiri Olsa Acked-by: Yafang Shao --- V6 -> V7: no change V5 -> V6: - Fix netdev/checkpatch warning for 80 cols exceeded - Fix Signed-off-by to use real name instead of git username V4 -> V5: no change V3 -> V4: - Split patch into separate kernel and selftest change tools/testing/selftests/bpf/prog_tests/fill_link_info.c | 9 ++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c index f3932941bbaa..745c5ada4c4b 100644 --- a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c +++ b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c @@ -67,8 +67,9 @@ static int verify_perf_link_info(int fd, enum bpf_perf_event_type type, long add ASSERT_EQ(info.perf_event.kprobe.cookie, PERF_EVENT_COOKIE, "kprobe_cookie"); + ASSERT_EQ(info.perf_event.kprobe.name_len, strlen(KPROBE_FUNC) + 1, + "name_len"); if (!info.perf_event.kprobe.func_name) { - ASSERT_EQ(info.perf_event.kprobe.name_len, 0, "name_len"); info.perf_event.kprobe.func_name = ptr_to_u64(&buf); info.perf_event.kprobe.name_len = sizeof(buf); goto again; @@ -79,8 +80,9 @@ static int verify_perf_link_info(int fd, enum bpf_perf_event_type type, long add ASSERT_EQ(err, 0, "cmp_kprobe_func_name"); break; case BPF_PERF_EVENT_TRACEPOINT: + ASSERT_EQ(info.perf_event.tracepoint.name_len, strlen(TP_NAME) + 1, + "name_len"); if (!info.perf_event.tracepoint.tp_name) { - ASSERT_EQ(info.perf_event.tracepoint.name_len, 0, "name_len"); info.perf_event.tracepoint.tp_name = ptr_to_u64(&buf); info.perf_event.tracepoint.name_len = sizeof(buf); goto again; @@ -96,8 +98,9 @@ static int verify_perf_link_info(int fd, enum bpf_perf_event_type type, long add case BPF_PERF_EVENT_URETPROBE: ASSERT_EQ(info.perf_event.uprobe.offset, offset, "uprobe_offset"); + ASSERT_EQ(info.perf_event.uprobe.name_len, strlen(UPROBE_FILE) + 1, + "name_len"); if (!info.perf_event.uprobe.file_name) { - ASSERT_EQ(info.perf_event.uprobe.name_len, 0, "name_len"); info.perf_event.uprobe.file_name = ptr_to_u64(&buf); info.perf_event.uprobe.name_len = sizeof(buf); goto again; -- 2.43.0
Re: [PATCH bpf] selftests/bpf: add missing header include for htons
Hello: This patch was applied to bpf/bpf-next.git (net) by Martin KaFai Lau : On Tue, 08 Oct 2024 16:50:57 +0200 you wrote: > Including the network_helpers.h header in tests can lead to the following > build error: > > ./network_helpers.h: In function ‘csum_tcpudp_magic’: > ./network_helpers.h:116:14: error: implicit declaration of function \ > ‘htons’ [-Werror=implicit-function-declaration] > 116 | s += htons(proto + len); > > [...] Here is the summary with links: - [bpf] selftests/bpf: add missing header include for htons https://git.kernel.org/bpf/bpf-next/c/bc9b3fb827fc You are awesome, thank you! -- Deet-doot-dot, I am a bot. https://korg.docs.kernel.org/patchwork/pwbot.html
[PATCH v4 14/19] gendwarfksyms: Add support for kABI rules
Distributions that want to maintain a stable kABI need the ability to make ABI compatible changes to kernel without affecting symbol versions, either because of LTS updates or backports. With genksyms, developers would typically hide these changes from version calculation with #ifndef __GENKSYMS__, which would result in the symbol version not changing even though the actual type has changed. When we process precompiled object files, this isn't an option. To support this use case, add a --stable command line flag that gates kABI stability features that are not needed in mainline kernels, but can be useful for distributions, and add support for kABI rules, which can be used to restrict gendwarfksyms output. The rules are specified as a set of null-terminated strings stored in the .discard.gendwarfksyms.kabi_rules section. Each rule consists of four strings as follows: "version\0type\0target\0value" The version string ensures the structure can be changed in a backwards compatible way. The type string indicates the type of the rule, and target and value strings contain rule-specific data. Initially support two simple rules: 1. Declaration-only structures A structure declaration can change into a full definition when additional includes are pulled in to the TU, which changes the versions of any symbol that references the struct. Add support for defining declaration-only structs whose definition is not expanded during versioning. 2. Ignored enum fields It's possible to add new enum fields without changing the ABI, but as the fields are included in symbol versioning, this would change the versions. Add support for ignoring specific fields. Add examples for using the rules under the examples/ directory. Signed-off-by: Sami Tolvanen Acked-by: Neal Gompa --- scripts/gendwarfksyms/Makefile | 1 + scripts/gendwarfksyms/dwarf.c | 19 +- scripts/gendwarfksyms/examples/kabi.h | 61 ++ scripts/gendwarfksyms/examples/kabi_rules.c | 56 + scripts/gendwarfksyms/gendwarfksyms.c | 11 +- scripts/gendwarfksyms/gendwarfksyms.h | 57 ++ scripts/gendwarfksyms/kabi.c| 214 7 files changed, 415 insertions(+), 4 deletions(-) create mode 100644 scripts/gendwarfksyms/examples/kabi.h create mode 100644 scripts/gendwarfksyms/examples/kabi_rules.c create mode 100644 scripts/gendwarfksyms/kabi.c diff --git a/scripts/gendwarfksyms/Makefile b/scripts/gendwarfksyms/Makefile index 6540282dc746..27258c31e839 100644 --- a/scripts/gendwarfksyms/Makefile +++ b/scripts/gendwarfksyms/Makefile @@ -5,6 +5,7 @@ gendwarfksyms-objs += gendwarfksyms.o gendwarfksyms-objs += cache.o gendwarfksyms-objs += die.o gendwarfksyms-objs += dwarf.o +gendwarfksyms-objs += kabi.o gendwarfksyms-objs += symbols.o gendwarfksyms-objs += types.o diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c index a47a3a0f7a69..b15f1a5db452 100644 --- a/scripts/gendwarfksyms/dwarf.c +++ b/scripts/gendwarfksyms/dwarf.c @@ -80,11 +80,12 @@ static bool match_export_symbol(struct state *state, Dwarf_Die *die) return !!state->sym; } -static bool is_declaration(Dwarf_Die *die) +static bool is_declaration(struct die *cache, Dwarf_Die *die) { bool value; - return get_flag_attr(die, DW_AT_declaration, &value) && value; + return (get_flag_attr(die, DW_AT_declaration, &value) && value) || + kabi_is_struct_declonly(cache->fqn); } /* @@ -472,10 +473,11 @@ static void __process_structure_type(struct state *state, struct die *cache, process(cache, " {"); process_linebreak(cache, 1); - is_decl = is_declaration(die); + is_decl = is_declaration(cache, die); if (!is_decl && state->expand.expand) { cache_mark_expanded(&state->expansion_cache, die->addr); + state->expand.current_fqn = cache->fqn; check(process_die_container(state, cache, die, process_func, match_func)); } @@ -508,6 +510,15 @@ static void process_enumerator_type(struct state *state, struct die *cache, { Dwarf_Word value; + if (stable) { + /* Get the fqn before we process anything */ + update_fqn(cache, die); + + if (kabi_is_enumerator_ignored(state->expand.current_fqn, + cache->fqn)) + return; + } + process_list_comma(state, cache); process(cache, "enumerator"); process_fqn(cache, die); @@ -580,6 +591,7 @@ static void state_init(struct state *state) state->expand.expand = true; state->expand.ptr_depth = 0; state->expand.ptr_expansion_depth = 0; + state->expand.current_fqn = NULL; hash_init(state->expansion_cache.cache); } @@ -589,6 +601,7 @@ static void expansion_s
[PATCH v4 12/19] gendwarfksyms: Add symtypes output
Add support for producing genksyms-style symtypes files. Process die_map to find the longest expansions for each type, and use symtypes references in type definitions. The basic file format is similar to genksyms, with two notable exceptions: 1. Type names with spaces (common with Rust) in references are wrapped in single quotes. E.g.: s#'core::result::Result' 2. The actual type definition is the simple parsed DWARF format we output with --dump-dies, not the preprocessed C-style format genksyms produces. Signed-off-by: Sami Tolvanen Acked-by: Neal Gompa --- scripts/gendwarfksyms/Makefile| 1 + scripts/gendwarfksyms/die.c | 11 + scripts/gendwarfksyms/dwarf.c | 1 + scripts/gendwarfksyms/gendwarfksyms.c | 36 ++- scripts/gendwarfksyms/gendwarfksyms.h | 19 ++ scripts/gendwarfksyms/symbols.c | 4 +- scripts/gendwarfksyms/types.c | 359 ++ 7 files changed, 428 insertions(+), 3 deletions(-) create mode 100644 scripts/gendwarfksyms/types.c diff --git a/scripts/gendwarfksyms/Makefile b/scripts/gendwarfksyms/Makefile index c06145d84df8..6540282dc746 100644 --- a/scripts/gendwarfksyms/Makefile +++ b/scripts/gendwarfksyms/Makefile @@ -6,5 +6,6 @@ gendwarfksyms-objs += cache.o gendwarfksyms-objs += die.o gendwarfksyms-objs += dwarf.o gendwarfksyms-objs += symbols.o +gendwarfksyms-objs += types.o HOSTLDLIBS_gendwarfksyms := -ldw -lelf diff --git a/scripts/gendwarfksyms/die.c b/scripts/gendwarfksyms/die.c index 2829387fd815..df1ca3a032bb 100644 --- a/scripts/gendwarfksyms/die.c +++ b/scripts/gendwarfksyms/die.c @@ -22,6 +22,7 @@ static inline unsigned int die_hash(uintptr_t addr, enum die_state state) static void init_die(struct die *cd) { cd->state = DIE_INCOMPLETE; + cd->mapped = false; cd->fqn = NULL; cd->tag = -1; cd->addr = 0; @@ -83,6 +84,16 @@ static void reset_die(struct die *cd) init_die(cd); } +void die_map_for_each(die_map_callback_t func, void *arg) +{ + struct hlist_node *tmp; + struct die *cd; + + hash_for_each_safe(die_map, cd, tmp, hash) { + func(cd, arg); + } +} + void die_map_free(void) { struct hlist_node *tmp; diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c index d67cac16f8fb..e1a9e9061b1d 100644 --- a/scripts/gendwarfksyms/dwarf.c +++ b/scripts/gendwarfksyms/dwarf.c @@ -728,6 +728,7 @@ static void process_symbol(struct state *state, Dwarf_Die *die, { debug("%s", state->sym->name); check(process_func(state, NULL, die)); + state->sym->state = SYMBOL_MAPPED; if (dump_dies) fputs("\n", stderr); } diff --git a/scripts/gendwarfksyms/gendwarfksyms.c b/scripts/gendwarfksyms/gendwarfksyms.c index d40692a703bb..24c87523fc3a 100644 --- a/scripts/gendwarfksyms/gendwarfksyms.c +++ b/scripts/gendwarfksyms/gendwarfksyms.c @@ -21,6 +21,11 @@ int debug; int dump_dies; /* Print debugging information about die_map changes */ int dump_die_map; +/* Print out type strings (i.e. type_map) */ +int dump_types; +/* Write a symtypes file */ +int symtypes; +static const char *symtypes_file; static void usage(void) { @@ -29,6 +34,8 @@ static void usage(void) " -d, --debug Print debugging information\n" " --dump-dies Dump DWARF DIE contents\n" " --dump-die-map Print debugging information about die_map changes\n" + " --dump-types Dump type strings\n" + " -T, --symtypes file Write a symtypes file\n" " -h, --help Print this message\n" "\n", stderr); @@ -41,6 +48,7 @@ static int process_module(Dwfl_Module *mod, void **userdata, const char *name, Dwarf_Die cudie; Dwarf_CU *cu = NULL; Dwarf *dbg; + FILE *symfile = arg; int res; debug("%s", name); @@ -60,6 +68,10 @@ static int process_module(Dwfl_Module *mod, void **userdata, const char *name, process_cu(&cudie); } while (cu); + /* +* Use die_map to expand type strings and write them to `symfile`. +*/ + generate_symtypes(symfile); die_map_free(); return DWARF_CB_OK; @@ -72,22 +84,29 @@ static const Dwfl_Callbacks callbacks = { int main(int argc, char **argv) { + FILE *symfile = NULL; unsigned int n; int opt; struct option opts[] = { { "debug", 0, NULL, 'd' }, { "dump-dies", 0, &dump_dies, 1 }, { "dump-die-map", 0, &dump_die_map, 1 }, +{ "dump-types", 0, &dump_types, 1 }, +{ "symtypes", 1, NULL, 'T' }, { "help", 0, NULL, 'h' }, { 0, 0, NULL, 0 } }; - while ((opt = getopt_long(argc, ar
[PATCH v4 13/19] gendwarfksyms: Add symbol versioning
Calculate symbol versions from the fully expanded type strings in type_map, and output the versions in a genksyms-compatible format. Signed-off-by: Sami Tolvanen Acked-by: Neal Gompa --- scripts/gendwarfksyms/dwarf.c | 25 +- scripts/gendwarfksyms/gendwarfksyms.c | 11 ++- scripts/gendwarfksyms/gendwarfksyms.h | 13 ++- scripts/gendwarfksyms/symbols.c | 59 + scripts/gendwarfksyms/types.c | 122 +- 5 files changed, 222 insertions(+), 8 deletions(-) diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c index e1a9e9061b1d..a47a3a0f7a69 100644 --- a/scripts/gendwarfksyms/dwarf.c +++ b/scripts/gendwarfksyms/dwarf.c @@ -723,12 +723,33 @@ static int process_type(struct state *state, struct die *parent, Dwarf_Die *die) /* * Exported symbol processing */ +static struct die *get_symbol_cache(struct state *state, Dwarf_Die *die) +{ + struct die *cache; + + cache = die_map_get(die, DIE_SYMBOL); + + if (cache->state != DIE_INCOMPLETE) + return NULL; /* We already processed a symbol for this DIE */ + + cache->tag = dwarf_tag(die); + return cache; +} + static void process_symbol(struct state *state, Dwarf_Die *die, die_callback_t process_func) { + struct die *cache; + + symbol_set_die(state->sym, die); + + cache = get_symbol_cache(state, die); + if (!cache) + return; + debug("%s", state->sym->name); - check(process_func(state, NULL, die)); - state->sym->state = SYMBOL_MAPPED; + check(process_func(state, cache, die)); + cache->state = DIE_SYMBOL; if (dump_dies) fputs("\n", stderr); } diff --git a/scripts/gendwarfksyms/gendwarfksyms.c b/scripts/gendwarfksyms/gendwarfksyms.c index 24c87523fc3a..e90d909d259b 100644 --- a/scripts/gendwarfksyms/gendwarfksyms.c +++ b/scripts/gendwarfksyms/gendwarfksyms.c @@ -23,6 +23,8 @@ int dump_dies; int dump_die_map; /* Print out type strings (i.e. type_map) */ int dump_types; +/* Print out expanded type strings used for symbol versions */ +int dump_versions; /* Write a symtypes file */ int symtypes; static const char *symtypes_file; @@ -35,6 +37,7 @@ static void usage(void) " --dump-dies Dump DWARF DIE contents\n" " --dump-die-map Print debugging information about die_map changes\n" " --dump-types Dump type strings\n" + " --dump-versions Dump expanded type strings used for symbol versions\n" " -T, --symtypes file Write a symtypes file\n" " -h, --help Print this message\n" "\n", @@ -69,9 +72,10 @@ static int process_module(Dwfl_Module *mod, void **userdata, const char *name, } while (cu); /* -* Use die_map to expand type strings and write them to `symfile`. +* Use die_map to expand type strings, write them to `symfile`, and +* calculate symbol versions. */ - generate_symtypes(symfile); + generate_symtypes_and_versions(symfile); die_map_free(); return DWARF_CB_OK; @@ -92,6 +96,7 @@ int main(int argc, char **argv) { "dump-dies", 0, &dump_dies, 1 }, { "dump-die-map", 0, &dump_die_map, 1 }, { "dump-types", 0, &dump_types, 1 }, +{ "dump-versions", 0, &dump_versions, 1 }, { "symtypes", 1, NULL, 'T' }, { "help", 0, NULL, 'h' }, { 0, 0, NULL, 0 } }; @@ -167,5 +172,7 @@ int main(int argc, char **argv) if (symfile) check(fclose(symfile)); + symbol_print_versions(); + return 0; } diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h index e47b5e967520..814f53ef799e 100644 --- a/scripts/gendwarfksyms/gendwarfksyms.h +++ b/scripts/gendwarfksyms/gendwarfksyms.h @@ -26,6 +26,7 @@ extern int debug; extern int dump_dies; extern int dump_die_map; extern int dump_types; +extern int dump_versions; extern int symtypes; /* @@ -98,6 +99,7 @@ static inline unsigned int addr_hash(uintptr_t addr) enum symbol_state { SYMBOL_UNPROCESSED, SYMBOL_MAPPED, + SYMBOL_PROCESSED }; struct symbol_addr { @@ -112,6 +114,7 @@ struct symbol { struct hlist_node name_hash; enum symbol_state state; uintptr_t die_addr; + unsigned long crc; }; typedef void (*symbol_callback_t)(struct symbol *, void *arg); @@ -119,6 +122,10 @@ typedef void (*symbol_callback_t)(struct symbol *, void *arg); void symbol_read_exports(FILE *file); void symbol_read_symtab(int fd); struct symbol *symbol_get(const char *name); +void symbol_set_die(struct symbol *sym, Dwarf_Die *die); +void symbol_s
[PATCH v4 10/19] gendwarfksyms: Limit structure expansion
Expand each structure type only once per exported symbol. This is necessary to support self-referential structures, which would otherwise result in infinite recursion, but is still sufficient for catching ABI changes. For pointers, limit structure expansion after the first pointer in the symbol type. This should be plenty for detecting ABI differences, but it stops us from pulling in half the kernel for types that contain pointers to large kernel data structures, like task_struct, for example. Signed-off-by: Sami Tolvanen Acked-by: Neal Gompa --- scripts/gendwarfksyms/Makefile| 1 + scripts/gendwarfksyms/cache.c | 44 +++ scripts/gendwarfksyms/dwarf.c | 109 +++--- scripts/gendwarfksyms/gendwarfksyms.h | 37 + 4 files changed, 182 insertions(+), 9 deletions(-) create mode 100644 scripts/gendwarfksyms/cache.c diff --git a/scripts/gendwarfksyms/Makefile b/scripts/gendwarfksyms/Makefile index c0d4ce50fc27..c06145d84df8 100644 --- a/scripts/gendwarfksyms/Makefile +++ b/scripts/gendwarfksyms/Makefile @@ -2,6 +2,7 @@ hostprogs-always-y += gendwarfksyms gendwarfksyms-objs += gendwarfksyms.o +gendwarfksyms-objs += cache.o gendwarfksyms-objs += die.o gendwarfksyms-objs += dwarf.o gendwarfksyms-objs += symbols.o diff --git a/scripts/gendwarfksyms/cache.c b/scripts/gendwarfksyms/cache.c new file mode 100644 index ..2f1517133a20 --- /dev/null +++ b/scripts/gendwarfksyms/cache.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Google LLC + */ + +#include "gendwarfksyms.h" + +struct expanded { + uintptr_t addr; + struct hlist_node hash; +}; + +void __cache_mark_expanded(struct expansion_cache *ec, uintptr_t addr) +{ + struct expanded *es; + + es = xmalloc(sizeof(struct expanded)); + es->addr = addr; + hash_add(ec->cache, &es->hash, addr_hash(addr)); +} + +bool __cache_was_expanded(struct expansion_cache *ec, uintptr_t addr) +{ + struct expanded *es; + + hash_for_each_possible(ec->cache, es, hash, addr_hash(addr)) { + if (es->addr == addr) + return true; + } + + return false; +} + +void cache_clear_expanded(struct expansion_cache *ec) +{ + struct hlist_node *tmp; + struct expanded *es; + + hash_for_each_safe(ec->cache, es, tmp, hash) { + free(es); + } + + hash_init(ec->cache); +} diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c index f5cebbdcc212..51dd8e82f9e7 100644 --- a/scripts/gendwarfksyms/dwarf.c +++ b/scripts/gendwarfksyms/dwarf.c @@ -26,6 +26,7 @@ static void process_linebreak(struct die *cache, int n) !dwarf_form##attr(&da, value); \ } +DEFINE_GET_ATTR(flag, bool) DEFINE_GET_ATTR(udata, Dwarf_Word) static bool get_ref_die_attr(Dwarf_Die *die, unsigned int id, Dwarf_Die *value) @@ -79,6 +80,13 @@ static bool match_export_symbol(struct state *state, Dwarf_Die *die) return !!state->sym; } +static bool is_declaration(Dwarf_Die *die) +{ + bool value; + + return get_flag_attr(die, DW_AT_declaration, &value) && value; +} + /* * Type string processing */ @@ -455,19 +463,28 @@ static void __process_structure_type(struct state *state, struct die *cache, die_callback_t process_func, die_match_callback_t match_func) { + bool is_decl; + process(cache, type); process_fqn(cache, die); process(cache, " {"); process_linebreak(cache, 1); - check(process_die_container(state, cache, die, process_func, - match_func)); + is_decl = is_declaration(die); + + if (!is_decl && state->expand.expand) { + cache_mark_expanded(&state->expansion_cache, die->addr); + check(process_die_container(state, cache, die, process_func, + match_func)); + } process_linebreak(cache, -1); process(cache, "}"); - process_byte_size_attr(cache, die); - process_alignment_attr(cache, die); + if (!is_decl && state->expand.expand) { + process_byte_size_attr(cache, die); + process_alignment_attr(cache, die); + } } #define DEFINE_PROCESS_STRUCTURE_TYPE(structure)\ @@ -520,7 +537,7 @@ static void process_unspecified_type(struct state *state, struct die *cache, Dwarf_Die *die) { /* -* These can be emitted for stand-elone assembly code, which means we +* These can be emitted for stand-alone assembly code, which means we * might run into them in vmlinux.o. */ process(cache, "unspecified_type"); @@ -552,6 +569,42 @@ static void process_cached(struct state *state, struct die *cache,
[PATCH v4 11/19] gendwarfksyms: Add die_map debugging
Debugging the DWARF processing can be somewhat challenging, so add more detailed debugging output for die_map operations. Add the --dump-die-map flag, which adds color coded tags to the output for die_map changes. Signed-off-by: Sami Tolvanen Acked-by: Neal Gompa --- scripts/gendwarfksyms/dwarf.c | 15 +++ scripts/gendwarfksyms/gendwarfksyms.c | 7 +++ scripts/gendwarfksyms/gendwarfksyms.h | 13 + 3 files changed, 35 insertions(+) diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c index 51dd8e82f9e7..d67cac16f8fb 100644 --- a/scripts/gendwarfksyms/dwarf.c +++ b/scripts/gendwarfksyms/dwarf.c @@ -103,6 +103,8 @@ static void process(struct die *cache, const char *s) if (dump_dies) fputs(s, stderr); + if (cache) + die_debug_r("cache %p string '%s'", cache, s); die_map_add_string(cache, s); } @@ -552,6 +554,8 @@ static void process_cached(struct state *state, struct die *cache, list_for_each_entry(df, &cache->fragments, list) { switch (df->type) { case FRAGMENT_STRING: + die_debug_b("cache %p STRING '%s'", cache, + df->data.str); process(NULL, df->data.str); break; case FRAGMENT_LINEBREAK: @@ -561,6 +565,8 @@ static void process_cached(struct state *state, struct die *cache, if (!dwarf_die_addr_die(dwarf_cu_getdwarf(die->cu), (void *)df->data.addr, &child)) error("dwarf_die_addr_die failed"); + die_debug_b("cache %p DIE addr %" PRIxPTR " tag %x", + cache, df->data.addr, dwarf_tag(&child)); check(process_type(state, NULL, &child)); break; default: @@ -651,6 +657,9 @@ static int process_type(struct state *state, struct die *parent, Dwarf_Die *die) cache = die_map_get(die, want_state); if (cache->state == want_state) { + die_debug_g("cached addr %p tag %x -- %s", die->addr, tag, + die_state_name(cache->state)); + if (want_state == DIE_COMPLETE && is_expanded_type(tag)) cache_mark_expanded(&state->expansion_cache, die->addr); @@ -661,6 +670,9 @@ static int process_type(struct state *state, struct die *parent, Dwarf_Die *die) return 0; } + die_debug_g("addr %p tag %x -- %s -> %s", die->addr, tag, + die_state_name(cache->state), die_state_name(want_state)); + switch (tag) { /* Type modifiers */ PROCESS_TYPE(atomic) @@ -696,6 +708,9 @@ static int process_type(struct state *state, struct die *parent, Dwarf_Die *die) error("unexpected type: %x", tag); } + die_debug_r("parent %p cache %p die addr %p tag %x", parent, cache, + die->addr, tag); + /* Update cache state and append to the parent (if any) */ cache->tag = tag; cache->state = want_state; diff --git a/scripts/gendwarfksyms/gendwarfksyms.c b/scripts/gendwarfksyms/gendwarfksyms.c index 310cc9257d6e..d40692a703bb 100644 --- a/scripts/gendwarfksyms/gendwarfksyms.c +++ b/scripts/gendwarfksyms/gendwarfksyms.c @@ -19,6 +19,8 @@ int debug; /* Dump DIE contents */ int dump_dies; +/* Print debugging information about die_map changes */ +int dump_die_map; static void usage(void) { @@ -26,6 +28,7 @@ static void usage(void) "Options:\n" " -d, --debug Print debugging information\n" " --dump-dies Dump DWARF DIE contents\n" + " --dump-die-map Print debugging information about die_map changes\n" " -h, --help Print this message\n" "\n", stderr); @@ -74,6 +77,7 @@ int main(int argc, char **argv) struct option opts[] = { { "debug", 0, NULL, 'd' }, { "dump-dies", 0, &dump_dies, 1 }, +{ "dump-die-map", 0, &dump_die_map, 1 }, { "help", 0, NULL, 'h' }, { 0, 0, NULL, 0 } }; @@ -93,6 +97,9 @@ int main(int argc, char **argv) } } + if (dump_die_map) + dump_dies = 1; + if (optind >= argc) { usage(); error("no input files?"); diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h index 6147859ae2af..126916e85ee0 100644 --- a/scripts/gendwarfksyms/gendwarfksyms.h +++ b/scripts/gendwarfksyms/gendwarfksyms.h @@ -24,6 +24,7 @@ */ extern int debug; extern int dump_dies; +extern int dump_die_map; /* * Output helpers @@ -46,6 +47,18 @@ extern in
[PATCH v4 08/19] gendwarfksyms: Expand array_type
Add support for expanding DW_TAG_array_type, and the subrange type indicating array size. Example source code: const char *s[34]; Output with --dump-dies: variable array_type[34] { pointer_type { const_type { base_type char byte_size(1) encoding(6) } } byte_size(8) } Signed-off-by: Sami Tolvanen Acked-by: Neal Gompa Reviewed-by: Petr Pavlu --- scripts/gendwarfksyms/dwarf.c | 30 ++ 1 file changed, 30 insertions(+) diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c index 7e6b477d7c12..ade9b3b7b119 100644 --- a/scripts/gendwarfksyms/dwarf.c +++ b/scripts/gendwarfksyms/dwarf.c @@ -219,6 +219,7 @@ DEFINE_PROCESS_UDATA_ATTRIBUTE(encoding) } DEFINE_MATCH(formal_parameter) +DEFINE_MATCH(subrange) bool match_all(Dwarf_Die *die) { @@ -341,6 +342,33 @@ DEFINE_PROCESS_TYPE(shared) DEFINE_PROCESS_TYPE(volatile) DEFINE_PROCESS_TYPE(typedef) +static void process_subrange_type(struct state *state, struct die *cache, + Dwarf_Die *die) +{ + Dwarf_Word count = 0; + + if (get_udata_attr(die, DW_AT_count, &count)) + process_fmt(cache, "[%" PRIu64 "]", count); + else if (get_udata_attr(die, DW_AT_upper_bound, &count)) + process_fmt(cache, "[%" PRIu64 "]", count + 1); + else + process(cache, "[]"); +} + +static void process_array_type(struct state *state, struct die *cache, + Dwarf_Die *die) +{ + process(cache, "array_type"); + /* Array size */ + check(process_die_container(state, cache, die, process_type, + match_subrange_type)); + process(cache, " {"); + process_linebreak(cache, 1); + process_type_attr(state, cache, die); + process_linebreak(cache, -1); + process(cache, "}"); +} + static void __process_subroutine_type(struct state *state, struct die *cache, Dwarf_Die *die, const char *type) { @@ -436,7 +464,9 @@ static int process_type(struct state *state, struct die *parent, Dwarf_Die *die) PROCESS_TYPE(volatile) /* Subtypes */ PROCESS_TYPE(formal_parameter) + PROCESS_TYPE(subrange) /* Other types */ + PROCESS_TYPE(array) PROCESS_TYPE(base) PROCESS_TYPE(subroutine) PROCESS_TYPE(typedef) -- 2.47.0.rc0.187.ge670bccf7e-goog
[PATCH v4 15/19] gendwarfksyms: Add support for reserved and ignored fields
Distributions that want to maintain a stable kABI need the ability to make ABI compatible changes to kernel data structures without affecting symbol versions, either because of LTS updates or backports. With genksyms, developers would typically hide these changes from version calculation with #ifndef __GENKSYMS__, which would result in the symbol version not changing even though the actual type has changed. When we process precompiled object files, this isn't an option. Change union processing to recognize field name prefixes that allow the user to ignore the union completely during symbol versioning with a __kabi_ignored prefix in a field name, or to replace the type of a placeholder field using a __kabi_reserved field name prefix. For example, assume we want to add a new field to an existing alignment hole in a data structure, and ignore the new field when calculating symbol versions: struct struct1 { int a; /* a 4-byte alignment hole */ unsigned long b; }; To add `int n` to the alignment hole, we can add a union that includes a __kabi_ignored field that causes gendwarfksyms to ignore the entire union: struct struct1 { int a; union { char __kabi_ignored_0; int n; }; unsigned long b; }; With --stable, both structs produce the same symbol version. Alternatively, when a distribution expects future modification to a data structure, they can explicitly add reserved fields: struct struct2 { long a; long __kabi_reserved_0; /* reserved for future use */ }; To take the field into use, we can again replace it with a union, with one of the fields keeping the __kabi_reserved name prefix to indicate the original type: struct struct2 { long a; union { long __kabi_reserved_0; struct { int b; int v; }; }; Here gendwarfksyms --stable replaces the union with the type of the placeholder field when calculating versions. Signed-off-by: Sami Tolvanen Acked-by: Neal Gompa --- scripts/gendwarfksyms/dwarf.c | 202 +- scripts/gendwarfksyms/examples/kabi.h | 80 + scripts/gendwarfksyms/examples/kabi_ex0.c | 86 + scripts/gendwarfksyms/examples/kabi_ex1.c | 89 ++ scripts/gendwarfksyms/examples/kabi_ex2.c | 98 +++ scripts/gendwarfksyms/gendwarfksyms.h | 29 6 files changed, 583 insertions(+), 1 deletion(-) create mode 100644 scripts/gendwarfksyms/examples/kabi_ex0.c create mode 100644 scripts/gendwarfksyms/examples/kabi_ex1.c create mode 100644 scripts/gendwarfksyms/examples/kabi_ex2.c diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c index b15f1a5db452..72e24140b6e3 100644 --- a/scripts/gendwarfksyms/dwarf.c +++ b/scripts/gendwarfksyms/dwarf.c @@ -308,6 +308,9 @@ static void __process_list_type(struct state *state, struct die *cache, { const char *name = get_name_attr(die); + if (stable && is_kabi_prefix(name)) + name = NULL; + process_list_comma(state, cache); process(cache, type); process_type_attr(state, cache, die); @@ -441,11 +444,193 @@ static void process_variant_part_type(struct state *state, struct die *cache, process(cache, "}"); } +static int get_kabi_status(Dwarf_Die *die) +{ + const char *name = get_name_attr(die); + + if (is_kabi_prefix(name)) { + name += KABI_PREFIX_LEN; + + if (!strncmp(name, KABI_RESERVED_PREFIX, +KABI_RESERVED_PREFIX_LEN)) + return KABI_RESERVED; + if (!strncmp(name, KABI_IGNORED_PREFIX, +KABI_IGNORED_PREFIX_LEN)) + return KABI_IGNORED; + } + + return KABI_NORMAL; +} + +static int check_struct_member_kabi_status(struct state *state, + struct die *__unused, Dwarf_Die *die) +{ + int res; + + if (dwarf_tag(die) != DW_TAG_member_type) + error("expected a member"); + + /* +* If the union member is a struct, expect the __kabi field to +* be the first member of the structure, i.e..: +* +* union { +* type new_member; +* struct { +* type __kabi_field; +* } +* }; +*/ + res = get_kabi_status(die); + + if (res == KABI_RESERVED && + !get_ref_die_attr(die, DW_AT_type, &state->kabi.placeholder)) + error("structure member missing a type?"); + + return res; +} + +static int check_union_member_kabi_status(struct state *state, + struct die *__unused, Dwarf_Die *die) +{ + Dwarf_Die type; + int res; + + if (dwarf_tag(die) != DW_TAG_member_type) + error("expected a member"); + + if (!get_ref_die_attr(die, DW_AT_type, &type)) + error("union mem
[PATCH v4 19/19] Documentation/kbuild: Add DWARF module versioning
Add documentation for gendwarfksyms changes, and the kABI stability features that can be useful for distributions even though they're not used in mainline kernels. Signed-off-by: Sami Tolvanen Acked-by: Neal Gompa --- Documentation/kbuild/gendwarfksyms.rst | 274 + Documentation/kbuild/index.rst | 1 + 2 files changed, 275 insertions(+) create mode 100644 Documentation/kbuild/gendwarfksyms.rst diff --git a/Documentation/kbuild/gendwarfksyms.rst b/Documentation/kbuild/gendwarfksyms.rst new file mode 100644 index ..4b89743d2a88 --- /dev/null +++ b/Documentation/kbuild/gendwarfksyms.rst @@ -0,0 +1,274 @@ +=== +DWARF module versioning +=== + +1. Introduction +=== + +When CONFIG_MODVERSIONS is enabled, symbol versions for modules +are typically calculated from preprocessed source code using the +**genksyms** tool. However, this is incompatible with languages such +as Rust, where the source code has insufficient information about +the resulting ABI. With CONFIG_GENDWARFKSYMS (and CONFIG_DEBUG_INFO) +selected, **gendwarfksyms** is used instead to calculate symbol versions +from the DWARF debugging information, which contains the necessary +details about the final module ABI. + +1.1. Usage +== + +gendwarfksyms accepts a list of object files on the command line, and a +list of symbol names (one per line) in standard input:: + +Usage: gendwarfksyms [options] elf-object-file ... < symbol-list + +Options: + -d, --debug Print debugging information + --dump-dies Dump DWARF DIE contents + --dump-die-map Print debugging information about die_map changes + --dump-types Dump type strings + --dump-versions Dump expanded type strings used for symbol versions + -s, --stable Support kABI stability features + -T, --symtypes file Write a symtypes file + -h, --help Print this message + + +2. Type information availability + + +While symbols are typically exported in the same translation unit (TU) +where they're defined, it's also perfectly fine for a TU to export +external symbols. For example, this is done when calculating symbol +versions for exports in stand-alone assembly code. + +To ensure the compiler emits the necessary DWARF type information in the +TU where symbols are actually exported, gendwarfksyms adds a pointer +to exported symbols in the `EXPORT_SYMBOL()` macro using the following +macro:: + +#define __GENDWARFKSYMS_EXPORT(sym) \ +static typeof(sym) *__gendwarfksyms_ptr_##sym __used\ +__section(".discard.gendwarfksyms") = &sym; + + +When a symbol pointer is found in DWARF, gendwarfksyms can use its +type for calculating symbol versions even if the symbol is defined +elsewhere. The name of the symbol pointer is expected to start with +`__gendwarfksyms_ptr_`, followed by the name of the exported symbol. + +3. Symtypes output format += + +Similarly to genksyms, gendwarfksyms supports writing a symtypes file +for each processed object that contain types for exported symbols and +each referenced type that was used in calculating symbol versions. These +files can be useful when trying to determine what exactly caused symbol +versions to change between builds. + +Matching the existing format, the first column of each line contains +either a type reference or a symbol name. Type references have a +one-letter prefix followed by "#" and the name of the type. Four +reference types are supported:: + +e# = enum +s# = struct +t# = typedef +u# = union + +Type names with spaces in them are wrapped in single quotes, e.g.:: + +s#'core::result::Result' + +The rest of the line contains a type string. Unlike with genksyms that +produces C-style type strings, gendwarfksyms uses the same simple parsed +DWARF format produced by **--dump-dies**, but with type references +instead of fully expanded strings. + +4. Maintaining a stable kABI + + +Distribution maintainers often need the ability to make ABI compatible +changes to kernel data structures due to LTS updates or backports. Using +the traditional `#ifndef __GENKSYMS__` to hide these changes from symbol +versioning won't work when processing object files. To support this +use case, gendwarfksyms provides kABI stability features designed to +hide changes that won't affect the ABI when calculating versions. These +features are all gated behind the **--stable** command line flag and are +not used in the mainline kernel. + +Examples for using these features are provided in the +**scripts/gendwarfksyms/examples** directory, including helper macros +for source code annotation. Note that as these features are only used to +transform the inp
[PATCH v4 16/19] gendwarfksyms: Add support for symbol type pointers
The compiler may choose not to emit type information in DWARF for external symbols. Clang, for example, does this for symbols not defined in the current TU. To provide a way to work around this issue, add support for __gendwarfksyms_ptr_ pointers that force the compiler to emit the necessary type information in DWARF also for the missing symbols. Example usage: #define GENDWARFKSYMS_PTR(sym) \ static typeof(sym) *__gendwarfksyms_ptr_##sym __used \ __section(".discard.gendwarfksyms") = &sym; extern int external_symbol(void); GENDWARFKSYMS_PTR(external_symbol); Signed-off-by: Sami Tolvanen Acked-by: Neal Gompa --- scripts/gendwarfksyms/dwarf.c | 55 +- scripts/gendwarfksyms/examples/symbolptr.c | 33 + scripts/gendwarfksyms/gendwarfksyms.h | 7 +++ scripts/gendwarfksyms/symbols.c| 27 +++ 4 files changed, 121 insertions(+), 1 deletion(-) create mode 100644 scripts/gendwarfksyms/examples/symbolptr.c diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c index 72e24140b6e3..0112b5e8fbf5 100644 --- a/scripts/gendwarfksyms/dwarf.c +++ b/scripts/gendwarfksyms/dwarf.c @@ -992,6 +992,31 @@ static void process_variable(struct state *state, Dwarf_Die *die) process_symbol(state, die, __process_variable); } +static void save_symbol_ptr(struct state *state) +{ + Dwarf_Die ptr_type; + Dwarf_Die type; + + if (!get_ref_die_attr(&state->die, DW_AT_type, &ptr_type) || + dwarf_tag(&ptr_type) != DW_TAG_pointer_type) + error("%s must be a pointer type!", + get_symbol_name(&state->die)); + + if (!get_ref_die_attr(&ptr_type, DW_AT_type, &type)) + error("%s pointer missing a type attribute?", + get_symbol_name(&state->die)); + + /* +* Save the symbol pointer DIE in case the actual symbol is +* missing from the DWARF. Clang, for example, intentionally +* omits external symbols from the debugging information. +*/ + if (dwarf_tag(&type) == DW_TAG_subroutine_type) + symbol_set_ptr(state->sym, &type); + else + symbol_set_ptr(state->sym, &ptr_type); +} + static int process_exported_symbols(struct state *unused, struct die *cache, Dwarf_Die *die) { @@ -1015,7 +1040,9 @@ static int process_exported_symbols(struct state *unused, struct die *cache, state_init(&state); - if (tag == DW_TAG_subprogram) + if (is_symbol_ptr(get_symbol_name(&state.die))) + save_symbol_ptr(&state); + else if (tag == DW_TAG_subprogram) process_subprogram(&state, &state.die); else process_variable(&state, &state.die); @@ -1028,8 +1055,34 @@ static int process_exported_symbols(struct state *unused, struct die *cache, } } +static void process_symbol_ptr(struct symbol *sym, void *arg) +{ + struct state state; + Dwarf *dwarf = arg; + + if (sym->state != SYMBOL_UNPROCESSED || !sym->ptr_die_addr) + return; + + debug("%s", sym->name); + state_init(&state); + state.sym = sym; + + if (!dwarf_die_addr_die(dwarf, (void *)sym->ptr_die_addr, &state.die)) + error("dwarf_die_addr_die failed for symbol ptr: '%s'", + sym->name); + + if (dwarf_tag(&state.die) == DW_TAG_subroutine_type) + process_subprogram(&state, &state.die); + else + process_variable(&state, &state.die); + + cache_clear_expanded(&state.expansion_cache); +} + void process_cu(Dwarf_Die *cudie) { check(process_die_container(NULL, NULL, cudie, process_exported_symbols, match_all)); + + symbol_for_each(process_symbol_ptr, dwarf_cu_getdwarf(cudie->cu)); } diff --git a/scripts/gendwarfksyms/examples/symbolptr.c b/scripts/gendwarfksyms/examples/symbolptr.c new file mode 100644 index ..b7b97cd39769 --- /dev/null +++ b/scripts/gendwarfksyms/examples/symbolptr.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Google LLC + * + * Example for symbol pointers. When compiled with Clang, gendwarfkyms + * uses a symbol pointer for `f`. + * + * $ clang -g -c examples/symbolptr.c examples/symbolptr.o + * $ echo -e "f\ng\np" | ./gendwarfksyms -d examples/symbolptr.o + */ + +/* Kernel macros for userspace testing. */ +#ifndef __used +#define __used __attribute__((__used__)) +#endif +#ifndef __section +#define __section(section) __attribute__((__section__(section))) +#endif + +#define __GENDWARFKSYMS_EXPORT(sym)\ + static typeof(sym) *__gendwarfksyms_ptr_##sym __used\ + __section(".discard.gendwarfksyms") = &sym; + +extern void f(unsigned int arg); +v
[PATCH v4 18/19] kbuild: Add gendwarfksyms as an alternative to genksyms
When MODVERSIONS is enabled, allow selecting gendwarfksyms as the implementation, but default to genksyms. Signed-off-by: Sami Tolvanen Acked-by: Neal Gompa --- kernel/module/Kconfig | 25 - scripts/Makefile | 2 +- scripts/Makefile.build | 39 +++ 3 files changed, 56 insertions(+), 10 deletions(-) diff --git a/kernel/module/Kconfig b/kernel/module/Kconfig index f9e5f82fa88b..e6b2427e5c19 100644 --- a/kernel/module/Kconfig +++ b/kernel/module/Kconfig @@ -169,13 +169,36 @@ config MODVERSIONS make them incompatible with the kernel you are running. If unsure, say N. +choice + prompt "Module versioning implementation" + depends on MODVERSIONS + default GENKSYMS + help + Select the tool used to calculate symbol versions for modules. + + If unsure, select GENKSYMS. + +config GENKSYMS + bool "genksyms (from source code)" + help + Calculate symbol versions from pre-processed source code using + genksyms. + + If unsure, say Y. + config GENDWARFKSYMS - bool + bool "gendwarfksyms (from debugging information)" depends on DEBUG_INFO # Requires full debugging information, split DWARF not supported. depends on !DEBUG_INFO_REDUCED && !DEBUG_INFO_SPLIT # Requires ELF object files. depends on !LTO + help + Calculate symbol versions from DWARF debugging information using + gendwarfksyms. Requires DEBUG_INFO to be enabled. + + If unsure, say N. +endchoice config ASM_MODVERSIONS bool diff --git a/scripts/Makefile b/scripts/Makefile index d7fec46d38c0..8533f4498885 100644 --- a/scripts/Makefile +++ b/scripts/Makefile @@ -53,7 +53,7 @@ hostprogs += unifdef targets += module.lds subdir-$(CONFIG_GCC_PLUGINS) += gcc-plugins -subdir-$(CONFIG_MODVERSIONS) += genksyms +subdir-$(CONFIG_GENKSYMS) += genksyms subdir-$(CONFIG_GENDWARFKSYMS) += gendwarfksyms subdir-$(CONFIG_SECURITY_SELINUX) += selinux subdir-$(CONFIG_SECURITY_IPE) += ipe diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 8f423a1faf50..ae13afb71123 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -107,18 +107,28 @@ cmd_cpp_i_c = $(CPP) $(c_flags) -o $@ $< $(obj)/%.i: $(obj)/%.c FORCE $(call if_changed_dep,cpp_i_c) +gendwarfksyms := scripts/gendwarfksyms/gendwarfksyms +getexportsymbols = $(NM) $(1) | sed -n 's/.* __export_symbol_\(.*\)/$(2)/p' + genksyms = scripts/genksyms/genksyms \ $(if $(1), -T $(2)) \ $(if $(KBUILD_PRESERVE), -p)\ -r $(or $(wildcard $(2:.symtypes=.symref)), /dev/null) # These mirror gensymtypes_S and co below, keep them in synch. +ifdef CONFIG_GENDWARFKSYMS +symtypes_dep_c = $(obj)/%.o +cmd_gensymtypes_c = $(if $(skip_gendwarfksyms),, \ + $(call getexportsymbols,$(2:.symtypes=.o),\1) | \ + $(gendwarfksyms) $(2:.symtypes=.o) $(if $(1), --symtypes $(2))) +else cmd_gensymtypes_c = $(CPP) -D__GENKSYMS__ $(c_flags) $< | $(genksyms) +endif # CONFIG_GENDWARFKSYMS quiet_cmd_cc_symtypes_c = SYM $(quiet_modtag) $@ cmd_cc_symtypes_c = $(call cmd_gensymtypes_c,true,$@) >/dev/null -$(obj)/%.symtypes : $(obj)/%.c FORCE +$(obj)/%.symtypes : $(obj)/%.c $(symtypes_dep_c) FORCE $(call cmd,cc_symtypes_c) # LLVM assembly @@ -314,19 +324,32 @@ $(obj)/%.ll: $(obj)/%.rs FORCE # This is convoluted. The .S file must first be preprocessed to run guards and # expand names, then the resulting exports must be constructed into plain # EXPORT_SYMBOL(symbol); to build our dummy C file, and that gets preprocessed -# to make the genksyms input. +# to make the genksyms input or compiled into an object for gendwarfksyms. # # These mirror gensymtypes_c and co above, keep them in synch. -cmd_gensymtypes_S = \ - { echo "\#include " ;\ - echo "\#include " ; \ - $(NM) $@ | sed -n 's/.* __export_symbol_\(.*\)/EXPORT_SYMBOL(\1);/p' ; } | \ -$(CPP) -D__GENKSYMS__ $(c_flags) -xc - | $(genksyms) +getasmexports = \ + { echo "\#include " ; \ + echo "\#include " ; \ + echo "\#include " ; \ + $(call getexportsymbols,$(2:.symtypes=.o),EXPORT_SYMBOL(\1);) ; } + +ifdef CONFIG_GENDWARFKSYMS +cmd_gensymtypes_S =\ + $(getasmexports) | \ + $(CC) $(c_flags) -c -o $(2:.symtypes=.gendwarfksyms.o) -xc -; \ + $(call getexportsymbols,$(2:.symtypes=.o),\1) | \ + $(gendwarfksyms) $(2:.symtypes=.gendwarfksyms.o)
[PATCH v4 17/19] export: Add __gendwarfksyms_ptr_ references to exported symbols
With gendwarfksyms, we need each TU where the EXPORT_SYMBOL() macro is used to also contain DWARF type information for the symbols it exports. However, as a TU can also export external symbols and compilers may choose not to emit debugging information for symbols not defined in the current TU, the missing types will result in missing symbol versions. Stand-alone assembly code also doesn't contain type information for exported symbols, so we need to compile a temporary object file with asm-prototypes.h instead, and similarly need to ensure the DWARF in the temporary object file contains the necessary types. To always emit type information for external exports, add explicit __gendwarfksyms_ptr_ references to them in EXPORT_SYMBOL(). gendwarfksyms will use the type information for __gendwarfksyms_ptr_* if needed. Discard the pointers from the final binary to avoid further bloat. Signed-off-by: Sami Tolvanen Acked-by: Neal Gompa --- include/linux/export.h | 15 +++ 1 file changed, 15 insertions(+) diff --git a/include/linux/export.h b/include/linux/export.h index 0bbd02fd351d..cf71d3202e5b 100644 --- a/include/linux/export.h +++ b/include/linux/export.h @@ -52,9 +52,24 @@ #else +#ifdef CONFIG_GENDWARFKSYMS +/* + * With CONFIG_GENDWARFKSYMS, ensure the compiler emits debugging + * information for all exported symbols, including those defined in + * different TUs, by adding a __gendwarfksyms_ptr_ pointer + * that's discarded during the final link. + */ +#define __GENDWARFKSYMS_EXPORT(sym)\ + static typeof(sym) *__gendwarfksyms_ptr_##sym __used\ + __section(".discard.gendwarfksyms") = &sym; +#else +#define __GENDWARFKSYMS_EXPORT(sym) +#endif + #define __EXPORT_SYMBOL(sym, license, ns) \ extern typeof(sym) sym; \ __ADDRESSABLE(sym) \ + __GENDWARFKSYMS_EXPORT(sym) \ asm(__stringify(___EXPORT_SYMBOL(sym, license, ns))) #endif -- 2.47.0.rc0.187.ge670bccf7e-goog
Re: [RFC PATCH v3 3/4] hazptr: Implement Hazard Pointers
On 2024-10-08 15:50, Mathieu Desnoyers wrote: [...] +/* Retire the protected hazard pointer from @slot. */ +static inline +void hazptr_retire(struct hazptr_slot *slot, void *addr) +{ + WARN_ON_ONCE(slot->addr != addr); + smp_store_release(&slot->addr, NULL); +} Actually, comparing this with the literature and past presentations from Maged Michael, "retire" is not the appropriate name here. With Hazard Pointers, AFAIU, the "retire" operation is similar to a call_rcu() memory reclaim. It marks the object for eventual reclamation when it is safe to do so. The API here really just releases the recently protected hazard pointer. So I will rename this to "hazptr_release()". At this stage, this hazptr API does not implement any retire operation, and only offers the hazptr_scan() for reader/updater synchronization, leaving the actual reclaim to the caller. Thanks, Mathieu -- Mathieu Desnoyers EfficiOS Inc. https://www.efficios.com
Re: [PATCH v10 7/7] remoteproc: stm32: Add support of an OP-TEE TA to load the firmware
Hi Arnaud, kernel test robot noticed the following build errors: [auto build test ERROR on 9852d85ec9d492ebef56dc5f229416c925758edc] url: https://github.com/intel-lab-lkp/linux/commits/Arnaud-Pouliquen/remoteproc-core-Introduce-rproc_pa_to_va-helper/20241007-212358 base: 9852d85ec9d492ebef56dc5f229416c925758edc patch link: https://lore.kernel.org/r/20241007131620.2090104-8-arnaud.pouliquen%40foss.st.com patch subject: [PATCH v10 7/7] remoteproc: stm32: Add support of an OP-TEE TA to load the firmware config: arm-randconfig-r054-20241008 (https://download.01.org/0day-ci/archive/20241009/202410090225.d3bzaaqk-...@intel.com/config) compiler: clang version 20.0.0git (https://github.com/llvm/llvm-project fef3566a25ff0e34fb87339ba5e13eca17cec00f) reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20241009/202410090225.d3bzaaqk-...@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot | Closes: https://lore.kernel.org/oe-kbuild-all/202410090225.d3bzaaqk-...@intel.com/ All errors (new ones prefixed by >>): >> ld.lld: error: undefined symbol: tee_client_invoke_func >>> referenced by remoteproc_tee.c >>> drivers/remoteproc/remoteproc_tee.o:(tee_rproc_release_fw) in archive vmlinux.a >>> referenced by remoteproc_tee.c >>> drivers/remoteproc/remoteproc_tee.o:(tee_rproc_load_fw) in archive vmlinux.a >>> referenced by remoteproc_tee.c >>> drivers/remoteproc/remoteproc_tee.o:(tee_rproc_parse_fw) in archive vmlinux.a >>> referenced 3 more times -- >> ld.lld: error: undefined symbol: tee_shm_register_kernel_buf >>> referenced by remoteproc_tee.c >>> drivers/remoteproc/remoteproc_tee.o:(tee_rproc_load_fw) in archive vmlinux.a -- >> ld.lld: error: undefined symbol: tee_shm_free >>> referenced by remoteproc_tee.c >>> drivers/remoteproc/remoteproc_tee.o:(tee_rproc_load_fw) in archive vmlinux.a -- >> ld.lld: error: undefined symbol: tee_client_open_session >>> referenced by remoteproc_tee.c >>> drivers/remoteproc/remoteproc_tee.o:(tee_rproc_register) in archive vmlinux.a -- >> ld.lld: error: undefined symbol: tee_client_close_session >>> referenced by remoteproc_tee.c >>> drivers/remoteproc/remoteproc_tee.o:(tee_rproc_unregister) in archive vmlinux.a >>> referenced by remoteproc_tee.c >>> drivers/remoteproc/remoteproc_tee.o:(tee_rproc_remove) in archive vmlinux.a -- >> ld.lld: error: undefined symbol: tee_client_open_context >>> referenced by remoteproc_tee.c >>> drivers/remoteproc/remoteproc_tee.o:(tee_rproc_probe) in archive vmlinux.a -- >> ld.lld: error: undefined symbol: tee_client_close_context >>> referenced by remoteproc_tee.c >>> drivers/remoteproc/remoteproc_tee.o:(tee_rproc_probe) in archive vmlinux.a >>> referenced by remoteproc_tee.c >>> drivers/remoteproc/remoteproc_tee.o:(tee_rproc_remove) in archive vmlinux.a -- >> ld.lld: error: undefined symbol: tee_bus_type >>> referenced by remoteproc_tee.c >>> drivers/remoteproc/remoteproc_tee.o:(tee_rproc_fw_driver) in archive vmlinux.a Kconfig warnings: (for reference only) WARNING: unmet direct dependencies detected for REMOTEPROC_TEE Depends on [n]: REMOTEPROC [=y] && OPTEE [=n] Selected by [y]: - STM32_RPROC [=y] && (ARCH_STM32 [=n] || COMPILE_TEST [=y]) && REMOTEPROC [=y] -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki
Re: [RFC PATCH 30/39] KVM: guest_memfd: Handle folio preparation for guest_memfd mmap
On Thu, Oct 03, 2024, Ackerley Tng wrote: > Ackerley Tng writes: > > > Elliot Berman writes: > >> From x86 CoCo perspective, I think it also makes sense to not zero > >> the folio when changing faultiblity from private to shared: > >> - If guest is sharing some data with host, you've wiped the data and > >>guest has to copy again. > >> - Or, if SEV/TDX enforces that page is zero'd between transitions, > >>Linux has duplicated the work that trusted entity has already done. > >> > >> Fuad and I can help add some details for the conversion. Hopefully we > >> can figure out some of the plan at plumbers this week. > > > > Zeroing the page prevents leaking host data (see function docstring for > > kvm_gmem_prepare_folio() introduced in [1]), so we definitely don't want > > to introduce a kernel data leak bug here. > > Actually it seems like filemap_grab_folio() already gets a zeroed page. > > filemap_grab_folio() eventually calls __alloc_pages_noprof() > -> get_page_from_freelist() >-> prep_new_page() > -> post_alloc_hook() > > and post_alloc_hook() calls kernel_init_pages(), which zeroes the page, > depending on kernel config. > > Paolo, was calling clear_highpage() in kvm_gmem_prepare_folio() zeroing an > already empty page returned from filemap_grab_folio()? Yes and no. CONFIG_INIT_ON_ALLOC_DEFAULT_ON and init_on_alloc are very much hardening features, not functional behavior that other code _needs_ to be aware of. E.g. enabling init-on-alloc comes with a measurable performance cost. Ignoring hardening, the guest_memfd mapping specifically sets the gfp_mask to GFP_HIGHUSER, i.e. doesn't set __GFP_ZERO. That said, I wouldn't be opposed to skipping the clear_highpage() call when want_init_on_alloc() is true. Also, the intended behavior (or at least, what intended) of kvm_gmem_prepare_folio() was it would do clear_highpage() if and only if a trusted entity does NOT zero the page. Factoring that in is a bit harder, as it probably requires another arch hook (or providing an out-param from kvm_arch_gmem_prepare()). I.e. the want_init_on_alloc() case isn't the only time KVM could shave cycles by not redundantly zeroing memory.
Re: [PATCH] selftest: hid: add the missing tests directory
On 10/8/24 03:31, Yun Lu wrote: Commit 160c826b4dd0 ("selftest: hid: add missing run-hid-tools-tests.sh") has added the run-hid-tools-tests.sh script for it to be installed, but I forgot to add the tests directory together. In fact, the run-hid-tools-tests.sh script uses the scripts in the tests directory to run tests. The tests directory also needs to be added to be installed Include the error you are seeing in here. Fixes: ffb85d5c9e80 ("selftests: hid: import hid-tools hid-core tests") Cc: sta...@vger.kernel.org Signed-off-by: Yun Lu --- tools/testing/selftests/hid/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/hid/Makefile b/tools/testing/selftests/hid/Makefile index 38ae31bb07b5..662209f5fabc 100644 --- a/tools/testing/selftests/hid/Makefile +++ b/tools/testing/selftests/hid/Makefile @@ -18,6 +18,7 @@ TEST_PROGS += hid-usb_crash.sh TEST_PROGS += hid-wacom.sh TEST_FILES := run-hid-tools-tests.sh +TEST_FILES += tests What about the files if any under the tests directory? The install rule would handle the case, however, did you verify that those are copied as well? CXX ?= $(CROSS_COMPILE)g++ thanks, -- Shuah
Re: [RFC PATCH 30/39] KVM: guest_memfd: Handle folio preparation for guest_memfd mmap
Patrick Roy writes: > Hi Ackerley, > > On Thu, 2024-10-03 at 22:32 +0100, Ackerley Tng wrote: >> Elliot Berman writes: >> >>> On Tue, Sep 10, 2024 at 11:44:01PM +, Ackerley Tng wrote: Since guest_memfd now supports mmap(), folios have to be prepared before they are faulted into userspace. When memory attributes are switched between shared and private, the up-to-date flags will be cleared. Use the folio's up-to-date flag to indicate being ready for the guest usage and can be used to mark whether the folio is ready for shared OR private use. >>> >>> Clearing the up-to-date flag also means that the page gets zero'd out >>> whenever it transitions between shared and private (either direction). >>> pKVM (Android) hypervisor policy can allow in-place conversion between >>> shared/private. >>> >>> I believe the important thing is that sev_gmem_prepare() needs to be >>> called prior to giving page to guest. In my series, I had made a >>> ->prepare_inaccessible() callback where KVM would only do this part. >>> When transitioning to inaccessible, only that callback would be made, >>> besides the bookkeeping. The folio zeroing happens once when allocating >>> the folio if the folio is initially accessible (faultable). >>> >>> From x86 CoCo perspective, I think it also makes sense to not zero >>> the folio when changing faultiblity from private to shared: >>> - If guest is sharing some data with host, you've wiped the data and >>>guest has to copy again. >>> - Or, if SEV/TDX enforces that page is zero'd between transitions, >>>Linux has duplicated the work that trusted entity has already done. >>> >>> Fuad and I can help add some details for the conversion. Hopefully we >>> can figure out some of the plan at plumbers this week. >> >> Zeroing the page prevents leaking host data (see function docstring for >> kvm_gmem_prepare_folio() introduced in [1]), so we definitely don't want >> to introduce a kernel data leak bug here. >> >> In-place conversion does require preservation of data, so for >> conversions, shall we zero depending on VM type? >> >> + Gunyah: don't zero since ->prepare_inaccessible() is a no-op >> + pKVM: don't zero >> + TDX: don't zero >> + SEV: AMD Architecture Programmers Manual 7.10.6 says there is no >> automatic encryption and implies no zeroing, hence perform zeroing >> + KVM_X86_SW_PROTECTED_VM: Doesn't have a formal definition so I guess >> we could require zeroing on transition? > > Maybe for KVM_X86_SW_PROTECTED_VM we could make zero-ing configurable > via some CREATE_GUEST_MEMFD flag, instead of forcing one specific > behavior. Sounds good to me, I can set up a flag in the next revision. > For the "non-CoCo with direct map entries removed" VMs that we at AWS > are going for, we'd like a VM type with host-controlled in-place > conversions which doesn't zero on transitions, so if > KVM_X86_SW_PROTECTED_VM ends up zeroing, we'd need to add another new VM > type for that. > > Somewhat related sidenote: For VMs that allow inplace conversions and do > not zero, we do not need to zap the stage-2 mappings on memory attribute > changes, right? > Here are some reasons for zapping I can think of: 1. When private pages are split/merged, zapping the stage-2 mappings on memory attribute changes allows the private pages to be re-faulted by KVM at smaller/larger granularity. 2. The rationale described here https://elixir.bootlin.com/linux/v6.11.2/source/arch/x86/kvm/mmu/mmu.c#L7482 ("Zapping SPTEs in this case ensures KVM will reassess whether or not a hugepage can be used for affected ranges.") probably refers to the existing implementation, when a different set of physical pages is used to back shared and private memory. When the same set of physical pages is used for both shared and private memory, then IIUC this rationale does not apply. 3. There's another rationale for zapping https://elixir.bootlin.com/linux/v6.11.2/source/virt/kvm/kvm_main.c#L2494 to do with read vs write mappings here. I don't fully understand this, does this rationale still apply? 4. Is zapping required if the pages get removed/added to kernel direct map? >> This way, the uptodate flag means that it has been prepared (as in >> sev_gmem_prepare()), and zeroed if required by VM type. >> >> Regarding flushing the dcache/tlb in your other question [2], if we >> don't use folio_zero_user(), can we relying on unmapping within core-mm >> to flush after shared use, and unmapping within KVM To flush after >> private use? >> >> Or should flush_dcache_folio() be explicitly called on kvm_gmem_fault()? >> >> clear_highpage(), used in the non-hugetlb (original) path, doesn't flush >> the dcache. Was that intended? >> >>> Thanks, >>> Elliot >>> >> >> [1] https://lore.kernel.org/all/20240726185157.72821-8-pbonz...@redhat.com/ >> [2] >> https://lore.kernel.org/all/diqz34ldszp3@ackerleytng-ctop.c.googlers.com/ > > Best, > P
Re: [PATCH v8 8/8] clk: Add KUnit tests for clks registered with struct clk_parent_data
Quoting Guenter Roeck (2024-10-03 21:52:09) > On 10/3/24 17:42, Stephen Boyd wrote: > > > > Can you please describe how you run the kunit test? And provide the qemu > > command you run to boot arm64 with acpi? > > > > Example command line: > > qemu-system-aarch64 -M virt -m 512 \ > -kernel arch/arm64/boot/Image -no-reboot -nographic \ > -snapshot \ > -bios /opt/buildbot/rootfs/arm64/../firmware/QEMU_EFI-aarch64.fd \ > -device virtio-blk-device,drive=d0 \ > -drive file=rootfs.ext2,if=none,id=d0,format=raw \ > -cpu cortex-a57 -serial stdio -monitor none -no-reboot \ > --append "kunit.stats_enabled=2 kunit.filter=speed>slow root=/dev/vda > rootwait earlycon=pl011,0x900 console=ttyAMA0" > > That works fine for me. Configuration is arm64 defconfig plus various > debug and kunit options. I built the efi image myself from sources. > The root file system is from buildroot with modified init script. > kunit tests are all built into the kernel and run during boot. Thanks. I figured out that I was missing enabling CONFIG_ACPI. Here's my commandline ./tools/testing/kunit/kunit.py run --arch=arm64 \ --kunitconfig=drivers/of \ --qemu_args="-bios /usr/share/qemu-efi-aarch64/QEMU_EFI.fd -smp 2" \ --kconfig_add="CONFIG_ACPI=y" \ --kernel_args="earlycon=pl011,0x900" Now I can boot and reproduce the failure, but there's another problem. ACPI disables itself when it fails to find tables. ACPI: Unable to load the System Description Tables This calls disable_acpi() which sets acpi_disabled to 1. This happens before the unit test runs, meaning we can't reliably use 'acpi_disabled' as a method to skip. The best I can come up with then is to test for a NULL of_root when CONFIG_ARM64 and CONFIG_ACPI are enabled, because the tests intentionally don't work when both those configs are enabled and the 'of_root' isn't populated. In all other cases the 'of_root' missing is a bug. I'll probably make this into some sort of kunit helper function in of_private.h and send it to DT maintainers. ---8< diff --git a/drivers/of/of_kunit_helpers.c b/drivers/of/of_kunit_helpers.c index 287d6c91bb37..a1330e183230 100644 --- a/drivers/of/of_kunit_helpers.c +++ b/drivers/of/of_kunit_helpers.c @@ -36,6 +36,9 @@ int of_overlay_fdt_apply_kunit(struct kunit *test, void *overlay_fdt, int ret; int *copy_id; + if (IS_ENABLED(CONFIG_ARM64) && IS_ENABLED(CONFIG_ACPI) && !of_root) + kunit_skip(test, "arm64+acpi rejects overlays"); + copy_id = kunit_kmalloc(test, sizeof(*copy_id), GFP_KERNEL); if (!copy_id) return -ENOMEM; diff --git a/drivers/of/of_test.c b/drivers/of/of_test.c index c85a258bc6ae..6cca43bf8029 100644 --- a/drivers/of/of_test.c +++ b/drivers/of/of_test.c @@ -38,6 +38,8 @@ static int of_dtb_test_init(struct kunit *test) { if (!IS_ENABLED(CONFIG_OF_EARLY_FLATTREE)) kunit_skip(test, "requires CONFIG_OF_EARLY_FLATTREE"); + if (IS_ENABLED(CONFIG_ARM64) && IS_ENABLED(CONFIG_ACPI) && !of_root) + kunit_skip(test, "arm64+acpi doesn't populate a root node on ACPI systems"); return 0; } diff --git a/drivers/of/overlay_test.c b/drivers/of/overlay_test.c index 19a292cdeee3..3e7ac97a6796 100644 --- a/drivers/of/overlay_test.c +++ b/drivers/of/overlay_test.c @@ -64,6 +64,8 @@ static void of_overlay_apply_kunit_cleanup(struct kunit *test) if (!IS_ENABLED(CONFIG_OF_EARLY_FLATTREE)) kunit_skip(test, "requires CONFIG_OF_EARLY_FLATTREE for root node"); + if (IS_ENABLED(CONFIG_ARM64) && IS_ENABLED(CONFIG_ACPI) && !of_root) + kunit_skip(test, "arm64+acpi rejects overlays"); kunit_init_test(&fake, "fake test", NULL); KUNIT_ASSERT_EQ(test, fake.status, KUNIT_SUCCESS);
Re: [PATCH v8 8/8] clk: Add KUnit tests for clks registered with struct clk_parent_data
On 10/8/24 16:12, Stephen Boyd wrote: Quoting Guenter Roeck (2024-10-03 21:52:09) On 10/3/24 17:42, Stephen Boyd wrote: Can you please describe how you run the kunit test? And provide the qemu command you run to boot arm64 with acpi? Example command line: qemu-system-aarch64 -M virt -m 512 \ -kernel arch/arm64/boot/Image -no-reboot -nographic \ -snapshot \ -bios /opt/buildbot/rootfs/arm64/../firmware/QEMU_EFI-aarch64.fd \ -device virtio-blk-device,drive=d0 \ -drive file=rootfs.ext2,if=none,id=d0,format=raw \ -cpu cortex-a57 -serial stdio -monitor none -no-reboot \ --append "kunit.stats_enabled=2 kunit.filter=speed>slow root=/dev/vda rootwait earlycon=pl011,0x900 console=ttyAMA0" That works fine for me. Configuration is arm64 defconfig plus various debug and kunit options. I built the efi image myself from sources. The root file system is from buildroot with modified init script. kunit tests are all built into the kernel and run during boot. Thanks. I figured out that I was missing enabling CONFIG_ACPI. Here's my commandline ./tools/testing/kunit/kunit.py run --arch=arm64 \ --kunitconfig=drivers/of \ --qemu_args="-bios /usr/share/qemu-efi-aarch64/QEMU_EFI.fd -smp 2" \ --kconfig_add="CONFIG_ACPI=y" \ --kernel_args="earlycon=pl011,0x900" Now I can boot and reproduce the failure, but there's another problem. ACPI disables itself when it fails to find tables. ACPI: Unable to load the System Description Tables This calls disable_acpi() which sets acpi_disabled to 1. This happens before the unit test runs, meaning we can't reliably use 'acpi_disabled' as a method to skip. The best I can come up with then is to test for a NULL of_root when CONFIG_ARM64 and CONFIG_ACPI are enabled, because the tests intentionally don't work when both those configs are enabled and the 'of_root' isn't populated. In all other cases the 'of_root' missing is a bug. I'll probably make this into some sort of kunit helper function in of_private.h and send it to DT maintainers. Sounds good. Thanks a lot for tracking this down. That makes me wonder though why only arm64 has that restriction. Both riscv and loongarch have ACPI enabled in their defconfig files but call unflatten_device_tree() unconditionally. Oh well ... Thanks, Guenter
Re: [PATCH v3] selftests: sched_ext: Add sched_ext as proper selftest target
On Tue, Oct 08, 2024 at 05:35:18PM +0200, Björn Töpel wrote: > From: Björn Töpel > > The sched_ext selftests is missing proper cross-compilation support, a > proper target entry, and out-of-tree build support. > > When building the kselftest suite, e.g.: > > make ARCH=riscv CROSS_COMPILE=riscv64-linux-gnu- \ > TARGETS=sched_ext SKIP_TARGETS="" O=/output/foo \ > -C tools/testing/selftests install > > or: > > make ARCH=arm64 LLVM=1 TARGETS=sched_ext SKIP_TARGETS="" \ > O=/output/foo -C tools/testing/selftests install > > The expectation is that the sched_ext is included, cross-built, the > correct toolchain is picked up, and placed into /output/foo. > > In contrast to the BPF selftests, the sched_ext suite does not use > bpftool at test run-time, so it is sufficient to build bpftool for the > build host only. > > Add ARCH, CROSS_COMPILE, OUTPUT, and TARGETS support to the sched_ext > selftest. Also, remove some variables that were unused by the > Makefile. > > Signed-off-by: Björn Töpel Thanks for the improvements! Acked-by: David Vernet signature.asc Description: PGP signature
Re: [PATCH] selftests: make kselftest-clean remove libynl outputs
On Sat, Oct 5, 2024 at 2:56 PM Greg Thelen wrote: > > Starting with 6.12 commit 85585b4bc8d8 ("selftests: add ncdevmem, netcat > for devmem TCP") kselftest-all creates additional outputs that > kselftest-clean does not cleanup: > $ make defconfig > $ make kselftest-all > $ make kselftest-clean > $ git clean -ndxf | grep tools/net > Would remove tools/net/ynl/lib/__pycache__/ > Would remove tools/net/ynl/lib/ynl.a > Would remove tools/net/ynl/lib/ynl.d > Would remove tools/net/ynl/lib/ynl.o > > Make kselftest-clean remove the newly added net/ynl outputs. > > Fixes: 85585b4bc8d8 ("selftests: add ncdevmem, netcat for devmem TCP") Probably moot since this is merged already (sorry for the delay), but likely it's: Fixes: 07c3cc51a085 ("tools: net: package libynl for use in selftests") The referenced patch doesn't touch ynl.mk, but adds a dependency on it > Signed-off-by: Greg Thelen Thank you Greg, Reviewed-by: Mina Almasry -- Thanks, Mina
Re: [PATCH 3/3] rcu: Report callbacks enqueued on offline CPU blind spot
On Wed, Oct 02, 2024 at 05:00:03PM +0200, Frederic Weisbecker wrote: > Le Wed, Oct 02, 2024 at 04:57:38PM +0200, Frederic Weisbecker a écrit : > > Callbacks enqueued after rcutree_report_cpu_dead() fall into RCU barrier > > blind spot. Report any potential misuse. > > > > Reported-by: Paul E. McKenney > > Signed-off-by: Frederic Weisbecker > > --- > > kernel/rcu/tree.c | 3 +++ > > 1 file changed, 3 insertions(+) > > > > diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c > > index a60616e69b66..36070b6bf4a1 100644 > > --- a/kernel/rcu/tree.c > > +++ b/kernel/rcu/tree.c > > @@ -3084,8 +3084,11 @@ __call_rcu_common(struct rcu_head *head, > > rcu_callback_t func, bool lazy_in) > > head->func = func; > > head->next = NULL; > > kasan_record_aux_stack_noalloc(head); > > + > > local_irq_save(flags); > > rdp = this_cpu_ptr(&rcu_data); > > + RCU_LOCKDEP_WARN(rcu_rdp_cpu_online(rdp), "Callback enqueued on offline > > CPU!"); > > This should be !rcu_rdp_cpu_online(rdp) > > Sigh... I am pulling this in for testing with this change, thank you! Thanx, Paul > > + > > lazy = lazy_in && !rcu_async_should_hurry(); > > > > /* Add the callback to our list. */ > > -- > > 2.46.0 > > > >
Re: [PATCH net-next v8 03/24] ovpn: add basic netlink support
On 08/10/2024 10:58, Jiri Pirko wrote: Tue, Oct 08, 2024 at 10:01:40AM CEST, anto...@openvpn.net wrote: Hi, On 07/10/24 17:32, Jiri Pirko wrote: Wed, Oct 02, 2024 at 11:02:17AM CEST, anto...@openvpn.net wrote: [...] +operations: + list: +- + name: dev-new + attribute-set: ovpn + flags: [ admin-perm ] + doc: Create a new interface of type ovpn + do: +request: + attributes: +- ifname +- mode +reply: + attributes: +- ifname +- ifindex +- + name: dev-del Why you expose new and del here in ovn specific generic netlink iface? Why can't you use the exising RTNL api which is used for creation and destruction of other types of devices? That was my original approach in v1, but it was argued that an ovpn interface needs a userspace program to be configured and used in a meaningful way, therefore it was decided to concentrate all iface mgmt APIs along with the others in the netlink family and to not expose any RTNL ops. Can you please point me to the message id? from Sergey and subsequent replies. RTNL vs NL topic starts right after the definition of 'ovpn_link_ops' Recently Kuniyuki commented on this topic as well in: <20240919055259.17622-1-kun...@amazon.com> and that is why I added a default dellink implemetation. However, recently we decided to add a dellink implementation for better integration with network namespaces and to allow the user to wipe a dangling interface. Hmm, one more argument to have symmetric add/del impletentation in RTNL In the future we are planning to also add the possibility to create a "persistent interface", that is an interface created before launching any userspace program and that survives when the latter is stopped. I can guess this functionality may be better suited for RTNL, but I am not sure yet. That would be quite confusing to have RTNL and genetlink iface to add/del device. From what you described above, makes more sent to have it just in RTNL All in all I tend to agree. @Jiri: do you have any particular opinion why we should use RTNL ops and not netlink for creating/destroying interfaces? I feel this is mostly a matter of taste, but maybe there are technical reasons we should consider. Well. technically, you can probabaly do both. But it is quite common that you can add/delete these kind of devices over RTNL. Lots of examples. People are used to it, aligns with existing flows. The only counterargument I see is the one brought by Sergey: "the ovpn interface is not usable after creation, if no openvpn process is running". However, allowing to create "persistent interfaces" will define a use-case for having an ovpn device without any userspace process. @Sergey what is your opinion here? I am not sure persistent interfaces were discussed at the time you brought your point about RTNL vs NL. Regards, Thanks a lot for your contribution. Regards, ip link add [link DEV | parentdev NAME] [ name ] NAME [ txqueuelen PACKETS ] [ address LLADDR ] [ broadcast LLADDR ] [ mtu MTU ] [index IDX ] [ numtxqueues QUEUE_COUNT ] [ numrxqueues QUEUE_COUNT ] [ netns { PID | NETNSNAME | NETNSFILE } ] type TYPE [ ARGS ] ip link delete { DEVICE | dev DEVICE | group DEVGROUP } type TYPE [ ARGS ] Lots of examples of existing types creation is for example here: https://developers.redhat.com/blog/2018/10/22/introduction-to-linux-interfaces-for-virtual-networking + attribute-set: ovpn + flags: [ admin-perm ] + doc: Delete existing interface of type ovpn + do: +pre: ovpn-nl-pre-doit +post: ovpn-nl-post-doit +request: + attributes: +- ifindex [...] -- Antonio Quartulli OpenVPN Inc. -- Antonio Quartulli OpenVPN Inc.
[PATCH V2 4/4] selftests/mm: skip virtual_address_range tests on riscv
RISC-V doesn't currently have the behavior of restricting the virtual address space which virtual_address_range tests check, this will cause the tests fail. So lets disable the whole test suite for riscv64 for now, not build it and run_vmtests.sh will skip it if it is not present. Reviewed-by: Charlie Jenkins Signed-off-by: Chunyan Zhang --- V1: https://lore.kernel.org/linux-mm/ZuOuedBpS7i3T%2Fo0@ghost/T/ --- tools/testing/selftests/mm/Makefile | 2 ++ tools/testing/selftests/mm/run_vmtests.sh | 10 ++ 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile index 02e1204971b0..76a378c5c141 100644 --- a/tools/testing/selftests/mm/Makefile +++ b/tools/testing/selftests/mm/Makefile @@ -115,7 +115,9 @@ endif ifneq (,$(filter $(ARCH),arm64 mips64 parisc64 powerpc riscv64 s390x sparc64 x86_64 s390)) TEST_GEN_FILES += va_high_addr_switch +ifneq ($(ARCH),riscv64) TEST_GEN_FILES += virtual_address_range +endif TEST_GEN_FILES += write_to_hugetlbfs endif diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index c5797ad1d37b..4493bfd1911c 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -347,10 +347,12 @@ if [ $VADDR64 -ne 0 ]; then # allows high virtual address allocation requests independent # of platform's physical memory. - prev_policy=$(cat /proc/sys/vm/overcommit_memory) - echo 1 > /proc/sys/vm/overcommit_memory - CATEGORY="hugevm" run_test ./virtual_address_range - echo $prev_policy > /proc/sys/vm/overcommit_memory + if [ -x ./virtual_address_range ]; then + prev_policy=$(cat /proc/sys/vm/overcommit_memory) + echo 1 > /proc/sys/vm/overcommit_memory + CATEGORY="hugevm" run_test ./virtual_address_range + echo $prev_policy > /proc/sys/vm/overcommit_memory + fi # va high address boundary switch test ARCH_ARM64="arm64" -- 2.34.1
[PATCH V2 0/4] A few fixes for RISC-V
These patches are all simple fixes with no strong dependency though, I hope that making them a patchset will be more convenient for merge. The patchset are based on v6.12-rc2. Chunyan Zhang (4): riscv: Remove unused GENERATING_ASM_OFFSETS riscv: Remove duplicated GET_RM selftest/mm: Fix typo in virtual_address_range selftests/mm: skip virtual_address_range tests on riscv arch/riscv/kernel/asm-offsets.c| 2 -- arch/riscv/kernel/traps_misaligned.c | 2 -- tools/testing/selftests/mm/Makefile| 2 ++ tools/testing/selftests/mm/run_vmtests.sh | 10 ++ tools/testing/selftests/mm/virtual_address_range.c | 4 ++-- 5 files changed, 10 insertions(+), 10 deletions(-) -- 2.34.1
[PATCH V2 3/4] selftest/mm: Fix typo in virtual_address_range
The function name should be *hint* address, so correct it. Reviewed-by: Charlie Jenkins Signed-off-by: Chunyan Zhang --- V1: https://lore.kernel.org/linux-mm/ZuOuedBpS7i3T%2Fo0@ghost/T/ --- tools/testing/selftests/mm/virtual_address_range.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/mm/virtual_address_range.c b/tools/testing/selftests/mm/virtual_address_range.c index 4e4c1e311247..2a2b69e91950 100644 --- a/tools/testing/selftests/mm/virtual_address_range.c +++ b/tools/testing/selftests/mm/virtual_address_range.c @@ -64,7 +64,7 @@ #define NR_CHUNKS_HIGH NR_CHUNKS_384TB #endif -static char *hind_addr(void) +static char *hint_addr(void) { int bits = HIGH_ADDR_SHIFT + rand() % (63 - HIGH_ADDR_SHIFT); @@ -185,7 +185,7 @@ int main(int argc, char *argv[]) } for (i = 0; i < NR_CHUNKS_HIGH; i++) { - hint = hind_addr(); + hint = hint_addr(); hptr[i] = mmap(hint, MAP_CHUNK_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); -- 2.34.1
[PATCH V2 1/4] riscv: Remove unused GENERATING_ASM_OFFSETS
The macro is not used in the current version of kernel, it looks like can be removed to avoid a build warning: ../arch/riscv/kernel/asm-offsets.c: At top level: ../arch/riscv/kernel/asm-offsets.c:7: warning: macro "GENERATING_ASM_OFFSETS" is not used [-Wunused-macros] 7 | #define GENERATING_ASM_OFFSETS Fixes: 9639a44394b9 ("RISC-V: Provide a cleaner raw_smp_processor_id()") Reviewed-by: Alexandre Ghiti Tested-by: Alexandre Ghiti Signed-off-by: Chunyan Zhang --- V1: https://lore.kernel.org/lkml/20240816101912.1049329-1-zhangchun...@iscas.ac.cn/T/ --- arch/riscv/kernel/asm-offsets.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c index e94180ba432f..c2f3129a8e5c 100644 --- a/arch/riscv/kernel/asm-offsets.c +++ b/arch/riscv/kernel/asm-offsets.c @@ -4,8 +4,6 @@ * Copyright (C) 2017 SiFive */ -#define GENERATING_ASM_OFFSETS - #include #include #include -- 2.34.1
[PATCH V2 2/4] riscv: Remove duplicated GET_RM
The macro GET_RM defined twice in this file, one can be removed. Reviewed-by: Alexandre Ghiti Signed-off-by: Chunyan Zhang --- V1: https://lore.kernel.org/lkml/20240909095557.446745-1-zhangchun...@iscas.ac.cn/T/ --- arch/riscv/kernel/traps_misaligned.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c index d4fd8af7aaf5..1b9867136b61 100644 --- a/arch/riscv/kernel/traps_misaligned.c +++ b/arch/riscv/kernel/traps_misaligned.c @@ -136,8 +136,6 @@ #define REG_PTR(insn, pos, regs) \ (ulong *)((ulong)(regs) + REG_OFFSET(insn, pos)) -#define GET_RM(insn) (((insn) >> 12) & 7) - #define GET_RS1(insn, regs)(*REG_PTR(insn, SH_RS1, regs)) #define GET_RS2(insn, regs)(*REG_PTR(insn, SH_RS2, regs)) #define GET_RS1S(insn, regs) (*REG_PTR(RVC_RS1S(insn), 0, regs)) -- 2.34.1
Re: [PATCH 11/20] sched: Handle CPU isolation on last resort fallback rq selection
On Fri, Sep 27, 2024 at 12:48:59AM +0200, Frederic Weisbecker wrote: > When a kthread or any other task has an affinity mask that is fully > offline or unallowed, the scheduler reaffines the task to all possible > CPUs as a last resort. > > This default decision doesn't mix up very well with nohz_full CPUs that > are part of the possible cpumask but don't want to be disturbed by > unbound kthreads or even detached pinned user tasks. > > Make the fallback affinity setting aware of nohz_full. This applies to > all architectures supporting nohz_full except arm32. However this > architecture that overrides the task possible mask is unlikely to be > willing to integrate new development. I'm not sure I understand this last sentence. The possible mask is overridden for 32-bit tasks on an *arm64* kernel when running on an SoC featuring some CPUs that can execute only 64-bit tasks. Who is unwilling to integrate what? Will
Re: [PATCH 11/20] sched: Handle CPU isolation on last resort fallback rq selection
Le Tue, Oct 08, 2024 at 11:54:35AM +0100, Will Deacon a écrit : > On Fri, Sep 27, 2024 at 12:48:59AM +0200, Frederic Weisbecker wrote: > > When a kthread or any other task has an affinity mask that is fully > > offline or unallowed, the scheduler reaffines the task to all possible > > CPUs as a last resort. > > > > This default decision doesn't mix up very well with nohz_full CPUs that > > are part of the possible cpumask but don't want to be disturbed by > > unbound kthreads or even detached pinned user tasks. > > > > Make the fallback affinity setting aware of nohz_full. This applies to > > all architectures supporting nohz_full except arm32. However this > > architecture that overrides the task possible mask is unlikely to be > > willing to integrate new development. > > I'm not sure I understand this last sentence. The possible mask is > overridden for 32-bit tasks on an *arm64* kernel when running on an SoC > featuring some CPUs that can execute only 64-bit tasks. Who is unwilling > to integrate what? Right I've been lazy on that, assuming that nohz_full is a niche, and nohz_full on arm 32 bits tasks must be even more a niche. But I can make it a macro just like task_cpu_possible_mask() so that architectures can override it? Thanks.
Re: [PATCH 1/1] s390/virtio_ccw: fix dma_parm pointer not set up
On Mon, Oct 07, 2024 at 10:10:30PM +0200, Halil Pasic wrote: > At least since commit 334304ac2bac ("dma-mapping: don't return errors > from dma_set_max_seg_size") setting up device.dma_parms is basically > mandated by the DMA API. As of now Channel (CCW) I/O in general does not > utilize the DMA API, except for virtio. For virtio-ccw however the > common virtio DMA infrastructure is such that most of the DMA stuff > hinges on the virtio parent device, which is a CCW device. > > So lets set up the dma_parms pointer for the CCW parent device and hope > for the best! > > Signed-off-by: Halil Pasic > Fixes: 334304ac2bac ("dma-mapping: don't return errors from > dma_set_max_seg_size") > Reported-by: "Marc Hartmayer" > Closes: https://bugzilla.linux.ibm.com/show_bug.cgi?id=209131 > Reviewed-by: Eric Farman > --- Applied, thanks!
Re: [PATCH 1/1] s390/virtio_ccw: fix dma_parm pointer not set up
On Tue, 08 Oct 2024 10:47:48 +0200 "Marc Hartmayer" wrote: > > Closes: https://bugzilla.linux.ibm.com/show_bug.cgi?id=209131 > > I guess, this line can be removed as it’s internal only. checkpatch.pl complains about the Reported-by if I do. It does not complain about Closes: N/A but if I read the process documentation correctly if the report is not available on the web Closes should be omitted: """ Using Reported-by:, Tested-by:, Reviewed-by:, Suggested-by: and Fixes: -- The Reported-by tag gives credit to people who find bugs and report them and it hopefully inspires them to help us again in the future. The tag is intended for bugs; please do not use it to credit feature requests. The tag should be followed by a Closes: tag pointing to the report, unless the report is not available on the web. """ So I guess I have to make peace with getting checkpatch warnings when I give credits to the reporter for reports not available on the web. Regards, Halil
Re: 6.12-rc1: Lockdep regression bissected (virtio-net/console/scheduler)
On 2024-10-04, Petr Mladek wrote: > On Fri 2024-10-04 02:08:52, Breno Leitao wrote: >> = >> WARNING: HARDIRQ-safe -> HARDIRQ-unsafe lock order detected >> 6.12.0-rc1-kbuilder-virtme-00033-gd4ac164bde7a #50 Not tainted >> - >> swapper/0/1 [HC0[0]:SC0[0]:HE0:SE1] is trying to acquire: >> ff1100010a260518 (_xmit_ETHER#2){+.-.}-{2:2}, at: virtnet_poll_tx >> (./include/linux/netdevice.h:4361 drivers/net/virtio_net.c:2969) >> >> and this task is already holding: >> 86f2b5b8 (target_list_lock){}-{2:2}, at: write_ext_msg >> (drivers/net/netconsole.c:?) >> which would create a new lock dependency: >>(target_list_lock){}-{2:2} -> (_xmit_ETHER#2){+.-.}-{2:2} >> >> but this new dependency connects a HARDIRQ-irq-safe lock: >>(console_owner){-...}-{0:0} ... >> to a HARDIRQ-irq-unsafe lock: >>(_xmit_ETHER#2){+.-.}-{2:2} ... >> other info that might help us debug this: >> >> Chain exists of: >> console_owner --> target_list_lock --> _xmit_ETHER#2 >> >>Possible interrupt unsafe locking scenario: >> >> CPU0CPU1 >> >> lock(_xmit_ETHER#2); >> local_irq_disable(); >> lock(console_owner); >> lock(target_list_lock); >> >> lock(console_owner); I can trigger this lockdep splat on v6.11 as well. It only requires a printk() call within any interrupt handler, sometime after the netconsole is initialized and has had at least one run from softirq context. > My understanding is that the fix is to always take "_xmit_ETHER#2" > lock with interrupts disabled. That seems to be one possible solution. But maybe there is reasoning why that should not be done. (??) Right now it is clearly a spinlock that is being taken from both interrupt and softirq contexts and does not disable interrupts. I will check if there is some previous kernel release where this problem does not exist. John Ogness
Re: [PATCH net v2 0/3] selftests: net: add missing gitignore and EXTRA_CLEAN entries.
Hello: This series was applied to netdev/net.git (main) by Jakub Kicinski : On Sat, 05 Oct 2024 07:29:39 +0200 you wrote: > This series is a cherry-pick on top of v6.12-rc1 from the one I sent > for selftests with other patches that were not net-related: > > https://lore.kernel.org/all/20240925-selftests-gitignore-v3-0-9db896474...@gmail.com/ > > The patches have not been modified, and the Reviewed-by tags have > been kept. > > [...] Here is the summary with links: - [net,v2,1/3] selftests: net: add msg_oob to gitignore (no matching commit) - [net,v2,2/3] selftests: net: rds: add include.sh to EXTRA_CLEAN https://git.kernel.org/netdev/net/c/4227b50cff05 - [net,v2,3/3] selftests: net: rds: add gitignore file for include.sh https://git.kernel.org/netdev/net/c/0e43a5a7b253 You are awesome, thank you! -- Deet-doot-dot, I am a bot. https://korg.docs.kernel.org/patchwork/pwbot.html
[PATCH v3] selftests: sched_ext: Add sched_ext as proper selftest target
From: Björn Töpel The sched_ext selftests is missing proper cross-compilation support, a proper target entry, and out-of-tree build support. When building the kselftest suite, e.g.: make ARCH=riscv CROSS_COMPILE=riscv64-linux-gnu- \ TARGETS=sched_ext SKIP_TARGETS="" O=/output/foo \ -C tools/testing/selftests install or: make ARCH=arm64 LLVM=1 TARGETS=sched_ext SKIP_TARGETS="" \ O=/output/foo -C tools/testing/selftests install The expectation is that the sched_ext is included, cross-built, the correct toolchain is picked up, and placed into /output/foo. In contrast to the BPF selftests, the sched_ext suite does not use bpftool at test run-time, so it is sufficient to build bpftool for the build host only. Add ARCH, CROSS_COMPILE, OUTPUT, and TARGETS support to the sched_ext selftest. Also, remove some variables that were unused by the Makefile. Signed-off-by: Björn Töpel --- v3: * Always build a build host version of bpftool (Mark) * Make sure LLVM style "ARCH only" cross-build works (Mark) v2: * Removed the duplicated LLVM prefix parsing (David) * Made sure make clean didn't do a complete mess (David) * Added sched_ext to default skip (Shuah) --- tools/testing/selftests/Makefile | 9 +-- tools/testing/selftests/sched_ext/Makefile | 73 ++ 2 files changed, 38 insertions(+), 44 deletions(-) diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index b38199965f99..363d031a16f7 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -88,6 +88,7 @@ TARGETS += rlimits TARGETS += rseq TARGETS += rtc TARGETS += rust +TARGETS += sched_ext TARGETS += seccomp TARGETS += sgx TARGETS += sigaltstack @@ -129,10 +130,10 @@ ifeq ($(filter net/lib,$(TARGETS)),) endif endif -# User can optionally provide a TARGETS skiplist. By default we skip -# BPF since it has cutting edge build time dependencies which require -# more effort to install. -SKIP_TARGETS ?= bpf +# User can optionally provide a TARGETS skiplist. By default we skip +# targets using BPF since it has cutting edge build time dependencies +# which require more effort to install. +SKIP_TARGETS ?= bpf sched_ext ifneq ($(SKIP_TARGETS),) TMP := $(filter-out $(SKIP_TARGETS), $(TARGETS)) override TARGETS := $(TMP) diff --git a/tools/testing/selftests/sched_ext/Makefile b/tools/testing/selftests/sched_ext/Makefile index 0754a2c110a1..06ae9c107049 100644 --- a/tools/testing/selftests/sched_ext/Makefile +++ b/tools/testing/selftests/sched_ext/Makefile @@ -3,24 +3,13 @@ include ../../../build/Build.include include ../../../scripts/Makefile.arch include ../../../scripts/Makefile.include + +TEST_GEN_PROGS := runner + +# override lib.mk's default rules +OVERRIDE_TARGETS := 1 include ../lib.mk -ifneq ($(LLVM),) -ifneq ($(filter %/,$(LLVM)),) -LLVM_PREFIX := $(LLVM) -else ifneq ($(filter -%,$(LLVM)),) -LLVM_SUFFIX := $(LLVM) -endif - -CC := $(LLVM_PREFIX)clang$(LLVM_SUFFIX) $(CLANG_FLAGS) -fintegrated-as -else -CC := gcc -endif # LLVM - -ifneq ($(CROSS_COMPILE),) -$(error CROSS_COMPILE not supported for scx selftests) -endif # CROSS_COMPILE - CURDIR := $(abspath .) REPOROOT := $(abspath ../../../..) TOOLSDIR := $(REPOROOT)/tools @@ -34,18 +23,23 @@ GENHDR := $(GENDIR)/autoconf.h SCXTOOLSDIR := $(TOOLSDIR)/sched_ext SCXTOOLSINCDIR := $(TOOLSDIR)/sched_ext/include -OUTPUT_DIR := $(CURDIR)/build +OUTPUT_DIR := $(OUTPUT)/build OBJ_DIR := $(OUTPUT_DIR)/obj INCLUDE_DIR := $(OUTPUT_DIR)/include BPFOBJ_DIR := $(OBJ_DIR)/libbpf SCXOBJ_DIR := $(OBJ_DIR)/sched_ext BPFOBJ := $(BPFOBJ_DIR)/libbpf.a LIBBPF_OUTPUT := $(OBJ_DIR)/libbpf/libbpf.a -DEFAULT_BPFTOOL := $(OUTPUT_DIR)/sbin/bpftool -HOST_BUILD_DIR := $(OBJ_DIR) -HOST_OUTPUT_DIR := $(OUTPUT_DIR) -VMLINUX_BTF_PATHS ?= ../../../../vmlinux \ +DEFAULT_BPFTOOL := $(OUTPUT_DIR)/host/sbin/bpftool +HOST_OBJ_DIR := $(OBJ_DIR)/host/bpftool +HOST_LIBBPF_OUTPUT := $(OBJ_DIR)/host/libbpf/ +HOST_LIBBPF_DESTDIR := $(OUTPUT_DIR)/host/ +HOST_DESTDIR := $(OUTPUT_DIR)/host/ + +VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ +$(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \ +../../../../vmlinux \ /sys/kernel/btf/vmlinux \ /boot/vmlinux-$(shell uname -r) VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS @@ -80,17 +74,23 @@ IS_LITTLE_ENDIAN = $(shell $(CC) -dM -E - &1 \ +$(shell $(1) $(2) -v -E - &1 \ | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \ -$(shell $(1) -dM -E -