date:20241008

To avoid duplication between host programs, move the crc32 code to a
shared header file.

Suggested-by: Petr Pavlu 
Signed-off-by: Sami Tolvanen 
Acked-by: Neal Gompa 
---
 scripts/genksyms/genksyms.c | 77 +-
 scripts/include/crc32.h | 93 +
 2 files changed, 94 insertions(+), 76 deletions(-)
 create mode 100644 scripts/include/crc32.h

diff --git a/scripts/genksyms/genksyms.c b/scripts/genksyms/genksyms.c
index f3901c55df23..2885bbcb9eec 100644
--- a/scripts/genksyms/genksyms.c
+++ b/scripts/genksyms/genksyms.c
@@ -18,6 +18,7 @@
 #include 
 #include 
 
+#include <crc32.h>
 #include "genksyms.h"
 /*--*/
 
@@ -58,82 +59,6 @@ static struct string_list *mk_node(const char *string);
 static void print_location(void);
 static void print_type_name(enum symbol_type type, const char *name);
 
-/*--*/
-
-static const unsigned int crctab32[] = {
-   0x00000000U, 0x77073096U, 0xee0e612cU, 0x990951baU, 0x076dc419U,
-   0x706af48fU, 0xe963a535U, 0x9e6495a3U, 0x0edb8832U, 0x79dcb8a4U,
-   0xe0d5e91eU, 0x97d2d988U, 0x09b64c2bU, 0x7eb17cbdU, 0xe7b82d07U,
-   0x90bf1d91U, 0x1db71064U, 0x6ab020f2U, 0xf3b97148U, 0x84be41deU,
-   0x1adad47dU, 0x6ddde4ebU, 0xf4d4b551U, 0x83d385c7U, 0x136c9856U,
-   0x646ba8c0U, 0xfd62f97aU, 0x8a65c9ecU, 0x14015c4fU, 0x63066cd9U,
-   0xfa0f3d63U, 0x8d080df5U, 0x3b6e20c8U, 0x4c69105eU, 0xd56041e4U,
-   0xa2677172U, 0x3c03e4d1U, 0x4b04d447U, 0xd20d85fdU, 0xa50ab56bU,
-   0x35b5a8faU, 0x42b2986cU, 0xdbbbc9d6U, 0xacbcf940U, 0x32d86ce3U,
-   0x45df5c75U, 0xdcd60dcfU, 0xabd13d59U, 0x26d930acU, 0x51de003aU,
-   0xc8d75180U, 0xbfd06116U, 0x21b4f4b5U, 0x56b3c423U, 0xcfba9599U,
-   0xb8bda50fU, 0x2802b89eU, 0x5f058808U, 0xc60cd9b2U, 0xb10be924U,
-   0x2f6f7c87U, 0x58684c11U, 0xc1611dabU, 0xb6662d3dU, 0x76dc4190U,
-   0x01db7106U, 0x98d220bcU, 0xefd5102aU, 0x71b18589U, 0x06b6b51fU,
-   0x9fbfe4a5U, 0xe8b8d433U, 0x7807c9a2U, 0x0f00f934U, 0x9609a88eU,
-   0xe10e9818U, 0x7f6a0dbbU, 0x086d3d2dU, 0x91646c97U, 0xe6635c01U,
-   0x6b6b51f4U, 0x1c6c6162U, 0x856530d8U, 0xf262004eU, 0x6c0695edU,
-   0x1b01a57bU, 0x8208f4c1U, 0xf50fc457U, 0x65b0d9c6U, 0x12b7e950U,
-   0x8bbeb8eaU, 0xfcb9887cU, 0x62dd1ddfU, 0x15da2d49U, 0x8cd37cf3U,
-   0xfbd44c65U, 0x4db26158U, 0x3ab551ceU, 0xa3bc0074U, 0xd4bb30e2U,
-   0x4adfa541U, 0x3dd895d7U, 0xa4d1c46dU, 0xd3d6f4fbU, 0x4369e96aU,
-   0x346ed9fcU, 0xad678846U, 0xda60b8d0U, 0x44042d73U, 0x33031de5U,
-   0xaa0a4c5fU, 0xdd0d7cc9U, 0x5005713cU, 0x270241aaU, 0xbe0b1010U,
-   0xc90c2086U, 0x5768b525U, 0x206f85b3U, 0xb966d409U, 0xce61e49fU,
-   0x5edef90eU, 0x29d9c998U, 0xb0d09822U, 0xc7d7a8b4U, 0x59b33d17U,
-   0x2eb40d81U, 0xb7bd5c3bU, 0xc0ba6cadU, 0xedb88320U, 0x9abfb3b6U,
-   0x03b6e20cU, 0x74b1d29aU, 0xead54739U, 0x9dd277afU, 0x04db2615U,
-   0x73dc1683U, 0xe3630b12U, 0x94643b84U, 0x0d6d6a3eU, 0x7a6a5aa8U,
-   0xe40ecf0bU, 0x9309ff9dU, 0x0a00ae27U, 0x7d079eb1U, 0xf00f9344U,
-   0x8708a3d2U, 0x1e01f268U, 0x6906c2feU, 0xf762575dU, 0x806567cbU,
-   0x196c3671U, 0x6e6b06e7U, 0xfed41b76U, 0x89d32be0U, 0x10da7a5aU,
-   0x67dd4accU, 0xf9b9df6fU, 0x8ebeeff9U, 0x17b7be43U, 0x60b08ed5U,
-   0xd6d6a3e8U, 0xa1d1937eU, 0x38d8c2c4U, 0x4fdff252U, 0xd1bb67f1U,
-   0xa6bc5767U, 0x3fb506ddU, 0x48b2364bU, 0xd80d2bdaU, 0xaf0a1b4cU,
-   0x36034af6U, 0x41047a60U, 0xdf60efc3U, 0xa867df55U, 0x316e8eefU,
-   0x4669be79U, 0xcb61b38cU, 0xbc66831aU, 0x256fd2a0U, 0x5268e236U,
-   0xcc0c7795U, 0xbb0b4703U, 0x220216b9U, 0x5505262fU, 0xc5ba3bbeU,
-   0xb2bd0b28U, 0x2bb45a92U, 0x5cb36a04U, 0xc2d7ffa7U, 0xb5d0cf31U,
-   0x2cd99e8bU, 0x5bdeae1dU, 0x9b64c2b0U, 0xec63f226U, 0x756aa39cU,
-   0x026d930aU, 0x9c0906a9U, 0xeb0e363fU, 0x72076785U, 0x05005713U,
-   0x95bf4a82U, 0xe2b87a14U, 0x7bb12baeU, 0x0cb61b38U, 0x92d28e9bU,
-   0xe5d5be0dU, 0x7cdcefb7U, 0x0bdbdf21U, 0x86d3d2d4U, 0xf1d4e242U,
-   0x68ddb3f8U, 0x1fda836eU, 0x81be16cdU, 0xf6b9265bU, 0x6fb077e1U,
-   0x18b74777U, 0x88085ae6U, 0xff0f6a70U, 0x66063bcaU, 0x11010b5cU,
-   0x8f659effU, 0xf862ae69U, 0x616bffd3U, 0x166ccf45U, 0xa00ae278U,
-   0xd70dd2eeU, 0x4e048354U, 0x3903b3c2U, 0xa7672661U, 0xd06016f7U,
-   0x4969474dU, 0x3e6e77dbU, 0xaed16a4aU, 0xd9d65adcU, 0x40df0b66U,
-   0x37d83bf0U, 0xa9bcae53U, 0xdebb9ec5U, 0x47b2cf7fU, 0x30b5ffe9U,
-   0xbdbdf21cU, 0xcabac28aU, 0x53b39330U, 0x24b4a3a6U, 0xbad03605U,
-   0xcdd70693U, 0x54de5729U, 0x23d967bfU, 0xb3667a2eU, 0xc4614ab8U,
-   0x5d681b02U, 0x2a6f2b94U, 0xb40bbe37U, 0xc30c8ea1U, 0x5a05df1bU,
-   0x2d02ef8dU
-};
-
-static unsigned long partial_crc32_one(unsigned char c, unsigned long crc)
-{
-   return crctab32[(crc ^ c) & 0xff] ^ (crc >> 8);
-}
-
-static unsigned long partial_crc32(const char *s, unsigned long crc

[PATCH v4 02/19] tools: Add gendwarfksyms

Add a basic DWARF parser, which uses libdw to traverse the debugging
information in an object file and looks for functions and variables.
In follow-up patches, this will be expanded to produce symbol versions
for CONFIG_MODVERSIONS from DWARF.

Signed-off-by: Sami Tolvanen 
Acked-by: Neal Gompa 
Reviewed-by: Petr Pavlu 
---
 kernel/module/Kconfig |   8 ++
 scripts/Makefile  |   1 +
 scripts/gendwarfksyms/.gitignore  |   2 +
 scripts/gendwarfksyms/Makefile|   8 ++
 scripts/gendwarfksyms/dwarf.c | 166 ++
 scripts/gendwarfksyms/gendwarfksyms.c | 124 +++
 scripts/gendwarfksyms/gendwarfksyms.h |  97 +++
 scripts/gendwarfksyms/symbols.c   |  83 +
 8 files changed, 489 insertions(+)
 create mode 100644 scripts/gendwarfksyms/.gitignore
 create mode 100644 scripts/gendwarfksyms/Makefile
 create mode 100644 scripts/gendwarfksyms/dwarf.c
 create mode 100644 scripts/gendwarfksyms/gendwarfksyms.c
 create mode 100644 scripts/gendwarfksyms/gendwarfksyms.h
 create mode 100644 scripts/gendwarfksyms/symbols.c

diff --git a/kernel/module/Kconfig b/kernel/module/Kconfig
index 7c6588148d42..f9e5f82fa88b 100644
--- a/kernel/module/Kconfig
+++ b/kernel/module/Kconfig
@@ -169,6 +169,14 @@ config MODVERSIONS
  make them incompatible with the kernel you are running.  If
  unsure, say N.
 
+config GENDWARFKSYMS
+   bool
+   depends on DEBUG_INFO
+   # Requires full debugging information, split DWARF not supported.
+   depends on !DEBUG_INFO_REDUCED && !DEBUG_INFO_SPLIT
+   # Requires ELF object files.
+   depends on !LTO
+
 config ASM_MODVERSIONS
bool
default HAVE_ASM_MODVERSIONS && MODVERSIONS
diff --git a/scripts/Makefile b/scripts/Makefile
index 6bcda4b9d054..d7fec46d38c0 100644
--- a/scripts/Makefile
+++ b/scripts/Makefile
@@ -54,6 +54,7 @@ targets += module.lds
 
 subdir-$(CONFIG_GCC_PLUGINS) += gcc-plugins
 subdir-$(CONFIG_MODVERSIONS) += genksyms
+subdir-$(CONFIG_GENDWARFKSYMS) += gendwarfksyms
 subdir-$(CONFIG_SECURITY_SELINUX) += selinux
 subdir-$(CONFIG_SECURITY_IPE) += ipe
 
diff --git a/scripts/gendwarfksyms/.gitignore b/scripts/gendwarfksyms/.gitignore
new file mode 100644
index 000000000000..0927f8d3cd96
--- /dev/null
+++ b/scripts/gendwarfksyms/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+/gendwarfksyms
diff --git a/scripts/gendwarfksyms/Makefile b/scripts/gendwarfksyms/Makefile
new file mode 100644
index 000000000000..9f8fec4fd39b
--- /dev/null
+++ b/scripts/gendwarfksyms/Makefile
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0
+hostprogs-always-y += gendwarfksyms
+
+gendwarfksyms-objs += gendwarfksyms.o
+gendwarfksyms-objs += dwarf.o
+gendwarfksyms-objs += symbols.o
+
+HOSTLDLIBS_gendwarfksyms := -ldw -lelf
diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c
new file mode 100644
index 000000000000..81df3e2ad3ae
--- /dev/null
+++ b/scripts/gendwarfksyms/dwarf.c
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024 Google LLC
+ */
+
+#include "gendwarfksyms.h"
+
+static bool get_ref_die_attr(Dwarf_Die *die, unsigned int id, Dwarf_Die *value)
+{
+   Dwarf_Attribute da;
+
+   /* dwarf_formref_die returns a pointer instead of an error value. */
+   return dwarf_attr(die, id, &da) && dwarf_formref_die(&da, value);
+}
+
+#define DEFINE_GET_STRING_ATTR(attr) \
+   static const char *get_##attr##_attr(Dwarf_Die *die) \
+   {\
+   Dwarf_Attribute da;  \
+   if (dwarf_attr(die, DW_AT_##attr, &da))  \
+   return dwarf_formstring(&da);\
+   return NULL; \
+   }
+
+DEFINE_GET_STRING_ATTR(name)
+DEFINE_GET_STRING_ATTR(linkage_name)
+
+static const char *get_symbol_name(Dwarf_Die *die)
+{
+   const char *name;
+
+   /* rustc uses DW_AT_linkage_name for exported symbols */
+   name = get_linkage_name_attr(die);
+   if (!name)
+   name = get_name_attr(die);
+
+   return name;
+}
+
+static bool match_export_symbol(struct state *state, Dwarf_Die *die)
+{
+   Dwarf_Die *source = die;
+   Dwarf_Die origin;
+
+   /* If the DIE has an abstract origin, use it for type information. */
+   if (get_ref_die_attr(die, DW_AT_abstract_origin, &origin))
+   source = &origin;
+
+   state->sym = symbol_get(get_symbol_name(die));
+
+   /* Look up using the origin name if there are no matches. */
+   if (!state->sym && source != die)
+   state->sym = symbol_get(get_symbol_name(source));
+
+   state->die = *source;
+   return !!state->sym;
+}
+
+/*
+ * Type string processing
+ */
+static void process(const char *s)
+{
+   s = s ?: "";
+
+   if (dump_dies)
+   fputs(s, stderr

[PATCH v4 00/19] Implement DWARF modversions

Hi,

Here's v4 of the DWARF modversions series. The main motivation is
modversions support for Rust, which is important for distributions
like Android that are about to ship Rust kernel modules. Per Luis'
request [1], v2 dropped the Rust specific bits from the series and
instead added the feature as an option for the entire kernel. Matt is
addressing Rust modversion_info compatibility issues in a separate
series [2], and we'll follow up with a patch to actually allow
CONFIG_MODVERSIONS with Rust once everything else has been sorted
out.

Short background: Unlike C, Rust source code doesn't have sufficient
information about the final ABI, as the compiler has considerable
freedom in adjusting structure layout, for example, which makes
using a source code parser like genksyms a non-starter. Based on
earlier feedback, this series uses DWARF debugging information for
computing versions. DWARF is an established and relatively stable
format, which includes all the necessary ABI details, and adding a
CONFIG_DEBUG_INFO dependency for Rust symbol versioning seems like a
reasonable trade-off.

The first patch moves the genksyms CRC32 implementation to a shared
header file and the next 15 patches add gendwarfksyms, a tool for
computing symbol versions from DWARF. When passed a list of exported
symbols and object files, the tool generates an expanded type string
for each symbol and computes symbol CRCs similarly to genksyms.
gendwarfksyms is written in C and uses libdw to process DWARF. Patch
17 ensures that debugging information is present where we need it,
patch 18 adds gendwarfksyms as an alternative to genksyms, and the
last patch adds documentation.

v4 is based on v6.12-rc2 and for your convenience the series is also
available here:

https://github.com/samitolvanen/linux/commits/gendwarfksyms-v4

If you also want to test the series with Rust modules, this branch
adds Matt's latest modversion_info series and a small patch to enable
Rust modversions:

https://github.com/samitolvanen/linux/commits/rustmodversions-v4

Sami


[1] https://lore.kernel.org/lkml/znizetkkqweig...@bombadil.infradead.org/
[2] https://lore.kernel.org/lkml/20240925233854.90072-1-mmau...@google.com/

---

v4:
- Rebased on v6.12-rc2, which now includes all the prerequisites.

- Dropped unnecessary name_only parameter for symbols.c::for_each
  and cleaned up error handling. (Patch 3)

- Fixed anonymous scope handling to ensure unnamed DIEs don't get
  names. (Patch 4)

- Added non-variant children to variant_type output, and included
  DW_AT_discr_value attributes for variants. (Patch 9)

- Added another symbol pointer test case. (Patch 16)

- Picked up (Acked|Reviewed)-by tags from v3.


v3: 
https://lore.kernel.org/lkml/20240923181846.549877-22-samitolva...@google.com/
- Updated SPDX license headers.

- Squashed the first two patches in v2 and tried to reduce churn as
  much as reasonable.

- Dropped patch 18 from v2 ("x86/asm-prototypes: Include
  <asm/ptrace.h>") as it's addressed by a separate patch.

- Changed the error handling code to immediately terminate instead
  of propagating the errors back to main, which cleaned up the code
  quite a bit.

- Switched to the list and hashtable implementations in scripts and
  dropped the remaining tools/include dependencies. Added a couple
  missing list macros. (patch 1)

- Moved the genksyms CRC32 implementation to scripts/include and
  dropped the duplicate code. (patches 2 and 14)

- Switched from ad-hoc command line parsing to getopt_long (patch 3).

- Added structure member and function parameter names to the DIE
  output to match genksyms behavior, and tweaked the symtypes format
  to be more parser-friendly in general based on Petr's suggestions.

- Replaced the declaration-only struct annotations with more generic
  kABI stability rules that allow source code annotations to be used
  where #ifndef __GENKSYMS__ was previously used.  Added support for
  rules that can be used to exclude enumerators from versioning.
  (patch 16)

- Per Miroslav's suggestion, added an option to hide structure
  members from versioning when they're added to existing alignment
  holes, for example. (patch 16)

- Per Greg's request, added documentation and example macros for the
  --stable features, and a couple of test cases. (patches 15, 16, and
  20)

- Fixed making symtypes files, which need to depend on .o files with
  gendwarfksyms. (patch 19)

- Addressed several other smaller issues that Petr and Masahiro
  kindly pointed out during the v2 review.


v2: 
https://lore.kernel.org/lkml/20240815173903.4172139-21-samitolva...@google.com/
- Per Luis' request, dropped Rust-specific patches and added
  gendwarfksyms as an alternative to genksyms for the entire
  kernel.

- Added support for missing DWARF features needed to also
  handle non-Rust code.

- Changed symbol address matching to use the symbol table
  information instead of relying on addresses in DWARF.

- Added __gendwarfksyms_ptr patches to ensure the compiler emi

[PATCH v4 05/19] gendwarfksyms: Add a cache for processed DIEs

Basic types in DWARF repeat frequently and traversing the DIEs using
libdw is relatively slow. Add a simple hashtable-based cache for the
processed DIEs.

Signed-off-by: Sami Tolvanen 
Acked-by: Neal Gompa 
---
 scripts/gendwarfksyms/Makefile|   1 +
 scripts/gendwarfksyms/die.c   | 143 ++
 scripts/gendwarfksyms/dwarf.c | 136 +---
 scripts/gendwarfksyms/gendwarfksyms.c |   6 ++
 scripts/gendwarfksyms/gendwarfksyms.h |  63 +++-
 5 files changed, 308 insertions(+), 41 deletions(-)
 create mode 100644 scripts/gendwarfksyms/die.c

diff --git a/scripts/gendwarfksyms/Makefile b/scripts/gendwarfksyms/Makefile
index 9f8fec4fd39b..c0d4ce50fc27 100644
--- a/scripts/gendwarfksyms/Makefile
+++ b/scripts/gendwarfksyms/Makefile
@@ -2,6 +2,7 @@
 hostprogs-always-y += gendwarfksyms
 
 gendwarfksyms-objs += gendwarfksyms.o
+gendwarfksyms-objs += die.o
 gendwarfksyms-objs += dwarf.o
 gendwarfksyms-objs += symbols.o
 
diff --git a/scripts/gendwarfksyms/die.c b/scripts/gendwarfksyms/die.c
new file mode 100644
index 000000000000..28d89fce89fc
--- /dev/null
+++ b/scripts/gendwarfksyms/die.c
@@ -0,0 +1,143 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024 Google LLC
+ */
+
+#include <string.h>
+#include "gendwarfksyms.h"
+
+#define DIE_HASH_BITS 20
+
+/* {die->addr, state} -> struct die * */
+static HASHTABLE_DEFINE(die_map, 1 << DIE_HASH_BITS);
+
+static unsigned int map_hits;
+static unsigned int map_misses;
+
+static inline unsigned int die_hash(uintptr_t addr, enum die_state state)
+{
+   return hash_32(addr_hash(addr) ^ (unsigned int)state);
+}
+
+static void init_die(struct die *cd)
+{
+   cd->state = DIE_INCOMPLETE;
+   cd->fqn = NULL;
+   cd->tag = -1;
+   cd->addr = 0;
+   INIT_LIST_HEAD(&cd->fragments);
+}
+
+static struct die *create_die(Dwarf_Die *die, enum die_state state)
+{
+   struct die *cd;
+
+   cd = xmalloc(sizeof(struct die));
+   init_die(cd);
+   cd->addr = (uintptr_t)die->addr;
+
+   hash_add(die_map, &cd->hash, die_hash(cd->addr, state));
+   return cd;
+}
+
+int __die_map_get(uintptr_t addr, enum die_state state, struct die **res)
+{
+   struct die *cd;
+
+   hash_for_each_possible(die_map, cd, hash, die_hash(addr, state)) {
+   if (cd->addr == addr && cd->state == state) {
+   *res = cd;
+   return 0;
+   }
+   }
+
+   return -1;
+}
+
+struct die *die_map_get(Dwarf_Die *die, enum die_state state)
+{
+   struct die *cd;
+
+   if (__die_map_get((uintptr_t)die->addr, state, &cd) == 0) {
+   map_hits++;
+   return cd;
+   }
+
+   map_misses++;
+   return create_die(die, state);
+}
+
+static void reset_die(struct die *cd)
+{
+   struct die_fragment *tmp;
+   struct die_fragment *df;
+
+   list_for_each_entry_safe(df, tmp, &cd->fragments, list) {
+   if (df->type == FRAGMENT_STRING)
+   free(df->data.str);
+   free(df);
+   }
+
+   if (cd->fqn && *cd->fqn)
+   free(cd->fqn);
+   init_die(cd);
+}
+
+void die_map_free(void)
+{
+   struct hlist_node *tmp;
+   unsigned int stats[DIE_LAST + 1];
+   struct die *cd;
+   int i;
+
+   memset(stats, 0, sizeof(stats));
+
+   hash_for_each_safe(die_map, cd, tmp, hash) {
+   stats[cd->state]++;
+   reset_die(cd);
+   free(cd);
+   }
+   hash_init(die_map);
+
+   if (map_hits + map_misses > 0)
+   debug("hits %u, misses %u (hit rate %.02f%%)", map_hits,
+ map_misses,
+ (100.0f * map_hits) / (map_hits + map_misses));
+
+   for (i = 0; i <= DIE_LAST; i++)
+   debug("%s: %u entries", die_state_name(i), stats[i]);
+}
+
+static struct die_fragment *append_item(struct die *cd)
+{
+   struct die_fragment *df;
+
+   df = xmalloc(sizeof(struct die_fragment));
+   df->type = FRAGMENT_EMPTY;
+   list_add_tail(&df->list, &cd->fragments);
+   return df;
+}
+
+void die_map_add_string(struct die *cd, const char *str)
+{
+   struct die_fragment *df;
+
+   if (!cd)
+   return;
+
+   df = append_item(cd);
+   df->data.str = xstrdup(str);
+   df->type = FRAGMENT_STRING;
+}
+
+void die_map_add_die(struct die *cd, struct die *child)
+{
+   struct die_fragment *df;
+
+   if (!cd)
+   return;
+
+   df = append_item(cd);
+   df->data.addr = child->addr;
+   df->type = FRAGMENT_DIE;
+}
diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c
index 35fd1dfeeadc..c2cd4743515e 100644
--- a/scripts/gendwarfksyms/dwarf.c
+++ b/scripts/gendwarfksyms/dwarf.c
@@ -71,17 +71,19 @@ static bool match_export_symbol(struct state *state, 
Dwarf_Die *die)
 /*
  * Type string processing
  */
-static void process(const char *s)
+static void process(

[PATCH v4 04/19] gendwarfksyms: Expand base_type

Start making gendwarfksyms more useful by adding support for
expanding DW_TAG_base_type types and basic DWARF attributes.

Example:

  $ echo loops_per_jiffy | \
  scripts/gendwarfksyms/gendwarfksyms \
--debug --dump-dies vmlinux.o
  ...
  gendwarfksyms: process_symbol: loops_per_jiffy
  variable base_type unsigned long byte_size(8) encoding(7)
  ...

Signed-off-by: Sami Tolvanen 
Acked-by: Neal Gompa 
Reviewed-by: Petr Pavlu 
---
 scripts/gendwarfksyms/dwarf.c | 159 ++
 1 file changed, 159 insertions(+)

diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c
index 81df3e2ad3ae..35fd1dfeeadc 100644
--- a/scripts/gendwarfksyms/dwarf.c
+++ b/scripts/gendwarfksyms/dwarf.c
@@ -3,8 +3,20 @@
  * Copyright (C) 2024 Google LLC
  */
 
+#include 
 #include "gendwarfksyms.h"
 
+#define DEFINE_GET_ATTR(attr, type)\
+   static bool get_##attr##_attr(Dwarf_Die *die, unsigned int id, \
+ type *value) \
+   {  \
+   Dwarf_Attribute da;\
+   return dwarf_attr(die, id, &da) && \
+  !dwarf_form##attr(&da, value);  \
+   }
+
+DEFINE_GET_ATTR(udata, Dwarf_Word)
+
 static bool get_ref_die_attr(Dwarf_Die *die, unsigned int id, Dwarf_Die *value)
 {
Dwarf_Attribute da;
@@ -67,6 +79,109 @@ static void process(const char *s)
fputs(s, stderr);
 }
 
+#define MAX_FMT_BUFFER_SIZE 128
+
+static void process_fmt(const char *fmt, ...)
+{
+   char buf[MAX_FMT_BUFFER_SIZE];
+   va_list args;
+
+   va_start(args, fmt);
+
+   if (checkp(vsnprintf(buf, sizeof(buf), fmt, args)) >= sizeof(buf))
+   error("vsnprintf overflow: increase MAX_FMT_BUFFER_SIZE");
+
+   process(buf);
+   va_end(args);
+}
+
+#define MAX_FQN_SIZE 64
+
+/* Get a fully qualified name from DWARF scopes */
+static char *get_fqn(Dwarf_Die *die)
+{
+   const char *list[MAX_FQN_SIZE];
+   Dwarf_Die *scopes = NULL;
+   bool has_name = false;
+   char *fqn = NULL;
+   char *p;
+   int count = 0;
+   int len = 0;
+   int res;
+   int i;
+
+   res = checkp(dwarf_getscopes_die(die, &scopes));
+   if (!res) {
+   list[count] = get_name_attr(die);
+
+   if (!list[count])
+   return NULL;
+
+   len += strlen(list[count]);
+   count++;
+
+   goto done;
+   }
+
+   for (i = res - 1; i >= 0 && count < MAX_FQN_SIZE; i--) {
+   if (dwarf_tag(&scopes[i]) == DW_TAG_compile_unit)
+   continue;
+
+   list[count] = get_name_attr(&scopes[i]);
+
+   if (list[count]) {
+   has_name = true;
+   } else {
+   list[count] = "";
+   has_name = false;
+   }
+
+   len += strlen(list[count]);
+   count++;
+
+   if (i > 0) {
+   list[count++] = "::";
+   len += 2;
+   }
+   }
+
+   free(scopes);
+
+   if (count == MAX_FQN_SIZE)
+   warn("increase MAX_FQN_SIZE: reached the maximum");
+
+   /* Consider the DIE unnamed if the last scope doesn't have a name */
+   if (!has_name)
+   return NULL;
+done:
+   fqn = xmalloc(len + 1);
+   *fqn = '\0';
+
+   p = fqn;
+   for (i = 0; i < count; i++)
+   p = stpcpy(p, list[i]);
+
+   return fqn;
+}
+
+static void process_fqn(Dwarf_Die *die)
+{
+   process(" ");
+   process(get_fqn(die) ?: "");
+}
+
+#define DEFINE_PROCESS_UDATA_ATTRIBUTE(attribute)   \
+   static void process_##attribute##_attr(Dwarf_Die *die)  \
+   {   \
+   Dwarf_Word value;   \
+   if (get_udata_attr(die, DW_AT_##attribute, &value)) \
+   process_fmt(" " #attribute "(%" PRIu64 ")", value); \
+   }
+
+DEFINE_PROCESS_UDATA_ATTRIBUTE(alignment)
+DEFINE_PROCESS_UDATA_ATTRIBUTE(byte_size)
+DEFINE_PROCESS_UDATA_ATTRIBUTE(encoding)
+
 bool match_all(Dwarf_Die *die)
 {
return true;
@@ -93,6 +208,49 @@ int process_die_container(struct state *state, Dwarf_Die 
*die,
return 0;
 }
 
+static int process_type(struct state *state, Dwarf_Die *die);
+
+static void process_type_attr(struct state *state, Dwarf_Die *die)
+{
+   Dwarf_Die type;
+
+   if (get_ref_die_attr(die, DW_AT_type, &type)) {
+   check(process_type(state, &type));
+   return;
+   }
+
+   /* Compilers can omit DW_AT_type -- print out 'void' to clarify */
+   p

[PATCH v4 06/19] gendwarfksyms: Expand type modifiers and typedefs

Add support for expanding DWARF type modifiers, such as pointers,
const values etc., and typedefs. These types all have a DW_AT_type
attribute pointing to the underlying type, and thus produce similar
output.

Also add linebreaks and indentation to debugging output to make it
more readable.

Signed-off-by: Sami Tolvanen 
Acked-by: Neal Gompa 
Reviewed-by: Petr Pavlu 
---
 scripts/gendwarfksyms/die.c   | 12 +
 scripts/gendwarfksyms/dwarf.c | 67 +++
 scripts/gendwarfksyms/gendwarfksyms.h |  5 ++
 3 files changed, 84 insertions(+)

diff --git a/scripts/gendwarfksyms/die.c b/scripts/gendwarfksyms/die.c
index 28d89fce89fc..2829387fd815 100644
--- a/scripts/gendwarfksyms/die.c
+++ b/scripts/gendwarfksyms/die.c
@@ -130,6 +130,18 @@ void die_map_add_string(struct die *cd, const char *str)
df->type = FRAGMENT_STRING;
 }
 
+void die_map_add_linebreak(struct die *cd, int linebreak)
+{
+   struct die_fragment *df;
+
+   if (!cd)
+   return;
+
+   df = append_item(cd);
+   df->data.linebreak = linebreak;
+   df->type = FRAGMENT_LINEBREAK;
+}
+
 void die_map_add_die(struct die *cd, struct die *child)
 {
struct die_fragment *df;
diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c
index c2cd4743515e..1d67ee18a388 100644
--- a/scripts/gendwarfksyms/dwarf.c
+++ b/scripts/gendwarfksyms/dwarf.c
@@ -6,6 +6,17 @@
 #include 
 #include "gendwarfksyms.h"
 
+static bool do_linebreak;
+static int indentation_level;
+
+/* Line breaks and indentation for pretty-printing */
+static void process_linebreak(struct die *cache, int n)
+{
+   indentation_level += n;
+   do_linebreak = true;
+   die_map_add_linebreak(cache, n);
+}
+
 #define DEFINE_GET_ATTR(attr, type)\
static bool get_##attr##_attr(Dwarf_Die *die, unsigned int id, \
  type *value) \
@@ -75,6 +86,12 @@ static void process(struct die *cache, const char *s)
 {
s = s ?: "";
 
+   if (dump_dies && do_linebreak) {
+   fputs("\n", stderr);
+   for (int i = 0; i < indentation_level; i++)
+   fputs("  ", stderr);
+   do_linebreak = false;
+   }
if (dump_dies)
fputs(s, stderr);
 
@@ -238,6 +255,40 @@ static void process_type_attr(struct state *state, struct 
die *cache,
process(cache, "base_type void");
 }
 
+/* Container types with DW_AT_type */
+static void __process_type(struct state *state, struct die *cache,
+  Dwarf_Die *die, const char *type)
+{
+   process(cache, type);
+   process_fqn(cache, die);
+   process(cache, " {");
+   process_linebreak(cache, 1);
+   process_type_attr(state, cache, die);
+   process_linebreak(cache, -1);
+   process(cache, "}");
+   process_byte_size_attr(cache, die);
+   process_alignment_attr(cache, die);
+}
+
+#define DEFINE_PROCESS_TYPE(type)\
+   static void process_##type##_type(struct state *state,   \
+ struct die *cache, Dwarf_Die *die) \
+   {\
+   __process_type(state, cache, die, #type "_type");\
+   }
+
+DEFINE_PROCESS_TYPE(atomic)
+DEFINE_PROCESS_TYPE(const)
+DEFINE_PROCESS_TYPE(immutable)
+DEFINE_PROCESS_TYPE(packed)
+DEFINE_PROCESS_TYPE(pointer)
+DEFINE_PROCESS_TYPE(reference)
+DEFINE_PROCESS_TYPE(restrict)
+DEFINE_PROCESS_TYPE(rvalue_reference)
+DEFINE_PROCESS_TYPE(shared)
+DEFINE_PROCESS_TYPE(volatile)
+DEFINE_PROCESS_TYPE(typedef)
+
 static void process_base_type(struct state *state, struct die *cache,
  Dwarf_Die *die)
 {
@@ -259,6 +310,9 @@ static void process_cached(struct state *state, struct die 
*cache,
case FRAGMENT_STRING:
process(NULL, df->data.str);
break;
+   case FRAGMENT_LINEBREAK:
+   process_linebreak(NULL, df->data.linebreak);
+   break;
case FRAGMENT_DIE:
if (!dwarf_die_addr_die(dwarf_cu_getdwarf(die->cu),
(void *)df->data.addr, &child))
@@ -294,7 +348,20 @@ static int process_type(struct state *state, struct die 
*parent, Dwarf_Die *die)
}
 
switch (tag) {
+   /* Type modifiers */
+   PROCESS_TYPE(atomic)
+   PROCESS_TYPE(const)
+   PROCESS_TYPE(immutable)
+   PROCESS_TYPE(packed)
+   PROCESS_TYPE(pointer)
+   PROCESS_TYPE(reference)
+   PROCESS_TYPE(restrict)
+   PROCESS_TYPE(rvalue_reference)
+   PROCESS_TYPE(shared)
+   PROCESS_TYPE(volatile)
+   /* Other types */
PROCESS_TYPE(base)
+   PROCESS_TYPE(typedef)
default:

[PATCH v4 03/19] gendwarfksyms: Add address matching

The compiler may choose not to emit type information in DWARF for all
aliases, but it's possible for each alias to be exported separately.
To ensure we find type information for the aliases as well, read
{section, address} tuples from the symbol table and match symbols also
by address.

Signed-off-by: Sami Tolvanen 
Acked-by: Neal Gompa 
---
 scripts/gendwarfksyms/gendwarfksyms.c |   2 +
 scripts/gendwarfksyms/gendwarfksyms.h |  13 +++
 scripts/gendwarfksyms/symbols.c   | 148 ++
 3 files changed, 163 insertions(+)

diff --git a/scripts/gendwarfksyms/gendwarfksyms.c 
b/scripts/gendwarfksyms/gendwarfksyms.c
index 1a9be8fa18c8..6fb12f9f6023 100644
--- a/scripts/gendwarfksyms/gendwarfksyms.c
+++ b/scripts/gendwarfksyms/gendwarfksyms.c
@@ -103,6 +103,8 @@ int main(int argc, char **argv)
error("open failed for '%s': %s", argv[n],
  strerror(errno));
 
+   symbol_read_symtab(fd);
+
dwfl = dwfl_begin(&callbacks);
if (!dwfl)
error("dwfl_begin failed for '%s': %s", argv[n],
diff --git a/scripts/gendwarfksyms/gendwarfksyms.h 
b/scripts/gendwarfksyms/gendwarfksyms.h
index 1a10d18f178e..a058647e2361 100644
--- a/scripts/gendwarfksyms/gendwarfksyms.h
+++ b/scripts/gendwarfksyms/gendwarfksyms.h
@@ -66,14 +66,27 @@ extern int dump_dies;
  * symbols.c
  */
 
+static inline unsigned int addr_hash(uintptr_t addr)
+{
+   return hash_ptr((const void *)addr);
+}
+
+struct symbol_addr {
+   uint32_t section;
+   Elf64_Addr address;
+};
+
 struct symbol {
const char *name;
+   struct symbol_addr addr;
+   struct hlist_node addr_hash;
struct hlist_node name_hash;
 };
 
 typedef void (*symbol_callback_t)(struct symbol *, void *arg);
 
 void symbol_read_exports(FILE *file);
+void symbol_read_symtab(int fd);
 struct symbol *symbol_get(const char *name);
 
 /*
diff --git a/scripts/gendwarfksyms/symbols.c b/scripts/gendwarfksyms/symbols.c
index 4df685deb9e0..6cb99b8769ea 100644
--- a/scripts/gendwarfksyms/symbols.c
+++ b/scripts/gendwarfksyms/symbols.c
@@ -6,8 +6,39 @@
 #include "gendwarfksyms.h"
 
 #define SYMBOL_HASH_BITS 15
+
+/* struct symbol_addr -> struct symbol */
+static HASHTABLE_DEFINE(symbol_addrs, 1 << SYMBOL_HASH_BITS);
+/* name -> struct symbol */
 static HASHTABLE_DEFINE(symbol_names, 1 << SYMBOL_HASH_BITS);
 
+static inline unsigned int symbol_addr_hash(const struct symbol_addr *addr)
+{
+   return hash_32(addr->section ^ addr_hash(addr->address));
+}
+
+static unsigned int __for_each_addr(struct symbol *sym, symbol_callback_t func,
+   void *data)
+{
+   struct hlist_node *tmp;
+   struct symbol *match = NULL;
+   unsigned int processed = 0;
+
+   hash_for_each_possible_safe(symbol_addrs, match, tmp, addr_hash,
+   symbol_addr_hash(&sym->addr)) {
+   if (match == sym)
+   continue; /* Already processed */
+
+   if (match->addr.section == sym->addr.section &&
+   match->addr.address == sym->addr.address) {
+   func(match, data);
+   ++processed;
+   }
+   }
+
+   return processed;
+}
+
 static unsigned int for_each(const char *name, symbol_callback_t func,
 void *data)
 {
@@ -22,9 +53,13 @@ static unsigned int for_each(const char *name, 
symbol_callback_t func,
if (strcmp(match->name, name))
continue;
 
+   /* Call func for the match, and all address matches */
if (func)
func(match, data);
 
+   if (match->addr.section != SHN_UNDEF)
+   return __for_each_addr(match, func, data) + 1;
+
return 1;
}
 
@@ -56,6 +91,7 @@ void symbol_read_exports(FILE *file)
 
sym = xcalloc(1, sizeof(struct symbol));
sym->name = name;
+   sym->addr.section = SHN_UNDEF;
 
hash_add(symbol_names, &sym->name_hash, hash_str(sym->name));
++nsym;
@@ -81,3 +117,115 @@ struct symbol *symbol_get(const char *name)
for_each(name, get_symbol, &sym);
return sym;
 }
+
+typedef void (*elf_symbol_callback_t)(const char *name, GElf_Sym *sym,
+ Elf32_Word xndx, void *arg);
+
+static void elf_for_each_global(int fd, elf_symbol_callback_t func, void *arg)
+{
+   size_t sym_size;
+   GElf_Shdr shdr_mem;
+   GElf_Shdr *shdr;
+   Elf_Data *xndx_data = NULL;
+   Elf_Scn *scn;
+   Elf *elf;
+
+   if (elf_version(EV_CURRENT) != EV_CURRENT)
+   error("elf_version failed: %s", elf_errmsg(-1));
+
+   elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
+   if (!elf)
+   error("elf_begin failed: %s", elf_errmsg(-1));
+
+   scn = elf_nextscn

[PATCH v4 09/19] gendwarfksyms: Expand structure types

Recursively expand DWARF structure types, i.e. structs, unions, and
enums. Also include relevant DWARF attributes in type strings to
encode structure layout, for example.

Example output with --dump-dies:

  subprogram (
formal_parameter structure_type &str {
  member pointer_type {
base_type u8 byte_size(1) encoding(7)
  } data_ptr data_member_location(0) ,
  member base_type usize byte_size(8) encoding(7) length 
data_member_location(8)
} byte_size(16) alignment(8) msg
  )
  -> base_type void

Signed-off-by: Sami Tolvanen 
Acked-by: Neal Gompa 
---
 scripts/gendwarfksyms/dwarf.c | 138 +-
 scripts/gendwarfksyms/gendwarfksyms.h |   5 +
 2 files changed, 141 insertions(+), 2 deletions(-)

diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c
index ade9b3b7b119..f5cebbdcc212 100644
--- a/scripts/gendwarfksyms/dwarf.c
+++ b/scripts/gendwarfksyms/dwarf.c
@@ -207,9 +207,14 @@ static void process_fqn(struct die *cache, Dwarf_Die *die)
value);\
}
 
+DEFINE_PROCESS_UDATA_ATTRIBUTE(accessibility)
 DEFINE_PROCESS_UDATA_ATTRIBUTE(alignment)
+DEFINE_PROCESS_UDATA_ATTRIBUTE(bit_size)
 DEFINE_PROCESS_UDATA_ATTRIBUTE(byte_size)
 DEFINE_PROCESS_UDATA_ATTRIBUTE(encoding)
+DEFINE_PROCESS_UDATA_ATTRIBUTE(data_bit_offset)
+DEFINE_PROCESS_UDATA_ATTRIBUTE(data_member_location)
+DEFINE_PROCESS_UDATA_ATTRIBUTE(discr_value)
 
 /* Match functions -- die_match_callback_t */
 #define DEFINE_MATCH(type) \
@@ -218,7 +223,9 @@ DEFINE_PROCESS_UDATA_ATTRIBUTE(encoding)
return dwarf_tag(die) == DW_TAG_##type##_type; \
}
 
+DEFINE_MATCH(enumerator)
 DEFINE_MATCH(formal_parameter)
+DEFINE_MATCH(member)
 DEFINE_MATCH(subrange)
 
 bool match_all(Dwarf_Die *die)
@@ -297,6 +304,10 @@ static void __process_list_type(struct state *state, 
struct die *cache,
process(cache, " ");
process(cache, name);
}
+   process_accessibility_attr(cache, die);
+   process_bit_size_attr(cache, die);
+   process_data_bit_offset_attr(cache, die);
+   process_data_member_location_attr(cache, die);
 }
 
 #define DEFINE_PROCESS_LIST_TYPE(type)   \
@@ -307,6 +318,7 @@ static void __process_list_type(struct state *state, struct 
die *cache,
}
 
 DEFINE_PROCESS_LIST_TYPE(formal_parameter)
+DEFINE_PROCESS_LIST_TYPE(member)
 
 /* Container types with DW_AT_type */
 static void __process_type(struct state *state, struct die *cache,
@@ -339,6 +351,7 @@ DEFINE_PROCESS_TYPE(reference)
 DEFINE_PROCESS_TYPE(restrict)
 DEFINE_PROCESS_TYPE(rvalue_reference)
 DEFINE_PROCESS_TYPE(shared)
+DEFINE_PROCESS_TYPE(template_type_parameter)
 DEFINE_PROCESS_TYPE(volatile)
 DEFINE_PROCESS_TYPE(typedef)
 
@@ -392,6 +405,107 @@ static void process_subroutine_type(struct state *state, 
struct die *cache,
__process_subroutine_type(state, cache, die, "subroutine_type");
 }
 
+static void process_variant_type(struct state *state, struct die *cache,
+Dwarf_Die *die)
+{
+   process_list_comma(state, cache);
+   process(cache, "variant {");
+   process_linebreak(cache, 1);
+   check(process_die_container(state, cache, die, process_type,
+   match_member_type));
+   process_linebreak(cache, -1);
+   process(cache, "}");
+   process_discr_value_attr(cache, die);
+}
+
+static void process_variant_part_type(struct state *state, struct die *cache,
+ Dwarf_Die *die)
+{
+   process_list_comma(state, cache);
+   process(cache, "variant_part {");
+   process_linebreak(cache, 1);
+   check(process_die_container(state, cache, die, process_type,
+   match_all));
+   process_linebreak(cache, -1);
+   process(cache, "}");
+}
+
+static int ___process_structure_type(struct state *state, struct die *cache,
+Dwarf_Die *die)
+{
+   switch (dwarf_tag(die)) {
+   case DW_TAG_member:
+   case DW_TAG_variant_part:
+   return check(process_type(state, cache, die));
+   case DW_TAG_class_type:
+   case DW_TAG_enumeration_type:
+   case DW_TAG_structure_type:
+   case DW_TAG_template_type_parameter:
+   case DW_TAG_union_type:
+   case DW_TAG_subprogram:
+   /* Skip non-member types, including member functions */
+   return 0;
+   default:
+   error("unexpected structure_type child: %x", dwarf_tag(die));
+   }
+}
+
+static void __process_structure_type(struct state *state, struct die *cache,
+Dwarf_Die *die, const char *type,
+die_callback_t process_func,
+die_match_callback_t match_func)
+{
+

[PATCH v4 07/19] gendwarfksyms: Expand subroutine_type

Add support for expanding DW_TAG_subroutine_type and the parameters
in DW_TAG_formal_parameter. Use this to also expand subprograms.

Example output with --dump-dies:

  subprogram (
formal_parameter pointer_type {
  const_type {
base_type char byte_size(1) encoding(6)
  }
}
  )
  -> base_type unsigned long byte_size(8) encoding(7)

Signed-off-by: Sami Tolvanen 
Acked-by: Neal Gompa 
Reviewed-by: Petr Pavlu 
---
 scripts/gendwarfksyms/dwarf.c | 84 ++-
 scripts/gendwarfksyms/gendwarfksyms.h |  4 ++
 2 files changed, 85 insertions(+), 3 deletions(-)

diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c
index 1d67ee18a388..7e6b477d7c12 100644
--- a/scripts/gendwarfksyms/dwarf.c
+++ b/scripts/gendwarfksyms/dwarf.c
@@ -211,6 +211,15 @@ DEFINE_PROCESS_UDATA_ATTRIBUTE(alignment)
 DEFINE_PROCESS_UDATA_ATTRIBUTE(byte_size)
 DEFINE_PROCESS_UDATA_ATTRIBUTE(encoding)
 
+/* Match functions -- die_match_callback_t */
+#define DEFINE_MATCH(type) \
+   static bool match_##type##_type(Dwarf_Die *die)\
+   {  \
+   return dwarf_tag(die) == DW_TAG_##type##_type; \
+   }
+
+DEFINE_MATCH(formal_parameter)
+
 bool match_all(Dwarf_Die *die)
 {
return true;
@@ -223,19 +232,28 @@ int process_die_container(struct state *state, struct die 
*cache,
Dwarf_Die current;
int res;
 
+   /* Track the first item in lists. */
+   if (state)
+   state->first_list_item = true;
+
res = checkp(dwarf_child(die, &current));
while (!res) {
if (match(&current)) {
/* <0 = error, 0 = continue, >0 = stop */
res = checkp(func(state, cache, &current));
if (res)
-   return res;
+   goto out;
}
 
res = checkp(dwarf_siblingof(&current, &current));
}
 
-   return 0;
+   res = 0;
+out:
+   if (state)
+   state->first_list_item = false;
+
+   return res;
 }
 
 static int process_type(struct state *state, struct die *parent,
@@ -255,6 +273,40 @@ static void process_type_attr(struct state *state, struct 
die *cache,
process(cache, "base_type void");
 }
 
+static void process_list_comma(struct state *state, struct die *cache)
+{
+   if (state->first_list_item) {
+   state->first_list_item = false;
+   } else {
+   process(cache, " ,");
+   process_linebreak(cache, 0);
+   }
+}
+
+/* Comma-separated with DW_AT_type */
+static void __process_list_type(struct state *state, struct die *cache,
+   Dwarf_Die *die, const char *type)
+{
+   const char *name = get_name_attr(die);
+
+   process_list_comma(state, cache);
+   process(cache, type);
+   process_type_attr(state, cache, die);
+   if (name) {
+   process(cache, " ");
+   process(cache, name);
+   }
+}
+
+#define DEFINE_PROCESS_LIST_TYPE(type)   \
+   static void process_##type##_type(struct state *state,   \
+ struct die *cache, Dwarf_Die *die) \
+   {\
+   __process_list_type(state, cache, die, #type " ");   \
+   }
+
+DEFINE_PROCESS_LIST_TYPE(formal_parameter)
+
 /* Container types with DW_AT_type */
 static void __process_type(struct state *state, struct die *cache,
   Dwarf_Die *die, const char *type)
@@ -289,6 +341,29 @@ DEFINE_PROCESS_TYPE(shared)
 DEFINE_PROCESS_TYPE(volatile)
 DEFINE_PROCESS_TYPE(typedef)
 
+static void __process_subroutine_type(struct state *state, struct die *cache,
+ Dwarf_Die *die, const char *type)
+{
+   process(cache, type);
+   process(cache, " (");
+   process_linebreak(cache, 1);
+   /* Parameters */
+   check(process_die_container(state, cache, die, process_type,
+   match_formal_parameter_type));
+   process_linebreak(cache, -1);
+   process(cache, ")");
+   process_linebreak(cache, 0);
+   /* Return type */
+   process(cache, "-> ");
+   process_type_attr(state, cache, die);
+}
+
+static void process_subroutine_type(struct state *state, struct die *cache,
+   Dwarf_Die *die)
+{
+   __process_subroutine_type(state, cache, die, "subroutine_type");
+}
+
 static void process_base_type(struct state *state, struct die *cache,
  Dwarf_Die *die)
 {
@@ -359,8 +434,11 @@ static int process_type(struct state *state, struct die 
*parent, Dwarf_Die *die)
PROCESS_TYPE(rvalue_reference)
PROCESS_TYPE(shared)
PROCESS_TYPE(volat

Re: [PATCH v2] selftests: vDSO: Explicitly include sched.h


On 10/7/24 20:33, Yu Liao wrote:

The previous commit introduced the use of CLONE_NEWTIME without including
<sched.h>, which contains its definition.

Add an explicit include of <sched.h> to ensure that CLONE_NEWTIME
is correctly defined before it is used.

Fixes: 2aec90036dcd ("selftests: vDSO: ensure vgetrandom works in a time 
namespace")
Signed-off-by: Yu Liao 
---
Changes in v2:
- Include  instead of 

v1: https://lore.kernel.org/all/20240919111841.20226-1-liaoy...@huawei.com/

  tools/testing/selftests/vDSO/vdso_test_getrandom.c | 1 +
  1 file changed, 1 insertion(+)

diff --git a/tools/testing/selftests/vDSO/vdso_test_getrandom.c 
b/tools/testing/selftests/vDSO/vdso_test_getrandom.c
index 72a1d9b43a84..ddf37e3ab18b 100644
--- a/tools/testing/selftests/vDSO/vdso_test_getrandom.c
+++ b/tools/testing/selftests/vDSO/vdso_test_getrandom.c
@@ -11,6 +11,7 @@
  #include 
  #include 
  #include 
+#include <sched.h>
  #include 
  #include 
  #include 


Thank you. Applied to linux-kselftest fixes for next rc.

thanks,
-- Shuah

Re: [PATCH net-next v20 01/14] mm: page_frag: add a test module for page_frag


On 10/8/24 05:20, Yunsheng Lin wrote:

The testing is done by ensuring that the fragment allocated
from a page_frag_cache instance is pushed into a ptr_ring
instance in a kthread bound to a specified cpu, and a kthread
bound to a specified cpu will pop the fragment from the
ptr_ring and free the fragment.

CC: Alexander Duyck 
Signed-off-by: Yunsheng Lin 
Reviewed-by: Alexander Duyck 


Signed-off-by should be last. Same comment on all the other
patches in this series. When you have 4 patches, it is good
practice to add a cover letter.


---
  tools/testing/selftests/mm/Makefile   |   3 +
  tools/testing/selftests/mm/page_frag/Makefile |  18 ++
  .../selftests/mm/page_frag/page_frag_test.c   | 173 ++
  tools/testing/selftests/mm/run_vmtests.sh |   8 +
  tools/testing/selftests/mm/test_page_frag.sh  | 171 +
  5 files changed, 373 insertions(+)
  create mode 100644 tools/testing/selftests/mm/page_frag/Makefile
  create mode 100644 tools/testing/selftests/mm/page_frag/page_frag_test.c
  create mode 100755 tools/testing/selftests/mm/test_page_frag.sh

diff --git a/tools/testing/selftests/mm/Makefile 
b/tools/testing/selftests/mm/Makefile
index 02e1204971b0..acec529baaca 100644
--- a/tools/testing/selftests/mm/Makefile
+++ b/tools/testing/selftests/mm/Makefile
@@ -36,6 +36,8 @@ MAKEFLAGS += --no-builtin-rules
  CFLAGS = -Wall -I $(top_srcdir) $(EXTRA_CFLAGS) $(KHDR_INCLUDES) 
$(TOOLS_INCLUDES)
  LDLIBS = -lrt -lpthread -lm
  
+TEST_GEN_MODS_DIR := page_frag

+
  TEST_GEN_FILES = cow
  TEST_GEN_FILES += compaction_test
  TEST_GEN_FILES += gup_longterm
@@ -126,6 +128,7 @@ TEST_FILES += test_hmm.sh
  TEST_FILES += va_high_addr_switch.sh
  TEST_FILES += charge_reserved_hugetlb.sh
  TEST_FILES += hugetlb_reparenting_test.sh
+TEST_FILES += test_page_frag.sh
  
  # required by charge_reserved_hugetlb.sh

  TEST_FILES += write_hugetlb_memory.sh
diff --git a/tools/testing/selftests/mm/page_frag/Makefile 
b/tools/testing/selftests/mm/page_frag/Makefile
new file mode 100644
index ..58dda74d50a3
--- /dev/null
+++ b/tools/testing/selftests/mm/page_frag/Makefile
@@ -0,0 +1,18 @@
+PAGE_FRAG_TEST_DIR := $(realpath $(dir $(abspath $(lastword 
$(MAKEFILE_LIST)))))
+KDIR ?= $(abspath $(PAGE_FRAG_TEST_DIR)/../../../../..)
+
+ifeq ($(V),1)
+Q =
+else
+Q = @
+endif
+
+MODULES = page_frag_test.ko
+
+obj-m += page_frag_test.o
+
+all:
+   +$(Q)make -C $(KDIR) M=$(PAGE_FRAG_TEST_DIR) modules
+
+clean:
+   +$(Q)make -C $(KDIR) M=$(PAGE_FRAG_TEST_DIR) clean
diff --git a/tools/testing/selftests/mm/page_frag/page_frag_test.c 
b/tools/testing/selftests/mm/page_frag/page_frag_test.c
new file mode 100644
index ..eeb2b6bc681a
--- /dev/null
+++ b/tools/testing/selftests/mm/page_frag/page_frag_test.c
@@ -0,0 +1,173 @@
+// SPDX-License-Identifier: GPL-2.0


I think this would throw a checkpatch warning saying that the
comment should use "/*" and not "//".

+
+/*
+ * Test module for page_frag cache
+ *
+ * Copyright (C) 2024 Yunsheng Lin 
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+static struct ptr_ring ptr_ring;
+static int nr_objs = 512;
+static atomic_t nthreads;
+static struct completion wait;
+static struct page_frag_cache test_nc;
+static int test_popped;
+static int test_pushed;
+
+static int nr_test = 200;
+module_param(nr_test, int, 0);
+MODULE_PARM_DESC(nr_test, "number of iterations to test");
+
+static bool test_align;
+module_param(test_align, bool, 0);
+MODULE_PARM_DESC(test_align, "use align API for testing");
+
+static int test_alloc_len = 2048;
+module_param(test_alloc_len, int, 0);
+MODULE_PARM_DESC(test_alloc_len, "alloc len for testing");
+
+static int test_push_cpu;
+module_param(test_push_cpu, int, 0);
+MODULE_PARM_DESC(test_push_cpu, "test cpu for pushing fragment");
+
+static int test_pop_cpu;
+module_param(test_pop_cpu, int, 0);
+MODULE_PARM_DESC(test_pop_cpu, "test cpu for popping fragment");
+
+static int page_frag_pop_thread(void *arg)
+{
+   struct ptr_ring *ring = arg;
+
+   pr_info("page_frag pop test thread begins on cpu %d\n",
+   smp_processor_id());
+
+   while (test_popped < nr_test) {
+   void *obj = __ptr_ring_consume(ring);
+
+   if (obj) {
+   test_popped++;
+   page_frag_free(obj);
+   } else {
+   cond_resched();
+   }
+   }
+
+   if (atomic_dec_and_test(&nthreads))
+   complete(&wait);
+
+   pr_info("page_frag pop test thread exits on cpu %d\n",
+   smp_processor_id());
+
+   return 0;
+}
+
+static int page_frag_push_thread(void *arg)
+{
+   struct ptr_ring *ring = arg;
+
+   pr_info("page_frag push test thread begins on cpu %d\n",
+   smp_processor_id());
+
+   while (test_pushed < nr_test) {
+   void *va;
+   int ret;
+
+   if (test_align) {
+   va = page

Re: [RFC PATCH 30/39] KVM: guest_memfd: Handle folio preparation for guest_memfd mmap

2024-10-08 Thread Sean Christopherson

On Tue, Oct 08, 2024, Ackerley Tng wrote:
> Patrick Roy  writes:
> > For the "non-CoCo with direct map entries removed" VMs that we at AWS
> > are going for, we'd like a VM type with host-controlled in-place
> > conversions which doesn't zero on transitions,

Hmm, your use case shouldn't need conversions _for KVM_, as there's no need for
KVM to care if userspace or the guest _wants_ a page to be shared vs. private.
Userspace is fully trusted to manage things; KVM simply reacts to the current
state of things.

And more importantly, whether or not the direct map is zapped needs to be a
property of the guest_memfd inode, i.e. can't be associated with a struct kvm.
I forget who got volunteered to do the work, but we're going to need similar
functionality for tracking the state of individual pages in a huge folio, as
folio_mark_uptodate() is too coarse-grained.  I.e. at some point, I expect that
guest_memfd will make it easy-ish to determine whether or not the direct map has
been obliterated.

The shared vs. private attributes tracking in KVM is still needed (I think), as
it communicates what userspace _wants_, whereas the guest_memfd machinery will
track what the state _is_.

> > so if KVM_X86_SW_PROTECTED_VM ends up zeroing, we'd need to add another new
> > VM type for that.

Maybe we should sneak in a s/KVM_X86_SW_PROTECTED_VM/KVM_X86_SW_HARDENED_VM 
rename?
The original thought behind "software protected VM" was to do a slow build of
something akin to pKVM, but realistically I don't think that idea is going 
anywhere.

Alternatively, depending on how KVM accesses guest memory that's been removed 
from
the direct map, another solution would be to allow "regular" VMs to bind 
memslots
to guest_memfd, i.e. if the non-CoCo use case needs/wants to bind all memory to
guest_memfd, not just "private" mappings.

That's probably the biggest topic of discussion: how do we want to allow mapping
guest_memfd into the guest, without direct map entries, but while still allowing
KVM to access guest memory as needed, e.g. for shadow paging.  One approach is
your RFC, where KVM maps guest_memfd pfns on-demand.

Another (slightly crazy) approach would be use protection keys to provide the
security properties that you want, while giving KVM (and userspace) a 
quick-and-easy
override to access guest memory.

 1. mmap() guest_memfd into userspace with RW protections
 2. Configure PKRU to make guest_memfd memory inaccessible by default
 3. Swizzle PKRU on-demand when intentionally accessing guest memory

It's essentially the same idea as SMAP+STAC/CLAC, just applied to guest memory
instead of to userspace memory.

The benefit of the PKRU approach is that there are no PTE modifications, and 
thus
no TLB flushes, and only the CPU that is accessing guest memory gains temporary
access.  The big downside is that it would be limited to modern hardware, but
that might be acceptable, especially if it simplifies KVM's implementation.

> > Somewhat related sidenote: For VMs that allow inplace conversions and do
> > not zero, we do not need to zap the stage-2 mappings on memory attribute
> > changes, right?

See above.  I don't think conversions by toggling the shared/private flag in
KVM's memory attributes is the right fit for your use case.

Re: [RFC PATCH v3 4/4] sched+mm: Use hazard pointers to track lazy active mm existence

2024-10-08 Thread Joel Fernandes

On Tue, Oct 8, 2024 at 9:52 AM Mathieu Desnoyers
 wrote:
>
> Replace lazy active mm existence tracking with hazard pointers. This
> removes the following implementations and their associated config
> options:
>
> - MMU_LAZY_TLB_REFCOUNT
> - MMU_LAZY_TLB_SHOOTDOWN
> - This removes the call_rcu delayed mm drop for RT.
>
> It leverages the fact that each CPU only ever have at most one single
> lazy active mm. This makes it a very good fit for a hazard pointer
> domain implemented with one hazard pointer slot per CPU.
>
> -static void cleanup_lazy_tlbs(struct mm_struct *mm)
> +static void remove_lazy_mm_hp(int cpu, struct hazptr_slot *slot, void *addr)
>  {
> -   if (!IS_ENABLED(CONFIG_MMU_LAZY_TLB_SHOOTDOWN)) {
> -   /*
> -* In this case, lazy tlb mms are refounted and would not 
> reach
> -* __mmdrop until all CPUs have switched away and mmdrop()ed.
> -*/
> -   return;
> -   }
> +   smp_call_function_single(cpu, do_shoot_lazy_tlb, addr, 1);
> +   smp_call_function_single(cpu, do_check_lazy_tlb, addr, 1);
> +}
>
> +static void cleanup_lazy_tlbs(struct mm_struct *mm)
> +{
[...]
> -   on_each_cpu_mask(mm_cpumask(mm), do_shoot_lazy_tlb, (void *)mm, 1);
> -   if (IS_ENABLED(CONFIG_DEBUG_VM_SHOOT_LAZIES))
> -   on_each_cpu(do_check_lazy_tlb, (void *)mm, 1);
> +   hazptr_scan(&hazptr_domain_sched_lazy_mm, mm, remove_lazy_mm_hp);

Hey Mathieu, Take comments with a grain of salt because I am digging
into active_mm after a while.
It seems to me that this is a strange hazard pointer callback
usecase. Because "scan" here immediately retires even though the
reader has a "reference". Here it is more like, the callback is
forcing all other readers holding a "reference" to switch immediately
whether they like it or not and not wait until _they_ release the
reference. There is no such precedent in RCU for instance, where a
callback never runs before a reader even finishes.

That might work for active_mm, but it sounds like a fringe usecase to
me that it might probably be better to just force
CONFIG_MMU_LAZY_TLB_SHOOTDOWN=y for everyone and use on_each_cpu()
instead. That will give the same code simplification for this patch
without requiring hazard pointers AFAICS? Or maybe start with that,
and _then_ convert to HP if it makes sense to? These are just some
thoughts and I am Ok with all the reviewer's consensus.

And if I understand correctly, for this usecase - we are not even
grabbing a "true reference" to the mm_struct object because direct
access to mm_struct should require a proper mmgrab(), not a lazy_tlb
flavored one? -- correct me if I'm wrong though.

Also, isn't it that on x86, now with this patch there will be more
IPIs, whereas previously the global refcount was not requiring that as
the last kthread switching out would no longer access the old mm?
Might it be worth checking the performance of fork/exit and if that
scales?

thanks,

- Joel

Re: [PATCH v3] selftests: sched_ext: Add sched_ext as proper selftest target


On 10/8/24 09:35, Björn Töpel wrote:

From: Björn Töpel 

The sched_ext selftests is missing proper cross-compilation support, a
proper target entry, and out-of-tree build support.

When building the kselftest suite, e.g.:

   make ARCH=riscv CROSS_COMPILE=riscv64-linux-gnu-  \
 TARGETS=sched_ext SKIP_TARGETS="" O=/output/foo \
 -C tools/testing/selftests install

or:

   make ARCH=arm64 LLVM=1 TARGETS=sched_ext SKIP_TARGETS="" \
 O=/output/foo -C tools/testing/selftests install

The expectation is that the sched_ext is included, cross-built, the
correct toolchain is picked up, and placed into /output/foo.

In contrast to the BPF selftests, the sched_ext suite does not use
bpftool at test run-time, so it is sufficient to build bpftool for the
build host only.

Add ARCH, CROSS_COMPILE, OUTPUT, and TARGETS support to the sched_ext
selftest. Also, remove some variables that were unused by the
Makefile.


Thank you for adding this to change log.
Maybe we need to add this to kselftest.rst at some point.



Signed-off-by: Björn Töpel 
---
v3: * Always build a build host version of bpftool (Mark)
 * Make sure LLVM style "ARCH only" cross-build works (Mark)
 
v2: * Removed the duplicated LLVM prefix parsing (David)

 * Made sure make clean didn't do a complete mess (David)
 * Added sched_ext to default skip (Shuah)


Thank you.

Reviewed-by: Shuah Khan 

thanks,
-- Shuah

Re: [PATCH v2 5/8] net: af_can: do not leave a dangling sk pointer in can_create()

2024-10-08 Thread Ignat Korchagin

On Tue, Oct 8, 2024 at 3:38 AM Vincent MAILHOL
 wrote:
>
> Hi Ignat,
>
> Thanks for the patch.
>
> On Tue. 8 Oct. 2024 at 06:37, Ignat Korchagin  wrote:
> > On error can_create() frees the allocated sk object, but sock_init_data()
> > has already attached it to the provided sock object. This will leave a
> > dangling sk pointer in the sock object and may cause use-after-free later.
>
> I was about to suggest that this should be backported to stable, but
> after reading the cover letter, I now understand that this patch is
> more an improvement to avoid false positives on kmemleak & Cie. Maybe
> the description could be a bit more nuanced? After patch 1/8 of this
> series, it seems that the use-after-free is not possible anymore.

If we go with Kuniyuki's suggestion in the cover email to replace the
explicit NULL with DEBUG_NET_WARN_ON_ONCE(sock->sk) in __sock_create()
then use-after-free would be possible again except we will easily
catch it. But I will review the description, when I respin the patches
to net-next as requested by Jakub.

> > Signed-off-by: Ignat Korchagin 
>
> See above comment as notwithstanding. This said:
>
> Reviewed-by: Vincent Mailhol 

Thank you.

>
> > ---
> >  net/can/af_can.c | 1 +
> >  1 file changed, 1 insertion(+)
> >
> > diff --git a/net/can/af_can.c b/net/can/af_can.c
> > index 707576eeeb58..01f3fbb3b67d 100644
> > --- a/net/can/af_can.c
> > +++ b/net/can/af_can.c
> > @@ -171,6 +171,7 @@ static int can_create(struct net *net, struct socket 
> > *sock, int protocol,
> > /* release sk on errors */
> > sock_orphan(sk);
> > sock_put(sk);
> > +   sock->sk = NULL;
> > }
> >
> >   errout:
> > --
> > 2.39.5
> >
> >

[RESEND PATCH v4] list: test: Check the size of every lists for list_cut_position*()

2024-10-08 Thread I Hsin Cheng

Check the total number of elements in both resultant lists are correct
within list_cut_position*(). Previously, only the first list's size was
checked, so additional elements in the second list would not have been
caught.

Signed-off-by: I Hsin Cheng 
---
change in v4:
Amend the description of commit message, make it less confusing
and focus on the correct check which is performed now.

 lib/list-test.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/lib/list-test.c b/lib/list-test.c
index 37cbc33e9fdb..b4b3810c71d0 100644
--- a/lib/list-test.c
+++ b/lib/list-test.c
@@ -408,6 +408,8 @@ static void list_test_list_cut_position(struct kunit *test)
KUNIT_EXPECT_PTR_EQ(test, cur, &entries[i]);
i++;
}
+
+   KUNIT_EXPECT_EQ(test, i, 3);
 }
 
 static void list_test_list_cut_before(struct kunit *test)
@@ -436,6 +438,8 @@ static void list_test_list_cut_before(struct kunit *test)
KUNIT_EXPECT_PTR_EQ(test, cur, &entries[i]);
i++;
}
+
+   KUNIT_EXPECT_EQ(test, i, 3);
 }
 
 static void list_test_list_splice(struct kunit *test)
-- 
2.43.0

[RESEND PATCH v4] list: test: Check the size of every lists for list_cut_position*()

2024-10-08 Thread I Hsin Cheng

Check the total number of elements in both resultant lists are correct
within list_cut_position*(). Previously, only the first list's size was
checked, so additional elements in the second list would not have been
caught.

Signed-off-by: I Hsin Cheng 
---
change in v4:
Amend the description of commit message, make it less confusing
and focus on the correct check which is performed now.

 lib/list-test.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/lib/list-test.c b/lib/list-test.c
index 37cbc33e9fdb..b4b3810c71d0 100644
--- a/lib/list-test.c
+++ b/lib/list-test.c
@@ -408,6 +408,8 @@ static void list_test_list_cut_position(struct kunit *test)
KUNIT_EXPECT_PTR_EQ(test, cur, &entries[i]);
i++;
}
+
+   KUNIT_EXPECT_EQ(test, i, 3);
 }
 
 static void list_test_list_cut_before(struct kunit *test)
@@ -436,6 +438,8 @@ static void list_test_list_cut_before(struct kunit *test)
KUNIT_EXPECT_PTR_EQ(test, cur, &entries[i]);
i++;
}
+
+   KUNIT_EXPECT_EQ(test, i, 3);
 }
 
 static void list_test_list_splice(struct kunit *test)
-- 
2.43.0

Re: [RESEND PATCH v4] list: test: Check the size of every lists for list_cut_position*()

2024-10-08 Thread Kuan-Wei Chiu

Hi I Hsin,

On Tue, Oct 08, 2024 at 02:51:23PM +0800, I Hsin Cheng wrote:
> Check the total number of elements in both resultant lists are correct
> within list_cut_position*(). Previously, only the first list's size was
> checked, so additional elements in the second list would not have been
> caught.
> 
> Signed-off-by: I Hsin Cheng 
> ---
> change in v4:
>   Amend the description of commit message, make it less confusing
>   and focus on the correct check which is performed now.
>
Generally, we don't add the prefix "RESEND" to the subject line of the
next version of a patch. "RESEND" is only used for resubmissions of a
patch that hasn't been modified in any way from the previous submission.

See: 
https://www.kernel.org/doc/html/v6.11/process/submitting-patches.html#don-t-get-discouraged-or-impatient

Regards,
Kuan-Wei

Re: [PATCH 1/1] s390/virtio_ccw: fix dma_parm pointer not set up

2024-10-08 Thread Cornelia Huck

On Mon, Oct 07 2024, Halil Pasic  wrote:

> At least since commit 334304ac2bac ("dma-mapping: don't return errors
> from dma_set_max_seg_size") setting up device.dma_parms is basically
> mandated by the DMA API. As of now Channel (CCW) I/O in general does not
> utilize the DMA API, except for virtio. For virtio-ccw however the
> common virtio DMA infrastructure is such that most of the DMA stuff
> hinges on the virtio parent device, which is a CCW device.
>
> So lets set up the dma_parms pointer for the CCW parent device and hope
> for the best!
>
> Signed-off-by: Halil Pasic 
> Fixes: 334304ac2bac ("dma-mapping: don't return errors from 
> dma_set_max_seg_size")
> Reported-by: "Marc Hartmayer" 
> Closes: https://bugzilla.linux.ibm.com/show_bug.cgi?id=209131
> Reviewed-by: Eric Farman 
> ---
>
> In the long run it may make sense to move dma_parms into struct
> ccw_device, since layering-wise it is much cleaner. I decided
> to put it in virtio_ccw_device because currently it is only used for
> virtio.

Yes, ccw_device would make more sense as a resting place; no idea what
other devices (dasd, QDIO based, ...) would do with it ATM -- I agree
that if adding it to virtio_ccw_device gets things going again, we should
do that and consider the possible generic case later.

Reviewed-by: Cornelia Huck 

[I assume this one can be picked up together with other s390 patches?]

>
> ---
>  drivers/s390/virtio/virtio_ccw.c | 4 
>  1 file changed, 4 insertions(+)
>
> diff --git a/drivers/s390/virtio/virtio_ccw.c 
> b/drivers/s390/virtio/virtio_ccw.c
> index 62eca9419ad7..21fa7ac849e5 100644
> --- a/drivers/s390/virtio/virtio_ccw.c
> +++ b/drivers/s390/virtio/virtio_ccw.c
> @@ -58,6 +58,8 @@ struct virtio_ccw_device {
>   struct virtio_device vdev;
>   __u8 config[VIRTIO_CCW_CONFIG_SIZE];
>   struct ccw_device *cdev;
> + /* we make cdev->dev.dma_parms point to this */
> + struct device_dma_parameters dma_parms;
>   __u32 curr_io;
>   int err;
>   unsigned int revision; /* Transport revision */
> @@ -1303,6 +1305,7 @@ static int virtio_ccw_offline(struct ccw_device *cdev)
>   unregister_virtio_device(&vcdev->vdev);
>   spin_lock_irqsave(get_ccwdev_lock(cdev), flags);
>   dev_set_drvdata(&cdev->dev, NULL);
> + cdev->dev.dma_parms = NULL;
>   spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags);
>   return 0;
>  }
> @@ -1366,6 +1369,7 @@ static int virtio_ccw_online(struct ccw_device *cdev)
>   }
>   vcdev->vdev.dev.parent = &cdev->dev;
>   vcdev->cdev = cdev;
> + cdev->dev.dma_parms = &vcdev->dma_parms;
>   vcdev->dma_area = ccw_device_dma_zalloc(vcdev->cdev,
>   sizeof(*vcdev->dma_area),
>   &vcdev->dma_area_addr);
>
> base-commit: 87d6aab2389e5ce0197d8257d5f8ee965a67c4cd

[PATCH 1/1] selftests: livepatch: add test cases of stack_order sysfs interface

2024-10-08 Thread Wardenjohn

Add selftest test cases to sysfs attribute 'stack_order'.

Signed-off-by: Wardenjohn 
---
 .../testing/selftests/livepatch/test-sysfs.sh | 71 +++
 .../selftests/livepatch/test_modules/Makefile |  5 +-
 .../test_klp_livepatch_noreplace.c| 53 ++
 .../test_klp_livepatch_noreplace2.c   | 53 ++
 .../test_klp_livepatch_noreplace3.c   | 53 ++
 5 files changed, 234 insertions(+), 1 deletion(-)
 create mode 100644 
tools/testing/selftests/livepatch/test_modules/test_klp_livepatch_noreplace.c
 create mode 100644 
tools/testing/selftests/livepatch/test_modules/test_klp_livepatch_noreplace2.c
 create mode 100644 
tools/testing/selftests/livepatch/test_modules/test_klp_livepatch_noreplace3.c

diff --git a/tools/testing/selftests/livepatch/test-sysfs.sh 
b/tools/testing/selftests/livepatch/test-sysfs.sh
index 05a14f5a7bfb..a086b62fb488 100755
--- a/tools/testing/selftests/livepatch/test-sysfs.sh
+++ b/tools/testing/selftests/livepatch/test-sysfs.sh
@@ -5,6 +5,9 @@
 . $(dirname $0)/functions.sh
 
 MOD_LIVEPATCH=test_klp_livepatch
+MOD_LIVEPATCH_NOREPLACE=test_klp_livepatch_noreplace
+MOD_LIVEPATCH_NOREPLACE2=test_klp_livepatch_noreplace2
+MOD_LIVEPATCH_NOREPLACE3=test_klp_livepatch_noreplace3
 
 setup_config
 
@@ -131,4 +134,72 @@ livepatch: '$MOD_LIVEPATCH': completing unpatching 
transition
 livepatch: '$MOD_LIVEPATCH': unpatching complete
 % rmmod $MOD_LIVEPATCH"
 
+start_test "sysfs test stack_order read"
+
+load_lp $MOD_LIVEPATCH_NOREPLACE
+
+check_sysfs_rights "$MOD_LIVEPATCH_NOREPLACE" "stack_order" "-r--r--r--"
+check_sysfs_value  "$MOD_LIVEPATCH_NOREPLACE" "stack_order" "1"
+
+load_lp $MOD_LIVEPATCH_NOREPLACE2
+
+check_sysfs_rights "$MOD_LIVEPATCH_NOREPLACE2" "stack_order" "-r--r--r--"
+check_sysfs_value  "$MOD_LIVEPATCH_NOREPLACE2" "stack_order" "2"
+
+load_lp $MOD_LIVEPATCH_NOREPLACE3
+
+check_sysfs_rights "$MOD_LIVEPATCH_NOREPLACE3" "stack_order" "-r--r--r--"
+check_sysfs_value  "$MOD_LIVEPATCH_NOREPLACE3" "stack_order" "3"
+
+disable_lp $MOD_LIVEPATCH_NOREPLACE2
+unload_lp $MOD_LIVEPATCH_NOREPLACE2
+
+check_sysfs_rights "$MOD_LIVEPATCH_NOREPLACE" "stack_order" "-r--r--r--"
+check_sysfs_value  "$MOD_LIVEPATCH_NOREPLACE" "stack_order" "1"
+check_sysfs_rights "$MOD_LIVEPATCH_NOREPLACE3" "stack_order" "-r--r--r--"
+check_sysfs_value  "$MOD_LIVEPATCH_NOREPLACE3" "stack_order" "2"
+
+disable_lp $MOD_LIVEPATCH_NOREPLACE3
+unload_lp $MOD_LIVEPATCH_NOREPLACE3
+
+disable_lp $MOD_LIVEPATCH_NOREPLACE
+unload_lp $MOD_LIVEPATCH_NOREPLACE
+
+check_result "% insmod test_modules/$MOD_LIVEPATCH_NOREPLACE.ko
+livepatch: enabling patch '$MOD_LIVEPATCH_NOREPLACE'
+livepatch: '$MOD_LIVEPATCH_NOREPLACE': initializing patching transition
+livepatch: '$MOD_LIVEPATCH_NOREPLACE': starting patching transition
+livepatch: '$MOD_LIVEPATCH_NOREPLACE': completing patching transition
+livepatch: '$MOD_LIVEPATCH_NOREPLACE': patching complete
+% insmod test_modules/$MOD_LIVEPATCH_NOREPLACE2.ko
+livepatch: enabling patch '$MOD_LIVEPATCH_NOREPLACE2'
+livepatch: '$MOD_LIVEPATCH_NOREPLACE2': initializing patching transition
+livepatch: '$MOD_LIVEPATCH_NOREPLACE2': starting patching transition
+livepatch: '$MOD_LIVEPATCH_NOREPLACE2': completing patching transition
+livepatch: '$MOD_LIVEPATCH_NOREPLACE2': patching complete
+% insmod test_modules/$MOD_LIVEPATCH_NOREPLACE3.ko
+livepatch: enabling patch '$MOD_LIVEPATCH_NOREPLACE3'
+livepatch: '$MOD_LIVEPATCH_NOREPLACE3': initializing patching transition
+livepatch: '$MOD_LIVEPATCH_NOREPLACE3': starting patching transition
+livepatch: '$MOD_LIVEPATCH_NOREPLACE3': completing patching transition
+livepatch: '$MOD_LIVEPATCH_NOREPLACE3': patching complete
+% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH_NOREPLACE2/enabled
+livepatch: '$MOD_LIVEPATCH_NOREPLACE2': initializing unpatching transition
+livepatch: '$MOD_LIVEPATCH_NOREPLACE2': starting unpatching transition
+livepatch: '$MOD_LIVEPATCH_NOREPLACE2': completing unpatching transition
+livepatch: '$MOD_LIVEPATCH_NOREPLACE2': unpatching complete
+% rmmod $MOD_LIVEPATCH_NOREPLACE2
+% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH_NOREPLACE3/enabled
+livepatch: '$MOD_LIVEPATCH_NOREPLACE3': initializing unpatching transition
+livepatch: '$MOD_LIVEPATCH_NOREPLACE3': starting unpatching transition
+livepatch: '$MOD_LIVEPATCH_NOREPLACE3': completing unpatching transition
+livepatch: '$MOD_LIVEPATCH_NOREPLACE3': unpatching complete
+% rmmod $MOD_LIVEPATCH_NOREPLACE3
+% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH_NOREPLACE/enabled
+livepatch: '$MOD_LIVEPATCH_NOREPLACE': initializing unpatching transition
+livepatch: '$MOD_LIVEPATCH_NOREPLACE': starting unpatching transition
+livepatch: '$MOD_LIVEPATCH_NOREPLACE': completing unpatching transition
+livepatch: '$MOD_LIVEPATCH_NOREPLACE': unpatching complete
+% rmmod $MOD_LIVEPATCH_NOREPLACE"
+
 exit 0
diff --git a/tools/testing/selftests/livepatch/test_modules/Makefile 
b/tools/testing/selftests/livepatch/test_modules/Makefile
index

Re: [PATCH net-next v8 01/24] netlink: add NLA_POLICY_MAX_LEN macro


On 07/10/24 17:53, Jakub Kicinski wrote:

On Mon, 7 Oct 2024 12:04:22 +0200 Antonio Quartulli wrote:

Or we could check if len(self.checks) <= 1 early and throw our hands up
if there is more, for now?


We already perform the same check in the 'else' branch below.
It'd be about moving it at the beginning of the function and bail out if
true, right?

Should I modify this patch and move the check above?


I just sent the refactor patch, that seemed easier than explaining ;)


Great, thanks :-)


--
Antonio Quartulli
OpenVPN Inc.

[PATCH 0/1] selftests: livepatch: add test cases of stack_order sysfs

2024-10-08 Thread Wardenjohn

This patch adds selftest cases for the 'stack_order' sysfs interface.

It reuses the 'test_klp_livepatch' test module. However, some modules in
test_modules have '.replace' enabled, so I set the '.replace' value of the
stack_order test modules to false.

Regards.
Wardenjohn.

Re: [PATCH net-next v8 03/24] ovpn: add basic netlink support


Hi,

On 07/10/24 17:32, Jiri Pirko wrote:

Wed, Oct 02, 2024 at 11:02:17AM CEST, anto...@openvpn.net wrote:

[...]



+operations:
+  list:
+-
+  name: dev-new
+  attribute-set: ovpn
+  flags: [ admin-perm ]
+  doc: Create a new interface of type ovpn
+  do:
+request:
+  attributes:
+- ifname
+- mode
+reply:
+  attributes:
+- ifname
+- ifindex
+-
+  name: dev-del


Why do you expose new and del here in the ovpn-specific generic netlink iface?
Why can't you use the existing RTNL API which is used for creation and
destruction of other types of devices?


That was my original approach in v1, but it was argued that an ovpn 
interface needs a userspace program to be configured and used in a 
meaningful way, therefore it was decided to concentrate all iface mgmt 
APIs along with the others in the netlink family and to not expose any 
RTNL ops.


However, recently we decided to add a dellink implementation for better 
integration with network namespaces and to allow the user to wipe a 
dangling interface.


In the future we are planning to also add the possibility to create a 
"persistent interface", that is an interface created before launching 
any userspace program and that survives when the latter is stopped.
I can guess this functionality may be better suited for RTNL, but I am 
not sure yet.


@Jiri: do you have any particular opinion why we should use RTNL ops and 
not netlink for creating/destroying interfaces? I feel this is mostly a 
matter of taste, but maybe there are technical reasons we should consider.


Thanks a lot for your contribution.

Regards,





ip link add [link DEV | parentdev NAME] [ name ] NAME
[ txqueuelen PACKETS ]
[ address LLADDR ]
[ broadcast LLADDR ]
[ mtu MTU ] [index IDX ]
[ numtxqueues QUEUE_COUNT ]
[ numrxqueues QUEUE_COUNT ]
[ netns { PID | NETNSNAME | NETNSFILE } ]
type TYPE [ ARGS ]

ip link delete { DEVICE | dev DEVICE | group DEVGROUP } type TYPE [ ARGS ]

Lots of examples of existing types creation is for example here:
https://developers.redhat.com/blog/2018/10/22/introduction-to-linux-interfaces-for-virtual-networking




+  attribute-set: ovpn
+  flags: [ admin-perm ]
+  doc: Delete existing interface of type ovpn
+  do:
+pre: ovpn-nl-pre-doit
+post: ovpn-nl-post-doit
+request:
+  attributes:
+- ifindex


[...]


--
Antonio Quartulli
OpenVPN Inc.

Re: [RFC PATCH 30/39] KVM: guest_memfd: Handle folio preparation for guest_memfd mmap

2024-10-08 Thread Manwaring, Derek

On 2024-10-08 at 19:56+ Sean Christopherson wrote:
> Another (slightly crazy) approach would be use protection keys to provide the
> security properties that you want, while giving KVM (and userspace) a 
> quick-and-easy
> override to access guest memory.
>
>  1. mmap() guest_memfd into userspace with RW protections
>  2. Configure PKRU to make guest_memfd memory inaccessible by default
>  3. Swizzle PKRU on-demand when intentionally accessing guest memory
>
> It's essentially the same idea as SMAP+STAC/CLAC, just applied to guest memory
> instead of to userspace memory.
>
> The benefit of the PKRU approach is that there are no PTE modifications, and 
> thus
> no TLB flushes, and only the CPU that is accessing guest memory gains temporary
> access.  The big downside is that it would be limited to modern hardware, but
> that might be acceptable, especially if it simplifies KVM's implementation.

Yeah this might be worth it if it simplifies significantly. Jenkins et
al. showed MPK worked for stopping in-process Spectre V1 [1]. While
future hardware bugs are always possible, the host kernel would still
offer better protection overall since discovery of additional Spectre
approaches and gadgets in the kernel is more likely (I think it's a
bigger surface area than hardware-specific MPK transient execution
issues).

Patrick, we talked about this a couple weeks ago and ended up focusing
on within-userspace protection, but I see keys can also be used to stop
kernel access like Andrew's project he mentioned during Dave's MPK
session at LPC [2]. Andrew, could you share that here?

It's not clear to me how reliably the kernel prevents its own access to
such pages. I see a few papers that warrant more investigation:

"we found multiple interfaces that Linux, by design, provides for
accessing process memory that ignore PKU domains on a page." [3]

"Though Connor et al. demonstrate that existing MPK protections can be
bypassed by using the kernel as a confused deputy, compelling recent
work indicates that MPK operations can be made secure." [4]

Dave and others, if you're aware of resources clarifying how strong the
boundaries are, that would be helpful.

Derek

[1] https://www.cs.dartmouth.edu/~sws/pubs/jas2020.pdf
[2] https://www.youtube.com/watch?v=gEUeMfrNH94&t=1028s
[3] https://www.usenix.org/system/files/sec20-connor.pdf
[4] https://ics.uci.edu/~dabrowsa/kirth-eurosys22-pkru.pdf

Re: [PATCH net-next v20 01/14] mm: page_frag: add a test module for page_frag

On 2024/10/9 3:56, Shuah Khan wrote:
> On 10/8/24 05:20, Yunsheng Lin wrote:
>> The testing is done by ensuring that the fragment allocated
>> from a page_frag_cache instance is pushed into a ptr_ring
>> instance in a kthread bound to a specified cpu, and a kthread
>> bound to a specified cpu will pop the fragment from the
>> ptr_ring and free the fragment.
>>
>> CC: Alexander Duyck 
>> Signed-off-by: Yunsheng Lin 
>> Reviewed-by: Alexander Duyck 
> 
> Signed-off-by should be last. Same comment on all the other

Hi, Shuah

I used 'git am' to collect those tags, and that seems to be the order
in which the tool applied them. Checking other applied commits, it seems
that only the Signed-off-by from the committer comes last, as in the
recent mm commits below:
6901cf55de22
ff7f5ad7bce4

> patches in this series. When you have 4 patches, it is a good
> practice to add cover-letter.

I guess you mean the cover letter below?
https://lore.kernel.org/all/20241008112049.2279307-1-linyunsh...@huawei.com/

> 
>> ---
>>   tools/testing/selftests/mm/Makefile   |   3 +
>>   tools/testing/selftests/mm/page_frag/Makefile |  18 ++
>>   .../selftests/mm/page_frag/page_frag_test.c   | 173 ++
>>   tools/testing/selftests/mm/run_vmtests.sh |   8 +
>>   tools/testing/selftests/mm/test_page_frag.sh  | 171 +
>>   5 files changed, 373 insertions(+)
>>   create mode 100644 tools/testing/selftests/mm/page_frag/Makefile
>>   create mode 100644 tools/testing/selftests/mm/page_frag/page_frag_test.c
>>   create mode 100755 tools/testing/selftests/mm/test_page_frag.sh
>>
>> diff --git a/tools/testing/selftests/mm/Makefile 
>> b/tools/testing/selftests/mm/Makefile
>> index 02e1204971b0..acec529baaca 100644
>> --- a/tools/testing/selftests/mm/Makefile
>> +++ b/tools/testing/selftests/mm/Makefile
>> @@ -36,6 +36,8 @@ MAKEFLAGS += --no-builtin-rules
>>   CFLAGS = -Wall -I $(top_srcdir) $(EXTRA_CFLAGS) $(KHDR_INCLUDES) 
>> $(TOOLS_INCLUDES)
>>   LDLIBS = -lrt -lpthread -lm
>>   +TEST_GEN_MODS_DIR := page_frag
>> +
>>   TEST_GEN_FILES = cow
>>   TEST_GEN_FILES += compaction_test
>>   TEST_GEN_FILES += gup_longterm
>> @@ -126,6 +128,7 @@ TEST_FILES += test_hmm.sh
>>   TEST_FILES += va_high_addr_switch.sh
>>   TEST_FILES += charge_reserved_hugetlb.sh
>>   TEST_FILES += hugetlb_reparenting_test.sh
>> +TEST_FILES += test_page_frag.sh
>>     # required by charge_reserved_hugetlb.sh
>>   TEST_FILES += write_hugetlb_memory.sh
>> diff --git a/tools/testing/selftests/mm/page_frag/Makefile 
>> b/tools/testing/selftests/mm/page_frag/Makefile
>> new file mode 100644
>> index ..58dda74d50a3
>> --- /dev/null
>> +++ b/tools/testing/selftests/mm/page_frag/Makefile
>> @@ -0,0 +1,18 @@
>> +PAGE_FRAG_TEST_DIR := $(realpath $(dir $(abspath $(lastword 
>> $(MAKEFILE_LIST)
>> +KDIR ?= $(abspath $(PAGE_FRAG_TEST_DIR)/../../../../..)
>> +
>> +ifeq ($(V),1)
>> +Q =
>> +else
>> +Q = @
>> +endif
>> +
>> +MODULES = page_frag_test.ko
>> +
>> +obj-m += page_frag_test.o
>> +
>> +all:
>> +    +$(Q)make -C $(KDIR) M=$(PAGE_FRAG_TEST_DIR) modules
>> +
>> +clean:
>> +    +$(Q)make -C $(KDIR) M=$(PAGE_FRAG_TEST_DIR) clean
>> diff --git a/tools/testing/selftests/mm/page_frag/page_frag_test.c 
>> b/tools/testing/selftests/mm/page_frag/page_frag_test.c
>> new file mode 100644
>> index ..eeb2b6bc681a
>> --- /dev/null
>> +++ b/tools/testing/selftests/mm/page_frag/page_frag_test.c
>> @@ -0,0 +1,173 @@
>> +// SPDX-License-Identifier: GPL-2.0
> 
> I think this would throw a checkpatch warning about
> comment should be "/*" and not "//"

Using "git grep 'SPDX-License' mm", "//" seems to be the more common
case.
I also ran './scripts/checkpatch.pl --strict --codespell', and
it does not throw a checkpatch warning.

>> +
>> +/*
>> + * Test module for page_frag cache
>> + *

...

>> +function run_manual_check()
>> +{
>> +    #
>> +    # Validate passed parameters. If there is wrong one,
>> +    # the script exists and does not execute further.
>> +    #
>> +    validate_passed_args $@
>> +
>> +    echo "Run the test with following parameters: $@"
> 
> Is this marker good enough to isolate the test results in the
> dmesg? Include the test name in the message.
> 
> 
>> +    insmod $DRIVER $@ > /dev/null 2>&1
>> +    echo "Done."
> 
> Is this marker good enough to isolate the test results in the
> dmesg? Include the test name in the message.
> 
>> +    echo "Check the kernel ring buffer to see the summary."
> 
> Usually the test would run dmesg and filter out the test results
> from the dmesg and include them in the test script output.
> 
> You can refer to other tests that do that: powerpc/scripts/hmi.sh
> is one example.

Thanks, will check that.

[RESEND] [PATCH v6 0/2] Add test to distinguish between thread's signal mask and ucontext_t

2024-10-08 Thread Dev Jain

This patch series is motivated by the following observation:

Raise a signal, jump to signal handler. The ucontext_t structure dumped
by kernel to userspace has a uc_sigmask field having the mask of blocked
signals. If you run a fresh minimalistic program doing this, this field
is empty, even if you block some signals while registering the handler
with sigaction().

Here is what the man-pages have to say:

sigaction(2): "sa_mask specifies a mask of signals which should be blocked
(i.e., added to the signal mask of the thread in which the signal handler
is invoked) during execution of the signal handler. In addition, the
signal which triggered the handler will be blocked, unless the SA_NODEFER
flag is used."

signal(7): Under "Execution of signal handlers", (1.3) implies:

"The thread's current signal mask is accessible via the ucontext_t
object that is pointed to by the third argument of the signal handler."

But, (1.4) states:

"Any signals specified in act->sa_mask when registering the handler with
sigprocmask(2) are added to the thread's signal mask.  The signal being
delivered is also added to the signal mask, unless SA_NODEFER was
specified when registering the handler.  These signals are thus blocked
while the handler executes."

There clearly is no distinction being made in the man pages between
"Thread's signal mask" and ucontext_t; this logically should imply
that a signal blocked by populating struct sigaction should be visible
in ucontext_t.

Here is what the kernel code does (for Aarch64):

do_signal() -> handle_signal() -> sigmask_to_save(), which returns
&current->blocked, is passed to setup_rt_frame() -> setup_sigframe() ->
__copy_to_user(). Hence, &current->blocked is copied to ucontext_t
exposed to userspace. Returning back to handle_signal(),
signal_setup_done() -> signal_delivered() -> sigorsets() and
set_current_blocked() are responsible for using information from
struct ksignal ksig, which was populated through the sigaction()
system call in kernel/signal.c:
copy_from_user(&new_sa.sa, act, sizeof(new_sa.sa)),
to update &current->blocked; hence, the set of blocked signals for the
current thread is updated AFTER the kernel dumps ucontext_t to
userspace.

Assuming that the above is indeed the intended behaviour, because it
semantically makes sense, since the signals blocked using sigaction()
remain blocked only till the execution of the handler, and not in the
context present before jumping to the handler (but nothing can be
confirmed from the man-pages), the series introduces a test for
mangling with uc_sigmask. I will send a separate series to fix the
man-pages.

The proposed selftest has been tested out on Aarch32, Aarch64 and x86_64.

v5->v6:
 - Drop renaming of sas.c
 - Include the explanation from the cover letter in the changelog
   for the second patch

v4->v5:
 - Remove a redundant print statement

v3->v4:
 - Allocate sigsets as automatic variables to avoid malloc()

v2->v3:
 - ucontext describes current state -> ucontext describes interrupted context
 - Add a comment for blockage of USR2 even after return from handler
 - Describe blockage of signals in a better way

v1->v2:
 - Replace all occurrences of SIGPIPE with SIGSEGV
 - Fixed a mismatch between code comment and ksft log
 - Add a testcase: Raise the same signal again; it must not be queued
 - Remove unneeded , 
 - Give a detailed test description in the comments; also describe the
   exact meaning of delivered and blocked
 - Handle errors for all libc functions/syscalls
 - Mention tests in Makefile and .gitignore in alphabetical order

v1:
 - https://lore.kernel.org/all/20240607122319.768640-1-dev.j...@arm.com/


Dev Jain (2):
  selftests: Rename sigaltstack to generic signal
  selftests: Add a test mangling with uc_sigmask

 tools/testing/selftests/Makefile  |   2 +-
 .../{sigaltstack => signal}/.gitignore|   1 +
 .../{sigaltstack => signal}/Makefile  |   3 +-
 .../current_stack_pointer.h   |   0
 .../selftests/signal/mangle_uc_sigmask.c  | 184 ++
 .../selftests/{sigaltstack => signal}/sas.c   |   0
 6 files changed, 188 insertions(+), 2 deletions(-)
 rename tools/testing/selftests/{sigaltstack => signal}/.gitignore (70%)
 rename tools/testing/selftests/{sigaltstack => signal}/Makefile (56%)
 rename tools/testing/selftests/{sigaltstack => signal}/current_stack_pointer.h 
(100%)
 create mode 100644 tools/testing/selftests/signal/mangle_uc_sigmask.c
 rename tools/testing/selftests/{sigaltstack => signal}/sas.c (100%)

-- 
2.30.2

[RESEND] [PATCH v6 1/2] selftests: Rename sigaltstack to generic signal

2024-10-08 Thread Dev Jain

Rename sigaltstack to generic signal directory, to allow adding more
signal tests in the future.

Signed-off-by: Dev Jain 
Reviewed-by: Mark Brown 
Acked-by: Shuah Khan 
---
 tools/testing/selftests/Makefile| 2 +-
 tools/testing/selftests/{sigaltstack => signal}/.gitignore  | 0
 tools/testing/selftests/{sigaltstack => signal}/Makefile| 0
 .../selftests/{sigaltstack => signal}/current_stack_pointer.h   | 0
 tools/testing/selftests/{sigaltstack => signal}/sas.c   | 0
 5 files changed, 1 insertion(+), 1 deletion(-)
 rename tools/testing/selftests/{sigaltstack => signal}/.gitignore (100%)
 rename tools/testing/selftests/{sigaltstack => signal}/Makefile (100%)
 rename tools/testing/selftests/{sigaltstack => signal}/current_stack_pointer.h 
(100%)
 rename tools/testing/selftests/{sigaltstack => signal}/sas.c (100%)

diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index bc8fe9e8f7f2..edbe30fb3304 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -87,7 +87,7 @@ TARGETS += rtc
 TARGETS += rust
 TARGETS += seccomp
 TARGETS += sgx
-TARGETS += sigaltstack
+TARGETS += signal
 TARGETS += size
 TARGETS += sparc64
 TARGETS += splice
diff --git a/tools/testing/selftests/sigaltstack/.gitignore 
b/tools/testing/selftests/signal/.gitignore
similarity index 100%
rename from tools/testing/selftests/sigaltstack/.gitignore
rename to tools/testing/selftests/signal/.gitignore
diff --git a/tools/testing/selftests/sigaltstack/Makefile 
b/tools/testing/selftests/signal/Makefile
similarity index 100%
rename from tools/testing/selftests/sigaltstack/Makefile
rename to tools/testing/selftests/signal/Makefile
diff --git a/tools/testing/selftests/sigaltstack/current_stack_pointer.h 
b/tools/testing/selftests/signal/current_stack_pointer.h
similarity index 100%
rename from tools/testing/selftests/sigaltstack/current_stack_pointer.h
rename to tools/testing/selftests/signal/current_stack_pointer.h
diff --git a/tools/testing/selftests/sigaltstack/sas.c 
b/tools/testing/selftests/signal/sas.c
similarity index 100%
rename from tools/testing/selftests/sigaltstack/sas.c
rename to tools/testing/selftests/signal/sas.c
-- 
2.30.2

[RESEND] [PATCH v6 2/2] selftests: Add a test mangling with uc_sigmask

2024-10-08 Thread Dev Jain

The test is motivated by the following observation:

Raise a signal, jump to signal handler. The ucontext_t structure dumped
by kernel to userspace has a uc_sigmask field having the mask of blocked
signals. If you run a fresh minimalistic program doing this, this field
is empty, even if you block some signals while registering the handler
with sigaction().

Here is what the man-pages have to say:

sigaction(2): "sa_mask specifies a mask of signals which should be blocked
(i.e., added to the signal mask of the thread in which the signal handler
is invoked) during execution of the signal handler. In addition, the
signal which triggered the handler will be blocked, unless the SA_NODEFER
flag is used."

signal(7): Under "Execution of signal handlers", (1.3) implies:

"The thread's current signal mask is accessible via the ucontext_t
object that is pointed to by the third argument of the signal handler."

But, (1.4) states:

"Any signals specified in act->sa_mask when registering the handler with
sigprocmask(2) are added to the thread's signal mask.  The signal being
delivered is also added to the signal mask, unless SA_NODEFER was
specified when registering the handler.  These signals are thus blocked
while the handler executes."

There clearly is no distinction being made in the man pages between
"Thread's signal mask" and ucontext_t; this logically should imply
that a signal blocked by populating struct sigaction should be visible
in ucontext_t.

Here is what the kernel code does (for Aarch64):

do_signal() -> handle_signal() -> sigmask_to_save(), which returns
&current->blocked, is passed to setup_rt_frame() -> setup_sigframe() ->
__copy_to_user(). Hence, &current->blocked is copied to ucontext_t
exposed to userspace. Returning back to handle_signal(),
signal_setup_done() -> signal_delivered() -> sigorsets() and
set_current_blocked() are responsible for using information from
struct ksignal ksig, which was populated through the sigaction()
system call in kernel/signal.c:
copy_from_user(&new_sa.sa, act, sizeof(new_sa.sa)),
to update &current->blocked; hence, the set of blocked signals for the
current thread is updated AFTER the kernel dumps ucontext_t to
userspace.

Assuming that the above is indeed the intended behaviour, because it
semantically makes sense, since the signals blocked using sigaction()
remain blocked only till the execution of the handler, and not in the
context present before jumping to the handler (but nothing can be
confirmed from the man-pages), this patch introduces a test for
mangling with uc_sigmask.

The test asserts the relation between blocked signal, delivered signal,
and ucontext. The ucontext is mangled with, by adding a signal mask to
it; on return from the handler, the thread must block the corresponding
signal.

In the test description, I have also described signal delivery and blockage,
for ease of understanding what the test does.

Signed-off-by: Dev Jain 
Reviewed-by: Mark Brown 
---
 tools/testing/selftests/signal/.gitignore |   1 +
 tools/testing/selftests/signal/Makefile   |   3 +-
 .../selftests/signal/mangle_uc_sigmask.c  | 184 ++
 3 files changed, 187 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/signal/mangle_uc_sigmask.c

diff --git a/tools/testing/selftests/signal/.gitignore 
b/tools/testing/selftests/signal/.gitignore
index 50a19ace..3f339865a3b6 100644
--- a/tools/testing/selftests/signal/.gitignore
+++ b/tools/testing/selftests/signal/.gitignore
@@ -1,2 +1,3 @@
 # SPDX-License-Identifier: GPL-2.0-only
+mangle_uc_sigmask
 sas
diff --git a/tools/testing/selftests/signal/Makefile 
b/tools/testing/selftests/signal/Makefile
index 3e96d5d47036..e0bf7058d19c 100644
--- a/tools/testing/selftests/signal/Makefile
+++ b/tools/testing/selftests/signal/Makefile
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0-only
 CFLAGS = -Wall
-TEST_GEN_PROGS = sas
+TEST_GEN_PROGS = mangle_uc_sigmask
+TEST_GEN_PROGS += sas
 
 include ../lib.mk
 
diff --git a/tools/testing/selftests/signal/mangle_uc_sigmask.c 
b/tools/testing/selftests/signal/mangle_uc_sigmask.c
new file mode 100644
index ..b79ab92178a8
--- /dev/null
+++ b/tools/testing/selftests/signal/mangle_uc_sigmask.c
@@ -0,0 +1,184 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2024 ARM Ltd.
+ *
+ * Author: Dev Jain 
+ *
+ * Test describing a clear distinction between signal states - delivered and
+ * blocked, and their relation with ucontext.
+ *
+ * A process can request blocking of a signal by masking it into its set of
+ * blocked signals; such a signal, when sent to the process by the kernel,
+ * will get blocked by the process and it may later unblock it and take an
+ * action. At that point, the signal will be delivered.
+ *
+ * We test the following functionalities of the kernel:
+ *
+ * ucontext_t describes the interrupted context of the thread; this implies
+ * that, in case of registering a handler and catching the corresponding
+ * signal, that s

[PATCH] selftests: drivers: net: fix name not defined

2024-10-08 Thread Alessandro Zanni

This fix solves this error, when calling kselftest with targets "drivers/net":

File "tools/testing/selftests/net/lib/py/nsim.py", line 64, in __init__
  if e.errno == errno.ENOSPC:
NameError: name 'errno' is not defined

The module errno makes available standard error system symbols.

Signed-off-by: Alessandro Zanni 
---
 tools/testing/selftests/net/lib/py/nsim.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/testing/selftests/net/lib/py/nsim.py 
b/tools/testing/selftests/net/lib/py/nsim.py
index f571a8b3139b..1a8cbe9acc48 100644
--- a/tools/testing/selftests/net/lib/py/nsim.py
+++ b/tools/testing/selftests/net/lib/py/nsim.py
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 
+import errno
 import json
 import os
 import random
-- 
2.43.0

[PATCH 1/1] selftests/rseq: Fix mm_cid test failure

Adapt the rseq.c/rseq.h code to follow GNU C library changes introduced by:

glibc commit 2e456ccf0c34 ("Linux: Make __rseq_size useful for feature 
detection (bug 31965)")

Without this fix, rseq selftests for mm_cid fail:

./run_param_test.sh
Default parameters
Running test spinlock
Running compare-twice test spinlock
Running mm_cid test spinlock
Error: cpu id getter unavailable

[ This is based on the following branch:
  https://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git
  branch: fixes ]

Fixes: 18c2355838e7 ("selftests/rseq: Implement rseq mm_cid field support")
Signed-off-by: Mathieu Desnoyers 
Cc: Peter Zijlstra 
CC: Boqun Feng 
CC: "Paul E. McKenney" 
Cc: Shuah Khan 
CC: Carlos O'Donell 
CC: Florian Weimer 
CC: linux-kselft...@vger.kernel.org
CC: sta...@vger.kernel.org
---
 tools/testing/selftests/rseq/rseq.c | 110 +++-
 tools/testing/selftests/rseq/rseq.h |  10 +--
 2 files changed, 77 insertions(+), 43 deletions(-)

diff --git a/tools/testing/selftests/rseq/rseq.c 
b/tools/testing/selftests/rseq/rseq.c
index 96e812bdf8a4..5b9772cdf265 100644
--- a/tools/testing/selftests/rseq/rseq.c
+++ b/tools/testing/selftests/rseq/rseq.c
@@ -60,12 +60,6 @@ unsigned int rseq_size = -1U;
 /* Flags used during rseq registration.  */
 unsigned int rseq_flags;
 
-/*
- * rseq feature size supported by the kernel. 0 if the registration was
- * unsuccessful.
- */
-unsigned int rseq_feature_size = -1U;
-
 static int rseq_ownership;
 static int rseq_reg_success;   /* At least one rseq registration has succeded. 
*/
 
@@ -111,6 +105,43 @@ int rseq_available(void)
}
 }
 
+/* The rseq areas need to be at least 32 bytes. */
+static
+unsigned int get_rseq_min_alloc_size(void)
+{
+   unsigned int alloc_size = rseq_size;
+
+   if (alloc_size < ORIG_RSEQ_ALLOC_SIZE)
+   alloc_size = ORIG_RSEQ_ALLOC_SIZE;
+   return alloc_size;
+}
+
+/*
+ * Return the feature size supported by the kernel.
+ *
+ * Depending on the value returned by getauxval(AT_RSEQ_FEATURE_SIZE):
+ *
+ * 0:   Return ORIG_RSEQ_FEATURE_SIZE (20)
+ * > 0: Return the value from getauxval(AT_RSEQ_FEATURE_SIZE).
+ *
+ * It should never return a value below ORIG_RSEQ_FEATURE_SIZE.
+ */
+static
+unsigned int get_rseq_kernel_feature_size(void)
+{
+   unsigned long auxv_rseq_feature_size, auxv_rseq_align;
+
+   auxv_rseq_align = getauxval(AT_RSEQ_ALIGN);
+   assert(!auxv_rseq_align || auxv_rseq_align <= 
RSEQ_THREAD_AREA_ALLOC_SIZE);
+
+   auxv_rseq_feature_size = getauxval(AT_RSEQ_FEATURE_SIZE);
+   assert(!auxv_rseq_feature_size || auxv_rseq_feature_size <= 
RSEQ_THREAD_AREA_ALLOC_SIZE);
+   if (auxv_rseq_feature_size)
+   return auxv_rseq_feature_size;
+   else
+   return ORIG_RSEQ_FEATURE_SIZE;
+}
+
 int rseq_register_current_thread(void)
 {
int rc;
@@ -119,7 +150,7 @@ int rseq_register_current_thread(void)
/* Treat libc's ownership as a successful registration. */
return 0;
}
-   rc = sys_rseq(&__rseq_abi, rseq_size, 0, RSEQ_SIG);
+   rc = sys_rseq(&__rseq_abi, get_rseq_min_alloc_size(), 0, RSEQ_SIG);
if (rc) {
if (RSEQ_READ_ONCE(rseq_reg_success)) {
/* Incoherent success/failure within process. */
@@ -140,28 +171,12 @@ int rseq_unregister_current_thread(void)
/* Treat libc's ownership as a successful unregistration. */
return 0;
}
-   rc = sys_rseq(&__rseq_abi, rseq_size, RSEQ_ABI_FLAG_UNREGISTER, 
RSEQ_SIG);
+   rc = sys_rseq(&__rseq_abi, get_rseq_min_alloc_size(), 
RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG);
if (rc)
return -1;
return 0;
 }
 
-static
-unsigned int get_rseq_feature_size(void)
-{
-   unsigned long auxv_rseq_feature_size, auxv_rseq_align;
-
-   auxv_rseq_align = getauxval(AT_RSEQ_ALIGN);
-   assert(!auxv_rseq_align || auxv_rseq_align <= 
RSEQ_THREAD_AREA_ALLOC_SIZE);
-
-   auxv_rseq_feature_size = getauxval(AT_RSEQ_FEATURE_SIZE);
-   assert(!auxv_rseq_feature_size || auxv_rseq_feature_size <= 
RSEQ_THREAD_AREA_ALLOC_SIZE);
-   if (auxv_rseq_feature_size)
-   return auxv_rseq_feature_size;
-   else
-   return ORIG_RSEQ_FEATURE_SIZE;
-}
-
 static __attribute__((constructor))
 void rseq_init(void)
 {
@@ -178,28 +193,54 @@ void rseq_init(void)
}
if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p &&
*libc_rseq_size_p != 0) {
+   unsigned int libc_rseq_size;
+
/* rseq registration owned by glibc */
rseq_offset = *libc_rseq_offset_p;
-   rseq_size = *libc_rseq_size_p;
+   libc_rseq_size = *libc_rseq_size_p;
rseq_flags = *libc_rseq_flags_p;
-   rseq_feature_size = get_rseq_feature_size();
-   if (rseq_feature_size > rseq_size)
-

Re: [PATCH] selftest: hid: add the missing tests directory

2024-10-08 Thread luyun




在 2024/10/9 03:33, Shuah Khan 写道:

On 10/8/24 03:31, Yun Lu wrote:
Commit 160c826b4dd0 ("selftest: hid: add missing 
run-hid-tools-tests.sh")

has added the run-hid-tools-tests.sh script for it to be installed, but
I forgot to add the tests directory together.

In fact, the run-hid-tools-tests.sh script uses the scripts in the tests
directory to run tests. The tests directory also needs to be added to be
installed

Include the error you are seeing in here.


If the test case is run without the tests directory, the error message
will look like this:


    cd $KSFT_INSTALL_PATH

    ./run_kselftest.sh -t hid:hid-core.sh


# /usr/lib/python3.11/site-packages/_pytest/config/__init__.py:331: 
PluggyTeardownRaisedWarning: A plugin raised an exception during an 
old-style hookwrapper teardown.

# Plugin: helpconfig, Hook: pytest_cmdline_parse
# UsageError: usage: __main__.py [options] [file_or_dir] [file_or_dir] [...]
# __main__.py: error: unrecognized arguments: --udevd
#   inifile: None
#   rootdir: /root/linux/kselftest_install/hid






Fixes: ffb85d5c9e80 ("selftests: hid: import hid-tools hid-core tests")
Cc: sta...@vger.kernel.org
Signed-off-by: Yun Lu 
---
  tools/testing/selftests/hid/Makefile | 1 +
  1 file changed, 1 insertion(+)

diff --git a/tools/testing/selftests/hid/Makefile 
b/tools/testing/selftests/hid/Makefile

index 38ae31bb07b5..662209f5fabc 100644
--- a/tools/testing/selftests/hid/Makefile
+++ b/tools/testing/selftests/hid/Makefile
@@ -18,6 +18,7 @@ TEST_PROGS += hid-usb_crash.sh
  TEST_PROGS += hid-wacom.sh
    TEST_FILES := run-hid-tools-tests.sh
+TEST_FILES += tests


What about the files if any under the tests directory?
The install rule would handle the case, however, did
you verify that those are copied as well?


Yes, the install rule will copy the entire directory (including all 
files under the directory),


and I have confirmed it.


Thanks and best regards.

--Yun Lu




    CXX ?= $(CROSS_COMPILE)g++


thanks,
-- Shuah

Re: [PATCH 3/3] rcu: Report callbacks enqueued on offline CPU blind spot

2024-10-08 Thread Neeraj Upadhyay




On 10/2/2024 8:30 PM, Frederic Weisbecker wrote:
> Le Wed, Oct 02, 2024 at 04:57:38PM +0200, Frederic Weisbecker a écrit :
>> Callbacks enqueued after rcutree_report_cpu_dead() fall into RCU barrier
>> blind spot. Report any potential misuse.
>>
>> Reported-by: Paul E. McKenney 
>> Signed-off-by: Frederic Weisbecker 
>> ---
>>  kernel/rcu/tree.c | 3 +++
>>  1 file changed, 3 insertions(+)
>>
>> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
>> index a60616e69b66..36070b6bf4a1 100644
>> --- a/kernel/rcu/tree.c
>> +++ b/kernel/rcu/tree.c
>> @@ -3084,8 +3084,11 @@ __call_rcu_common(struct rcu_head *head, 
>> rcu_callback_t func, bool lazy_in)
>>  head->func = func;
>>  head->next = NULL;
>>  kasan_record_aux_stack_noalloc(head);
>> +
>>  local_irq_save(flags);
>>  rdp = this_cpu_ptr(&rcu_data);
>> +RCU_LOCKDEP_WARN(rcu_rdp_cpu_online(rdp), "Callback enqueued on offline
>> CPU!");
> 
> This should be !rcu_rdp_cpu_online(rdp)
> 

With this patch series, 600 mins RCU torture overnight testing completed
without failures at my end.

- Neeraj

> Sigh...
> 
>> +
>>  lazy = lazy_in && !rcu_async_should_hurry();
>>  
>>  /* Add the callback to our list. */
>> -- 
>> 2.46.0
>>
>>

Re: [PATCH v1 04/10] iommufd/viommu: Allow drivers to control vdev_id lifecycle

2024-10-08 Thread Nicolin Chen

Sorry for the late reply. Just sat down and started to look at
this series.

On Thu, Sep 05, 2024 at 03:01:19PM -0300, Jason Gunthorpe wrote:
> On Tue, Aug 27, 2024 at 10:02:06AM -0700, Nicolin Chen wrote:
> > The iommufd core provides a lookup helper for an IOMMU driver to find a
> > device pointer by device's per-viommu virtual ID. Yet a driver may need
> > an inverted lookup to find a device's per-viommu virtual ID by a device
> > pointer, e.g. when reporting virtual IRQs/events back to the user space.
> > In this case, it'd be unsafe for iommufd core to do an inverted lookup,
> > as the driver can't track the lifecycle of a viommu object or a vdev_id
> > object.
> > 
> > Meanwhile, some HW can even support virtual device ID lookup by its HW-
> > accelerated virtualization feature. E.g. Tegra241 CMDQV HW supports to
> > execute vanilla guest-issued SMMU commands containing virtual Stream ID
> > but requires software to configure a link between virtual Stream ID and
> > physical Stream ID via HW registers. So not only the iommufd core needs
> > a vdev_id lookup table, drivers will want one too.
> > 
> > Given the two justifications above, it's the best practice to provide a
> > a pair of set_vdev_id/unset_vdev_id ops in the viommu ops, so a driver
> > can implement them to control a vdev_id's lifecycle, and configure the
> > HW properly if required.
> 
> I think the lifecycle rules should be much simpler.
> 
> If a nested domain is attached to a STE/RID/device then the vIOMMU
> affiliated with that nested domain is pinned while the STE is in place
> 
> So the driver only need to provide locking around attach changing the
> STE's vIOMMU vs async operations translating from a STE to a
> vIOMMU. This can be a simple driver lock of some kind, ie a rwlock
> across the STE table.

I was worried about the async between a vdev link (idev<=>vIOMMU)
and a regular attach link (idev<=>nested_domain).

Though practically the vdev link wouldn't break until the regular
attach link breaks first, it was still not safe from it happening.
So, having a master->lock to protect master->vdev could ensure.

Now, with the vdev being an object refcounting idev/vIOMMU objs,
I think we can simply pin the vdev in iommufd_hw_pagetable_attach
to ensure the vdev won't disappear. Then, a driver wouldn't need
to worry about that. [1]

Meanwhile, a nested_domain pins the vIOMMU object in the iommufd
level upon allocation. [2]

So, what's missing seems to be the attach between the master->dev
and the nested_domain itself [3]

  idev <--- vdev [1] -> vIOMMU
  (dev) ---[3]--> nested_domain [2]-^

A lock, protecting the attach(), which is in parallel with iommu
core's group->mutex could help I think?

> Generally that is how all the async events should work, go from the
> STE to the VIOMMU to a iommufd callback to the iommufd event
> queue. iommufd will translate the struct device from the driver to an
> idev_id (or maybe even a vdevid) the same basic way the PRI code works

My first attempt was putting the vdev into the attach_handle that
was created for PRI code. Yet later on I found the links were too
long and some of them weren't safe (perhaps with the new vdev and
those pins mentioned above, it's worth another try). So letting the
driver hold the vdev itself became much simpler.

Thanks
Nicolin

Re: [PATCH V5 1/1] livepatch: Add stack_order sysfs attribute

2024-10-08 Thread Josh Poimboeuf

On Tue, Oct 08, 2024 at 09:48:56AM +0800, Wardenjohn wrote:
> Add "stack_order" sysfs attribute which holds the order in which a live
> patch module was loaded into the system. A user can then determine an
> active live patched version of a function.
> 
> cat /sys/kernel/livepatch/livepatch_1/stack_order -> 1
> 
> means that livepatch_1 is the first live patch applied
> 
> cat /sys/kernel/livepatch/livepatch_module/stack_order -> N
> 
> means that livepatch_module is the Nth live patch applied
> 
> Suggested-by: Petr Mladek 
> Suggested-by: Miroslav Benes 
> Suggested-by: Josh Poimboeuf 
> Signed-off-by: Wardenjohn 

Acked-by: Josh Poimboeuf 

-- 
Josh

Re: [PATCH net-next v8 03/24] ovpn: add basic netlink support

2024-10-08 Thread Jiri Pirko

Tue, Oct 08, 2024 at 11:16:01AM CEST, anto...@openvpn.net wrote:
>On 08/10/2024 10:58, Jiri Pirko wrote:
>> Tue, Oct 08, 2024 at 10:01:40AM CEST, anto...@openvpn.net wrote:
>> > Hi,
>> > 
>> > On 07/10/24 17:32, Jiri Pirko wrote:
>> > > Wed, Oct 02, 2024 at 11:02:17AM CEST, anto...@openvpn.net wrote:
>> > > 
>> > > [...]
>> > > 
>> > > 
>> > > > +operations:
>> > > > +  list:
>> > > > +-
>> > > > +  name: dev-new
>> > > > +  attribute-set: ovpn
>> > > > +  flags: [ admin-perm ]
>> > > > +  doc: Create a new interface of type ovpn
>> > > > +  do:
>> > > > +request:
>> > > > +  attributes:
>> > > > +- ifname
>> > > > +- mode
>> > > > +reply:
>> > > > +  attributes:
>> > > > +- ifname
>> > > > +- ifindex
>> > > > +-
>> > > > +  name: dev-del
>> > > 
>> > > Why do you expose new and del here in the ovpn-specific generic netlink iface?
>> > > Why can't you use the existing RTNL api which is used for creation and
>> > > destruction of other types of devices?
>> > 
>> > That was my original approach in v1, but it was argued that an ovpn 
>> > interface
>> > needs a userspace program to be configured and used in a meaningful way,
>> > therefore it was decided to concentrate all iface mgmt APIs along with the
>> > others in the netlink family and to not expose any RTNL ops.
>> 
>> Can you please point me to the message id?
>
> from
>Sergey and subsequent replies.
>RTNL vs NL topic starts right after the definition of 'ovpn_link_ops'

Yeah, does not make sense to me. All devices should implement common
rtnl ops, the extra-config, if needed, could be on a separate channel.
I don't find Sergey's argumentation valid.


>
>Recently Kuniyuki commented on this topic as well in:
><20240919055259.17622-1-kun...@amazon.com>
>and that is why I added a default dellink implementation.

Having dellink without newlink implemented is just wrong.


>
>> 
>> 
>> > 
>> > However, recently we decided to add a dellink implementation for better
>> > integration with network namespaces and to allow the user to wipe a 
>> > dangling
>> > interface.
>> 
>> Hmm, one more argument to have symmetric add/del implementation in RTNL
>> 
>> 
>> > 
>> > In the future we are planning to also add the possibility to create a
>> > "persistent interface", that is an interface created before launching any
>> > userspace program and that survives when the latter is stopped.
>> > I can guess this functionality may be better suited for RTNL, but I am not
>> > sure yet.
>> 
>> That would be quite confusing to have RTNL and genetlink iface to
>> add/del device. From what you described above, makes more sense to have
>> it just in RTNL
>
>All in all I tend to agree.
>
>> 
>> > 
>> > @Jiri: do you have any particular opinion why we should use RTNL ops and 
>> > not
>> > netlink for creating/destroying interfaces? I feel this is mostly a matter 
>> > of
>> > taste, but maybe there are technical reasons we should consider.
>> 
>> Well, technically, you can probably do both. But it is quite common
>> that you can add/delete these kind of devices over RTNL. Lots of
>> examples. People are used to it, aligns with existing flows.
>
>The only counterargument I see is the one brought by Sergey: "the ovpn
>interface is not usable after creation, if no openvpn process is running".
>
>However, allowing to create "persistent interfaces" will define a use-case
>for having an ovpn device without any userspace process.
>
>@Sergey what is your opinion here? I am not sure persistent interfaces were
>discussed at the time you brought your point about RTNL vs NL.
>
>
>Regards,
>
>
>> 
>> > 
>> > Thanks a lot for your contribution.
>> > 
>> > Regards,
>> > 
>> > 
>> > > 
>> > > 
>> > > ip link add [link DEV | parentdev NAME] [ name ] NAME
>> > >  [ txqueuelen PACKETS ]
>> > >  [ address LLADDR ]
>> > >  [ broadcast LLADDR ]
>> > >  [ mtu MTU ] [index IDX ]
>> > >  [ numtxqueues QUEUE_COUNT ]
>> > >  [ numrxqueues QUEUE_COUNT ]
>> > >  [ netns { PID | NETNSNAME | NETNSFILE } ]
>> > >  type TYPE [ ARGS ]
>> > > 
>> > > ip link delete { DEVICE | dev DEVICE | group DEVGROUP } type TYPE [ ARGS 
>> > > ]
>> > > 
>> > > Lots of examples of existing types creation is for example here:
>> > > https://developers.redhat.com/blog/2018/10/22/introduction-to-linux-interfaces-for-virtual-networking
>> > > 
>> > > 
>> > > 
>> > > > +  attribute-set: ovpn
>> > > > +  flags: [ admin-perm ]
>> > > > +  doc: Delete existing interface of type ovpn
>> > > > +  do:
>> > > > +pre: ovpn-nl-pre-doit
>> > > > +post: ovpn-nl-post-doit
>> > > > +request:
>> > > > +  attributes:
>> > > > +- ifindex
>> > > 
>> > > [...]
>> > 
>> > -- 
>> > Antonio Quartulli
>> > OpenVPN Inc.
>
>-- 
>Antonio Quartulli
>OpenVPN Inc.

Re: [PATCH v2 6/8] net: ieee802154: do not leave a dangling sk pointer in ieee802154_create()

2024-10-08 Thread Miquel Raynal

Hi Ignat,

ig...@cloudflare.com wrote on Mon,  7 Oct 2024 22:35:00 +0100:

> sock_init_data() attaches the allocated sk object to the provided sock
> object. If ieee802154_create() fails later, the allocated sk object is
> freed, but the dangling pointer remains in the provided sock object, which
> may allow use-after-free.
> 
> Clear the sk pointer in the sock object on error.
> 
> Signed-off-by: Ignat Korchagin 

Reviewed-by: Miquel Raynal 

Thanks,
Miquèl

[PATCH] selftests: net: rds: fix module not found

2024-10-08 Thread Alessandro Zanni

This fix solves this error, when calling kselftest with targets "net/rds":

selftests: net/rds: test.py
Traceback (most recent call last):
  File "tools/testing/selftests/net/rds/./test.py", line 17, in 
from lib.py import ip
ModuleNotFoundError: No module named 'lib'

Signed-off-by: Alessandro Zanni 
---
 tools/testing/selftests/net/rds/test.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/net/rds/test.py 
b/tools/testing/selftests/net/rds/test.py
index e6bb109bcead..112a8059c030 100755
--- a/tools/testing/selftests/net/rds/test.py
+++ b/tools/testing/selftests/net/rds/test.py
@@ -14,8 +14,9 @@ import sys
 import atexit
 from pwd import getpwuid
 from os import stat
-from lib.py import ip
 
+sys.path.append("..")
+from lib.py.utils import ip
 
 libc = ctypes.cdll.LoadLibrary('libc.so.6')
 setns = libc.setns
-- 
2.43.0

[PATCH] vdpa: solidrun: Fix UB bug with devres

2024-10-08 Thread Philipp Stanner

In psnet_open_pf_bar() and snet_open_vf_bar() a string later passed to
pcim_iomap_regions() is placed on the stack. Neither
pcim_iomap_regions() nor the functions it calls copy that string.

Should the string later ever be used, this, consequently, causes
undefined behavior since the stack frame will by then have disappeared.

Fix the bug by allocating the strings on the heap through
devm_kasprintf().

Cc: sta...@vger.kernel.org  # v6.3
Fixes: 51a8f9d7f587 ("virtio: vdpa: new SolidNET DPU driver.")
Reported-by: Christophe JAILLET 
Closes: 
https://lore.kernel.org/all/74e9109a-ac59-49e2-9b1d-d825c9c9f...@wanadoo.fr/
Suggested-by: Andy Shevchenko 
Signed-off-by: Philipp Stanner 
---
 drivers/vdpa/solidrun/snet_main.c | 14 ++
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/vdpa/solidrun/snet_main.c 
b/drivers/vdpa/solidrun/snet_main.c
index 99428a04068d..c8b74980dbd1 100644
--- a/drivers/vdpa/solidrun/snet_main.c
+++ b/drivers/vdpa/solidrun/snet_main.c
@@ -555,7 +555,7 @@ static const struct vdpa_config_ops snet_config_ops = {
 
 static int psnet_open_pf_bar(struct pci_dev *pdev, struct psnet *psnet)
 {
-   char name[50];
+   char *name;
int ret, i, mask = 0;
/* We don't know which BAR will be used to communicate..
 * We will map every bar with len > 0.
@@ -573,7 +573,10 @@ static int psnet_open_pf_bar(struct pci_dev *pdev, struct 
psnet *psnet)
return -ENODEV;
}
 
-   snprintf(name, sizeof(name), "psnet[%s]-bars", pci_name(pdev));
+   name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "psnet[%s]-bars", 
pci_name(pdev));
+   if (!name)
+   return -ENOMEM;
+
ret = pcim_iomap_regions(pdev, mask, name);
if (ret) {
SNET_ERR(pdev, "Failed to request and map PCI BARs\n");
@@ -590,10 +593,13 @@ static int psnet_open_pf_bar(struct pci_dev *pdev, struct 
psnet *psnet)
 
 static int snet_open_vf_bar(struct pci_dev *pdev, struct snet *snet)
 {
-   char name[50];
+   char *name;
int ret;
 
-   snprintf(name, sizeof(name), "snet[%s]-bar", pci_name(pdev));
+   name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "snet[%s]-bars", 
pci_name(pdev));
+   if (!name)
+   return -ENOMEM;
+
/* Request and map BAR */
ret = pcim_iomap_regions(pdev, BIT(snet->psnet->cfg.vf_bar), name);
if (ret) {
-- 
2.46.1

[RFC PATCH v3 4/4] sched+mm: Use hazard pointers to track lazy active mm existence

Replace lazy active mm existence tracking with hazard pointers. This
removes the following implementations and their associated config
options:

- MMU_LAZY_TLB_REFCOUNT
- MMU_LAZY_TLB_SHOOTDOWN
- This removes the call_rcu delayed mm drop for RT.

It leverages the fact that each CPU only ever has at most one single
lazy active mm. This makes it a very good fit for a hazard pointer
domain implemented with one hazard pointer slot per CPU.

* Benchmarks:

will-it-scale context_switch1_threads

nr threads (-t) speedup
 1-0.2%
 2+0.4%
 3+0.2%
 6+0.6%
12+0.8%
24+3%
48   +12%
96   +21%
   192   +28%
   384+4%
   768-0.6%

Methodology: Each test is the average of 20 iterations. Use median
result of 3 test runs.

Test hardware:

CPU(s):   384
  On-line CPU(s) list:0-383
Vendor ID:AuthenticAMD
  Model name: AMD EPYC 9654 96-Core Processor
CPU family:   25
Model:17
Thread(s) per core:   2
Core(s) per socket:   96
Socket(s):2
Stepping: 1
Frequency boost:  enabled
CPU(s) scaling MHz:   100%
CPU max MHz:  3709.
CPU min MHz:  400.
BogoMIPS: 4799.75

Memory: 768 GB ram.

Signed-off-by: Mathieu Desnoyers 
Cc: Nicholas Piggin 
Cc: Michael Ellerman 
Cc: Greg Kroah-Hartman 
Cc: Sebastian Andrzej Siewior 
Cc: "Paul E. McKenney" 
Cc: Will Deacon 
Cc: Peter Zijlstra 
Cc: Boqun Feng 
Cc: Alan Stern 
Cc: John Stultz 
Cc: Neeraj Upadhyay 
Cc: Linus Torvalds 
Cc: Andrew Morton 
Cc: Boqun Feng 
Cc: Frederic Weisbecker 
Cc: Joel Fernandes 
Cc: Josh Triplett 
Cc: Uladzislau Rezki 
Cc: Steven Rostedt 
Cc: Lai Jiangshan 
Cc: Zqiang 
Cc: Ingo Molnar 
Cc: Waiman Long 
Cc: Mark Rutland 
Cc: Thomas Gleixner 
Cc: Vlastimil Babka 
Cc: maged.mich...@gmail.com
Cc: Mateusz Guzik 
Cc: Jonas Oberhauser 
Cc: r...@vger.kernel.org
Cc: linux...@kvack.org
Cc: l...@lists.linux.dev
---
 Documentation/mm/active_mm.rst   |  9 ++--
 arch/Kconfig | 32 -
 arch/powerpc/Kconfig |  1 -
 arch/powerpc/mm/book3s64/radix_tlb.c | 23 +-
 include/linux/mm_types.h |  3 --
 include/linux/sched/mm.h | 68 ++--
 kernel/exit.c|  4 +-
 kernel/fork.c| 47 +--
 kernel/sched/sched.h |  8 +---
 lib/Kconfig.debug| 10 
 10 files changed, 45 insertions(+), 160 deletions(-)

diff --git a/Documentation/mm/active_mm.rst b/Documentation/mm/active_mm.rst
index d096fc091e23..c225cac49c30 100644
--- a/Documentation/mm/active_mm.rst
+++ b/Documentation/mm/active_mm.rst
@@ -2,11 +2,10 @@
 Active MM
 =
 
-Note, the mm_count refcount may no longer include the "lazy" users
-(running tasks with ->active_mm == mm && ->mm == NULL) on kernels
-with CONFIG_MMU_LAZY_TLB_REFCOUNT=n. Taking and releasing these lazy
-references must be done with mmgrab_lazy_tlb() and mmdrop_lazy_tlb()
-helpers, which abstract this config option.
+Note, the mm_count refcount no longer includes the "lazy" users (running
+tasks with ->active_mm == mm && ->mm == NULL). Taking and releasing these
+lazy references must be done with mmgrab_lazy_tlb() and mmdrop_lazy_tlb()
+helpers, which are implemented with hazard pointers.
 
 ::
 
diff --git a/arch/Kconfig b/arch/Kconfig
index 975dd22a2dbd..d4261935f8dc 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -475,38 +475,6 @@ config ARCH_WANT_IRQS_OFF_ACTIVATE_MM
  irqs disabled over activate_mm. Architectures that do IPI based TLB
  shootdowns should enable this.
 
-# Use normal mm refcounting for MMU_LAZY_TLB kernel thread references.
-# MMU_LAZY_TLB_REFCOUNT=n can improve the scalability of context switching
-# to/from kernel threads when the same mm is running on a lot of CPUs (a large
-# multi-threaded application), by reducing contention on the mm refcount.
-#
-# This can be disabled if the architecture ensures no CPUs are using an mm as a
-# "lazy tlb" beyond its final refcount (i.e., by the time __mmdrop frees the mm
-# or its kernel page tables). This could be arranged by arch_exit_mmap(), or
-# final exit(2) TLB flush, for example.
-#
-# To implement this, an arch *must*:
-# Ensure the _lazy_tlb variants of mmgrab/mmdrop are used when manipulating
-# the lazy tlb reference of a kthread's ->active_mm (non-arch code has been
-# converted already).
-config MMU_LAZY_TLB_REFCOUNT
-   def_bool y
-   depends on !MMU_LAZY_TLB_SHOOTDOWN
-
-# This option allows MMU_LAZY_TLB_REFCOUNT=n. It ensures no CPUs are using an
-# mm as a lazy tlb beyond its last reference count, by shooting down these
-# users before the mm is deallocated. __mmdrop() first IPIs all CPUs that may
-

[RFC PATCH v3 1/4] compiler.h: Introduce ptr_eq() to preserve address dependency

Compiler CSE and SSA GVN optimizations can cause the address dependency
of addresses returned by rcu_dereference to be lost when comparing those
pointers with either constants or previously loaded pointers.

Introduce ptr_eq() to compare two addresses while preserving the address
dependencies for later use of the address. It should be used when
comparing an address returned by rcu_dereference().

This is needed to prevent the compiler CSE and SSA GVN optimizations
from using @a (or @b) in places where the source refers to @b (or @a)
based on the fact that after the comparison, the two are known to be
equal, which does not preserve address dependencies and allows the
following misordering speculations:

- If @b is a constant, the compiler can issue the loads which depend
  on @a before loading @a.
- If @b is a register populated by a prior load, weakly-ordered
  CPUs can speculate loads which depend on @a before loading @a.

The same logic applies with @a and @b swapped.

Suggested-by: Linus Torvalds 
Suggested-by: Boqun Feng 
Signed-off-by: Mathieu Desnoyers 
Reviewed-by: Boqun Feng 
Reviewed-by: Joel Fernandes (Google) 
Tested-by: Joel Fernandes (Google) 
Acked-by: "Paul E. McKenney" 
Acked-by: Alan Stern 
Cc: Greg Kroah-Hartman 
Cc: Sebastian Andrzej Siewior 
Cc: "Paul E. McKenney" 
Cc: Will Deacon 
Cc: Peter Zijlstra 
Cc: Boqun Feng 
Cc: Alan Stern 
Cc: John Stultz 
Cc: Neeraj Upadhyay 
Cc: Linus Torvalds 
Cc: Boqun Feng 
Cc: Frederic Weisbecker 
Cc: Joel Fernandes 
Cc: Josh Triplett 
Cc: Uladzislau Rezki 
Cc: Steven Rostedt 
Cc: Lai Jiangshan 
Cc: Zqiang 
Cc: Ingo Molnar 
Cc: Waiman Long 
Cc: Mark Rutland 
Cc: Thomas Gleixner 
Cc: Vlastimil Babka 
Cc: maged.mich...@gmail.com
Cc: Mateusz Guzik 
Cc: Gary Guo 
Cc: Jonas Oberhauser 
Cc: r...@vger.kernel.org
Cc: linux...@kvack.org
Cc: l...@lists.linux.dev
Cc: Nikita Popov 
Cc: l...@lists.linux.dev
---
Changes since v0:
- Include feedback from Alan Stern.
---
 include/linux/compiler.h | 63 
 1 file changed, 63 insertions(+)

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 2df665fa2964..75a378ae7af1 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -186,6 +186,69 @@ void ftrace_likely_update(struct ftrace_likely_data *f, 
int val,
__asm__ ("" : "=r" (var) : "0" (var))
 #endif
 
+/*
+ * Compare two addresses while preserving the address dependencies for
+ * later use of the address. It should be used when comparing an address
+ * returned by rcu_dereference().
+ *
+ * This is needed to prevent the compiler CSE and SSA GVN optimizations
+ * from using @a (or @b) in places where the source refers to @b (or @a)
+ * based on the fact that after the comparison, the two are known to be
+ * equal, which does not preserve address dependencies and allows the
+ * following misordering speculations:
+ *
+ * - If @b is a constant, the compiler can issue the loads which depend
+ *   on @a before loading @a.
+ * - If @b is a register populated by a prior load, weakly-ordered
+ *   CPUs can speculate loads which depend on @a before loading @a.
+ *
+ * The same logic applies with @a and @b swapped.
+ *
+ * Return value: true if pointers are equal, false otherwise.
+ *
+ * The compiler barrier() is ineffective at fixing this issue. It does
+ * not prevent the compiler CSE from losing the address dependency:
+ *
+ * int fct_2_volatile_barriers(void)
+ * {
+ * int *a, *b;
+ *
+ * do {
+ * a = READ_ONCE(p);
+ * asm volatile ("" : : : "memory");
+ * b = READ_ONCE(p);
+ * } while (a != b);
+ * asm volatile ("" : : : "memory");  <-- barrier()
+ * return *b;
+ * }
+ *
+ * With gcc 14.2 (arm64):
+ *
+ * fct_2_volatile_barriers:
+ * adrpx0, .LANCHOR0
+ * add x0, x0, :lo12:.LANCHOR0
+ * .L2:
+ * ldr x1, [x0]  <-- x1 populated by first load.
+ * ldr x2, [x0]
+ * cmp x1, x2
+ * bne .L2
+ * ldr w0, [x1]  <-- x1 is used for access which should depend on 
b.
+ * ret
+ *
+ * On weakly-ordered architectures, this lets CPU speculation use the
+ * result from the first load to speculate "ldr w0, [x1]" before
+ * "ldr x2, [x0]".
+ * Based on the RCU documentation, the control dependency does not
+ * prevent the CPU from speculating loads.
+ */
+static __always_inline
+int ptr_eq(const volatile void *a, const volatile void *b)
+{
+   OPTIMIZER_HIDE_VAR(a);
+   OPTIMIZER_HIDE_VAR(b);
+   return a == b;
+}
+
 #define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__)
 
 /**
-- 
2.39.2

[RFC PATCH v3 2/4] Documentation: RCU: Refer to ptr_eq()

Refer to ptr_eq() in the rcu_dereference() documentation.

ptr_eq() is a mechanism that preserves address dependencies when
comparing pointers, and should be favored when comparing a pointer
obtained from rcu_dereference() against another pointer.

Signed-off-by: Mathieu Desnoyers 
Acked-by: Alan Stern 
Acked-by: Paul E. McKenney 
Reviewed-by: Joel Fernandes (Google) 
Cc: Greg Kroah-Hartman 
Cc: Sebastian Andrzej Siewior 
Cc: "Paul E. McKenney" 
Cc: Will Deacon 
Cc: Peter Zijlstra 
Cc: Boqun Feng 
Cc: Alan Stern 
Cc: John Stultz 
Cc: Neeraj Upadhyay 
Cc: Linus Torvalds 
Cc: Boqun Feng 
Cc: Frederic Weisbecker 
Cc: Joel Fernandes 
Cc: Josh Triplett 
Cc: Uladzislau Rezki 
Cc: Steven Rostedt 
Cc: Lai Jiangshan 
Cc: Zqiang 
Cc: Ingo Molnar 
Cc: Waiman Long 
Cc: Mark Rutland 
Cc: Thomas Gleixner 
Cc: Vlastimil Babka 
Cc: maged.mich...@gmail.com
Cc: Mateusz Guzik 
Cc: Gary Guo 
Cc: Jonas Oberhauser 
Cc: r...@vger.kernel.org
Cc: linux...@kvack.org
Cc: l...@lists.linux.dev
Cc: Nikita Popov 
Cc: l...@lists.linux.dev
---
Changes since v0:
- Include feedback from Alan Stern.

Changes since v1:
- Include feedback from Paul E. McKenney.
---
 Documentation/RCU/rcu_dereference.rst | 38 +++
 1 file changed, 33 insertions(+), 5 deletions(-)

diff --git a/Documentation/RCU/rcu_dereference.rst 
b/Documentation/RCU/rcu_dereference.rst
index 2524dcdadde2..de6175bf430f 100644
--- a/Documentation/RCU/rcu_dereference.rst
+++ b/Documentation/RCU/rcu_dereference.rst
@@ -104,11 +104,12 @@ readers working properly:
after such branches, but can speculate loads, which can again
result in misordering bugs.
 
--  Be very careful about comparing pointers obtained from
-   rcu_dereference() against non-NULL values.  As Linus Torvalds
-   explained, if the two pointers are equal, the compiler could
-   substitute the pointer you are comparing against for the pointer
-   obtained from rcu_dereference().  For example::
+-  Use operations that preserve address dependencies (such as
+   "ptr_eq()") to compare pointers obtained from rcu_dereference()
+   against non-NULL pointers. As Linus Torvalds explained, if the
+   two pointers are equal, the compiler could substitute the
+   pointer you are comparing against for the pointer obtained from
+   rcu_dereference().  For example::
 
p = rcu_dereference(gp);
if (p == &default_struct)
@@ -125,6 +126,29 @@ readers working properly:
On ARM and Power hardware, the load from "default_struct.a"
can now be speculated, such that it might happen before the
rcu_dereference().  This could result in bugs due to misordering.
+   Performing the comparison with "ptr_eq()" ensures the compiler
+   does not perform such transformation.
+
+   If the comparison is against another pointer, the compiler is
+   allowed to use either pointer for the following accesses, which
+   loses the address dependency and allows weakly-ordered
+   architectures such as ARM and PowerPC to speculate the
+   address-dependent load before rcu_dereference().  For example::
+
+   p1 = READ_ONCE(gp);
+   p2 = rcu_dereference(gp);
+   if (p1 == p2)  /* BUGGY!!! */
+   do_default(p2->a);
+
+   The compiler can use p1->a rather than p2->a, destroying the
+   address dependency.  Performing the comparison with "ptr_eq()"
+   ensures the compiler preserves the address dependencies.
+   Corrected code::
+
+   p1 = READ_ONCE(gp);
+   p2 = rcu_dereference(gp);
+   if (ptr_eq(p1, p2))
+   do_default(p2->a);
 
However, comparisons are OK in the following cases:
 
@@ -204,6 +228,10 @@ readers working properly:
comparison will provide exactly the information that the
compiler needs to deduce the value of the pointer.
 
+   When in doubt, use operations that preserve address dependencies
+   (such as "ptr_eq()") to compare pointers obtained from
+   rcu_dereference() against non-NULL pointers.
+
 -  Disable any value-speculation optimizations that your compiler
might provide, especially if you are making use of feedback-based
optimizations that take data collected from prior runs.  Such
-- 
2.39.2

[RFC PATCH v3 0/4] sched+mm: Track lazy active mm existence with hazard pointers

[
  I'm posting a v3 taking care of feedback from Peter Zijlstra and Paul
  E. McKenney in case it can be useful to try hazard pointers with other
  use-cases, or for further benchmarking of active mm tracking impact.
]

Hazard pointers appear to be a good fit for replacing refcount based lazy
active mm tracking.

Highlight:

will-it-scale context_switch1_threads

nr threads (-t) speedup
 1-0.2%
 2+0.4%
 3+0.2%
 6+0.6%
12+0.8%
24+3%
48   +12%
96   +21%
   192   +28%
   384+4%
   768-0.6%

This series applies on top of v6.11.1.

Signed-off-by: Mathieu Desnoyers 
Cc: Linus Torvalds 
Cc: Andrew Morton 
Cc: Peter Zijlstra 
Cc: Nicholas Piggin 
Cc: Michael Ellerman 
Cc: Greg Kroah-Hartman 
Cc: Sebastian Andrzej Siewior 
Cc: "Paul E. McKenney" 
Cc: Will Deacon 
Cc: Alan Stern 
Cc: John Stultz 
Cc: Neeraj Upadhyay 
Cc: Boqun Feng 
Cc: Frederic Weisbecker 
Cc: Joel Fernandes 
Cc: Josh Triplett 
Cc: Uladzislau Rezki 
Cc: Steven Rostedt 
Cc: Lai Jiangshan 
Cc: Zqiang 
Cc: Ingo Molnar 
Cc: Waiman Long 
Cc: Mark Rutland 
Cc: Thomas Gleixner 
Cc: Vlastimil Babka 
Cc: maged.mich...@gmail.com
Cc: Mateusz Guzik 
Cc: Jonas Oberhauser 
Cc: r...@vger.kernel.org
Cc: linux...@kvack.org
Cc: l...@lists.linux.dev

Mathieu Desnoyers (4):
  compiler.h: Introduce ptr_eq() to preserve address dependency
  Documentation: RCU: Refer to ptr_eq()
  hazptr: Implement Hazard Pointers
  sched+mm: Use hazard pointers to track lazy active mm existence

 Documentation/RCU/rcu_dereference.rst |  38 +-
 Documentation/mm/active_mm.rst|   9 +-
 arch/Kconfig  |  32 -
 arch/powerpc/Kconfig  |   1 -
 arch/powerpc/mm/book3s64/radix_tlb.c  |  23 +---
 include/linux/compiler.h  |  63 ++
 include/linux/hazptr.h| 165 ++
 include/linux/mm_types.h  |   3 -
 include/linux/sched/mm.h  |  68 ---
 kernel/Makefile   |   2 +-
 kernel/exit.c |   4 +-
 kernel/fork.c |  47 ++--
 kernel/hazptr.c   |  51 
 kernel/sched/sched.h  |   8 +-
 lib/Kconfig.debug |  10 --
 15 files changed, 358 insertions(+), 166 deletions(-)
 create mode 100644 include/linux/hazptr.h
 create mode 100644 kernel/hazptr.c

-- 
2.39.2

[RFC PATCH v3 3/4] hazptr: Implement Hazard Pointers

This API provides existence guarantees of objects through Hazard
Pointers (hazptr). This minimalist implementation is specific to use
with preemption disabled, but can be extended further as needed.

Each hazptr domain defines a fixed number of hazard pointer slots
(nr_cpus) across the entire system.

Its main benefit over RCU is that it allows fast reclaim of
HP-protected pointers without needing to wait for a grace period.

It also allows the hazard pointer scan to call a user-defined callback
to retire a hazard pointer slot immediately if needed. This callback
may, for instance, issue an IPI to the relevant CPU.

There are a few possible use-cases for this in the Linux kernel:

  - Improve performance of mm_count by replacing lazy active mm by
hazptr.
  - Guarantee object existence on pointer dereference to use refcount:
- replace locking used for that purpose in some drivers,
- replace RCU + inc_not_zero pattern,
  - rtmutex: Improve situations where locks need to be taken in
reverse dependency chain order by guaranteeing existence of
first and second locks in traversal order, allowing them to be
locked in the correct order (which is reverse from traversal
order) rather than try-lock+retry on nested lock.

References:

[1]: M. M. Michael, "Hazard pointers: safe memory reclamation for
 lock-free objects," in IEEE Transactions on Parallel and
 Distributed Systems, vol. 15, no. 6, pp. 491-504, June 2004

Link: 
https://lore.kernel.org/lkml/j3scdl5iymjlxavomgc6u5ndg3svhab6ga23dr36o4f5mt333w@7xslvq6b6hmv/
Link: https://lpc.events/event/18/contributions/1731/
Signed-off-by: Mathieu Desnoyers 
Cc: Nicholas Piggin 
Cc: Michael Ellerman 
Cc: Greg Kroah-Hartman 
Cc: Sebastian Andrzej Siewior 
Cc: "Paul E. McKenney" 
Cc: Will Deacon 
Cc: Peter Zijlstra 
Cc: Boqun Feng 
Cc: Alan Stern 
Cc: John Stultz 
Cc: Neeraj Upadhyay 
Cc: Linus Torvalds 
Cc: Andrew Morton 
Cc: Boqun Feng 
Cc: Frederic Weisbecker 
Cc: Joel Fernandes 
Cc: Josh Triplett 
Cc: Uladzislau Rezki 
Cc: Steven Rostedt 
Cc: Lai Jiangshan 
Cc: Zqiang 
Cc: Ingo Molnar 
Cc: Waiman Long 
Cc: Mark Rutland 
Cc: Thomas Gleixner 
Cc: Vlastimil Babka 
Cc: maged.mich...@gmail.com
Cc: Mateusz Guzik 
Cc: Jonas Oberhauser 
Cc: r...@vger.kernel.org
Cc: linux...@kvack.org
Cc: l...@lists.linux.dev
---
Changes since v0:
- Remove slot variable from hp_dereference_allocate().

Changes since v2:
- Address Peter Zijlstra's comments.
- Address Paul E. McKenney's comments.
---
 include/linux/hazptr.h | 165 +
 kernel/Makefile|   2 +-
 kernel/hazptr.c|  51 +
 3 files changed, 217 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/hazptr.h
 create mode 100644 kernel/hazptr.c

diff --git a/include/linux/hazptr.h b/include/linux/hazptr.h
new file mode 100644
index ..f8e36d2bdc58
--- /dev/null
+++ b/include/linux/hazptr.h
@@ -0,0 +1,165 @@
+// SPDX-FileCopyrightText: 2024 Mathieu Desnoyers 

+//
+// SPDX-License-Identifier: LGPL-2.1-or-later
+
+#ifndef _LINUX_HAZPTR_H
+#define _LINUX_HAZPTR_H
+
+/*
+ * HP: Hazard Pointers
+ *
+ * This API provides existence guarantees of objects through hazard
+ * pointers.
+ *
+ * It uses a fixed number of hazard pointer slots (nr_cpus) across the
+ * entire system for each hazard pointer domain.
+ *
+ * Its main benefit over RCU is that it allows fast reclaim of
+ * HP-protected pointers without needing to wait for a grace period.
+ *
+ * It also allows the hazard pointer scan to call a user-defined callback
+ * to retire a hazard pointer slot immediately if needed. This callback
+ * may, for instance, issue an IPI to the relevant CPU.
+ *
+ * References:
+ *
+ * [1]: M. M. Michael, "Hazard pointers: safe memory reclamation for
+ *  lock-free objects," in IEEE Transactions on Parallel and
+ *  Distributed Systems, vol. 15, no. 6, pp. 491-504, June 2004
+ */
+
+#include 
+#include 
+
+/*
+ * Hazard pointer slot.
+ */
+struct hazptr_slot {
+   void *addr;
+};
+
+struct hazptr_domain {
+   struct hazptr_slot __percpu *percpu_slots;
+};
+
+#define DECLARE_HAZPTR_DOMAIN(domain)  \
+   extern struct hazptr_domain domain
+
+#define DEFINE_HAZPTR_DOMAIN(domain)   \
+   static DEFINE_PER_CPU(struct hazptr_slot, __ ## domain ## _slots); \
+   struct hazptr_domain domain = { \
+   .percpu_slots = &__## domain ## _slots, \
+   }
+
+/*
+ * hazptr_scan: Scan hazard pointer domain for @addr.
+ *
+ * Scan hazard pointer domain for @addr.
+ * If @on_match_cb is NULL, wait to observe that each slot contains a value
+ * that differs from @addr.
+ * If @on_match_cb is non-NULL, invoke @on_match_cb for each slot containing
+ * @addr.
+ */
+void hazptr_scan(struct hazptr_domain *domain, void *addr,
+void (*on_match_cb)(int cpu, struct hazptr_slot *slot, void 
*addr));
+
+/*
+ *

Re: [PATCH 1/1] s390/virtio_ccw: fix dma_parm pointer not set up

2024-10-08 Thread Marc Hartmayer

On Mon, Oct 07, 2024 at 10:10 PM +0200, Halil Pasic  wrote:
> At least since commit 334304ac2bac ("dma-mapping: don't return errors
> from dma_set_max_seg_size") setting up device.dma_parms is basically
> mandated by the DMA API. As of now Channel (CCW) I/O in general does not
> utilize the DMA API, except for virtio. For virtio-ccw however the
> common virtio DMA infrastructure is such that most of the DMA stuff
> hinges on the virtio parent device, which is a CCW device.
>
> So let's set up the dma_parms pointer for the CCW parent device and hope
> for the best!
>
> Signed-off-by: Halil Pasic 
> Fixes: 334304ac2bac ("dma-mapping: don't return errors from 
> dma_set_max_seg_size")
> Reported-by: "Marc Hartmayer" 
> Closes: https://bugzilla.linux.ibm.com/show_bug.cgi?id=209131

I guess, this line can be removed as it’s internal only.

> Reviewed-by: Eric Farman 
> ---
>
> In the long run it may make sense to move dma_parms into struct
> ccw_device, since layering-wise it is much cleaner. I decided
> to put it in virtio_ccw_device because currently it is only used for
> virtio.
>
> ---

[…snip…]

Thanks for fixing this!

Tested-by: Marc Hartmayer 

-- 
Kind regards / Beste Grüße
   Marc Hartmayer

IBM Deutschland Research & Development GmbH
Vorsitzender des Aufsichtsrats: Wolfgang Wendt
Geschäftsführung: David Faller
Sitz der Gesellschaft: Böblingen
Registergericht: Amtsgericht Stuttgart, HRB 243294

[PATCH net-next v02 0/2] net: af_packet: allow joining a fanout when link is down

2024-10-08 Thread Gur Stavi

PACKET socket can retain its fanout membership through link down and up
and leave a fanout while closed regardless of link state.
However, socket was forbidden from joining a fanout while it was not
RUNNING.

This patch allows PACKET socket to join a fanout while not RUNNING.
Selftest psock_fanout is extended to test this scenario.
This is the only test that was performed.

This scenario was identified while studying DPDK pmd_af_packet_drv.
Since sockets are only created during initialization, there is no reason
to fail the initialization if a single link is temporarily down.

I hope it is not considered as breaking user space and that applications
are not designed to expect this failure.


Changes:

V02:
* psock_fanout: use explicit loopback up/down instead of toggle.
* psock_fanout: don't try to restore loopback state on failure.
* Rephrase commit message about "leaving a fanout".

V01: https://lore.kernel.org/netdev/cover.1728303615.git.gur.st...@huawei.com/

Gur Stavi (2):
  af_packet: allow fanout_add when socket is not RUNNING
  selftests: net/psock_fanout: socket joins fanout when link is down

 net/packet/af_packet.c | 10 +++---
 tools/testing/selftests/net/psock_fanout.c | 42 --
 2 files changed, 44 insertions(+), 8 deletions(-)


base-commit: f95b4725e796b12e5f347a0d161e1d3843142aa8
-- 
2.45.2

[PATCH net-next v02 2/2] selftests: net/psock_fanout: socket joins fanout when link is down

2024-10-08 Thread Gur Stavi

Modify test_control_group to have toggle parameter.
When toggle is non-zero, loopback device will be set down for the
initialization of fd[1] which is still expected to successfully join
the fanout.

Signed-off-by: Gur Stavi 
---
 tools/testing/selftests/net/psock_fanout.c | 42 --
 1 file changed, 39 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/net/psock_fanout.c 
b/tools/testing/selftests/net/psock_fanout.c
index 4f31e92ebd96..acdfae8f8a9a 100644
--- a/tools/testing/selftests/net/psock_fanout.c
+++ b/tools/testing/selftests/net/psock_fanout.c
@@ -48,6 +48,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -59,6 +60,33 @@
 
 static uint32_t cfg_max_num_members;
 
+static void loopback_set_up_down(int state_up)
+{
+   struct ifreq ifreq = {};
+   int fd, err;
+
+   fd = socket(AF_PACKET, SOCK_RAW, 0);
+   if (fd < 0) {
+   perror("socket loopback");
+   exit(1);
+   }
+   strcpy(ifreq.ifr_name, "lo");
+   err = ioctl(fd, SIOCGIFFLAGS, &ifreq);
+   if (err) {
+   perror("SIOCGIFFLAGS");
+   exit(1);
+   }
+   if (state_up != !!(ifreq.ifr_flags & IFF_UP)) {
+   ifreq.ifr_flags ^= IFF_UP;
+   err = ioctl(fd, SIOCSIFFLAGS, &ifreq);
+   if (err) {
+   perror("SIOCSIFFLAGS");
+   exit(1);
+   }
+   }
+   close(fd);
+}
+
 /* Open a socket in a given fanout mode.
  * @return -1 if mode is bad, a valid socket otherwise */
 static int sock_fanout_open(uint16_t typeflags, uint16_t group_id)
@@ -264,17 +292,22 @@ static void test_control_single(void)
 }
 
 /* Test illegal group with different modes or flags */
-static void test_control_group(void)
+static void test_control_group(int toggle)
 {
int fds[2];
 
-   fprintf(stderr, "test: control multiple sockets\n");
+   if (toggle)
+   fprintf(stderr, "test: control multiple sockets with link down 
toggle\n");
+   else
+   fprintf(stderr, "test: control multiple sockets\n");
 
fds[0] = sock_fanout_open(PACKET_FANOUT_HASH, 0);
if (fds[0] == -1) {
fprintf(stderr, "ERROR: failed to open HASH socket\n");
exit(1);
}
+   if (toggle)
+   loopback_set_up_down(0);
if (sock_fanout_open(PACKET_FANOUT_HASH |
   PACKET_FANOUT_FLAG_DEFRAG, 0) != -1) {
fprintf(stderr, "ERROR: joined group with wrong flag defrag\n");
@@ -294,6 +327,8 @@ static void test_control_group(void)
fprintf(stderr, "ERROR: failed to join group\n");
exit(1);
}
+   if (toggle)
+   loopback_set_up_down(1);
if (close(fds[1]) || close(fds[0])) {
fprintf(stderr, "ERROR: closing sockets\n");
exit(1);
@@ -489,7 +524,8 @@ int main(int argc, char **argv)
int port_off = 2, tries = 20, ret;
 
test_control_single();
-   test_control_group();
+   test_control_group(0);
+   test_control_group(1);
test_control_group_max_num_members();
test_unique_fanout_group_ids();
 
-- 
2.45.2

[PATCH net-next v02 1/2] af_packet: allow fanout_add when socket is not RUNNING

2024-10-08 Thread Gur Stavi

PACKET socket can retain its fanout membership through link down and up
and leave a fanout while closed regardless of link state.
However, socket was forbidden from joining a fanout while it was not
RUNNING.

This patch allows PACKET socket to join fanout while not RUNNING.

Signed-off-by: Gur Stavi 
---
 net/packet/af_packet.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index f8942062f776..fb2cca73d953 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1846,21 +1846,21 @@ static int fanout_add(struct sock *sk, struct 
fanout_args *args)
err = -EINVAL;
 
spin_lock(&po->bind_lock);
-   if (packet_sock_flag(po, PACKET_SOCK_RUNNING) &&
-   match->type == type &&
+   if (match->type == type &&
match->prot_hook.type == po->prot_hook.type &&
match->prot_hook.dev == po->prot_hook.dev) {
err = -ENOSPC;
if (refcount_read(&match->sk_ref) < match->max_num_members) {
-   __dev_remove_pack(&po->prot_hook);
-
/* Paired with packet_setsockopt(PACKET_FANOUT_DATA) */
WRITE_ONCE(po->fanout, match);
 
po->rollover = rollover;
rollover = NULL;
refcount_set(&match->sk_ref, 
refcount_read(&match->sk_ref) + 1);
-   __fanout_link(sk, po);
+   if (packet_sock_flag(po, PACKET_SOCK_RUNNING)) {
+   __dev_remove_pack(&po->prot_hook);
+   __fanout_link(sk, po);
+   }
err = 0;
}
}
-- 
2.45.2

Re: [PATCH v10 7/7] remoteproc: stm32: Add support of an OP-TEE TA to load the firmware

2024-10-08 Thread kernel test robot

Hi Arnaud,

kernel test robot noticed the following build warnings:

[auto build test WARNING on 9852d85ec9d492ebef56dc5f229416c925758edc]

url:
https://github.com/intel-lab-lkp/linux/commits/Arnaud-Pouliquen/remoteproc-core-Introduce-rproc_pa_to_va-helper/20241007-212358
base:   9852d85ec9d492ebef56dc5f229416c925758edc
patch link:
https://lore.kernel.org/r/20241007131620.2090104-8-arnaud.pouliquen%40foss.st.com
patch subject: [PATCH v10 7/7] remoteproc: stm32: Add support of an OP-TEE TA 
to load the firmware
config: alpha-kismet-CONFIG_REMOTEPROC_TEE-CONFIG_STM32_RPROC-0-0 
(https://download.01.org/0day-ci/archive/20241008/202410081902.twqcmwjk-...@intel.com/config)
reproduce: 
(https://download.01.org/0day-ci/archive/20241008/202410081902.twqcmwjk-...@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot 
| Closes: 
https://lore.kernel.org/oe-kbuild-all/202410081902.twqcmwjk-...@intel.com/

kismet warnings: (new ones prefixed by >>)
>> kismet: WARNING: unmet direct dependencies detected for REMOTEPROC_TEE when 
>> selected by STM32_RPROC
   WARNING: unmet direct dependencies detected for REMOTEPROC_TEE
 Depends on [n]: REMOTEPROC [=y] && OPTEE [=n]
 Selected by [y]:
 - STM32_RPROC [=y] && (ARCH_STM32 || COMPILE_TEST [=y]) && REMOTEPROC [=y]

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

[PATCH net-next v20 11/14] mm: page_frag: add testing for the newly added prepare API

Add testing for the newly added prepare API, for both the aligned
and non-aligned variants; the probe API is also tested along with
the prepare API.

CC: Alexander Duyck 
Signed-off-by: Yunsheng Lin 
---
 .../selftests/mm/page_frag/page_frag_test.c   | 66 +--
 tools/testing/selftests/mm/run_vmtests.sh |  4 ++
 tools/testing/selftests/mm/test_page_frag.sh  | 31 +
 3 files changed, 96 insertions(+), 5 deletions(-)

diff --git a/tools/testing/selftests/mm/page_frag/page_frag_test.c 
b/tools/testing/selftests/mm/page_frag/page_frag_test.c
index 36543a129e40..567bcc6a181e 100644
--- a/tools/testing/selftests/mm/page_frag/page_frag_test.c
+++ b/tools/testing/selftests/mm/page_frag/page_frag_test.c
@@ -29,6 +29,10 @@ static bool test_align;
 module_param(test_align, bool, 0);
 MODULE_PARM_DESC(test_align, "use align API for testing");
 
+static bool test_prepare;
+module_param(test_prepare, bool, 0);
+MODULE_PARM_DESC(test_prepare, "use prepare API for testing");
+
 static int test_alloc_len = 2048;
 module_param(test_alloc_len, int, 0);
 MODULE_PARM_DESC(test_alloc_len, "alloc len for testing");
@@ -68,6 +72,18 @@ static int page_frag_pop_thread(void *arg)
return 0;
 }
 
+static void frag_frag_test_commit(struct page_frag_cache *nc,
+ struct page_frag *prepare_pfrag,
+ struct page_frag *probe_pfrag,
+ unsigned int used_sz)
+{
+   WARN_ON_ONCE(prepare_pfrag->page != probe_pfrag->page ||
+prepare_pfrag->offset != probe_pfrag->offset ||
+prepare_pfrag->size != probe_pfrag->size);
+
+   page_frag_commit(nc, prepare_pfrag, used_sz);
+}
+
 static int page_frag_push_thread(void *arg)
 {
struct ptr_ring *ring = arg;
@@ -80,13 +96,52 @@ static int page_frag_push_thread(void *arg)
int ret;
 
if (test_align) {
-   va = page_frag_alloc_align(&test_nc, test_alloc_len,
-  GFP_KERNEL, SMP_CACHE_BYTES);
+   if (test_prepare) {
+   struct page_frag prepare_frag, probe_frag;
+   void *probe_va;
+
+   va = 
page_frag_alloc_refill_prepare_align(&test_nc,
+ 
test_alloc_len,
+ 
&prepare_frag,
+ 
GFP_KERNEL,
+ 
SMP_CACHE_BYTES);
+
+   probe_va = 
__page_frag_alloc_refill_probe_align(&test_nc,
+   
test_alloc_len,
+   
&probe_frag,
+   
-SMP_CACHE_BYTES);
+   WARN_ON_ONCE(va != probe_va);
+
+   if (likely(va))
+   frag_frag_test_commit(&test_nc, 
&prepare_frag,
+ &probe_frag, 
test_alloc_len);
+   } else {
+   va = page_frag_alloc_align(&test_nc,
+  test_alloc_len,
+  GFP_KERNEL,
+  SMP_CACHE_BYTES);
+   }
 
WARN_ONCE((unsigned long)va & (SMP_CACHE_BYTES - 1),
  "unaligned va returned\n");
} else {
-   va = page_frag_alloc(&test_nc, test_alloc_len, 
GFP_KERNEL);
+   if (test_prepare) {
+   struct page_frag prepare_frag, probe_frag;
+   void *probe_va;
+
+   va = page_frag_alloc_refill_prepare(&test_nc, 
test_alloc_len,
+   
&prepare_frag, GFP_KERNEL);
+
+   probe_va = 
page_frag_alloc_refill_probe(&test_nc, test_alloc_len,
+   
&probe_frag);
+
+   WARN_ON_ONCE(va != probe_va);
+   if (likely(va))
+   frag_frag_test_commit(&test_nc, 
&prepare_frag,
+ &probe_frag, 
test_alloc_len);
+   } else {
+   va = page_frag_alloc(&test_nc, test_alloc_len, 
GFP_KERNEL);
+   }
}

[PATCH bpf] selftests/bpf: add missing header include for htons

2024-10-08 Thread eBPF Foundation

Including the network_helpers.h header in tests can lead to the following
build error:

./network_helpers.h: In function ‘csum_tcpudp_magic’:
./network_helpers.h:116:14: error: implicit declaration of function \
  ‘htons’ [-Werror=implicit-function-declaration]
  116 | s += htons(proto + len);

The error is avoided in many cases thanks to some other headers included
earlier and bringing in arpa/inet.h (ie: test_progs.h).

Make sure that test_progs build success does not depend on header ordering
by adding the missing header include in network_helpers.h

Fixes: f6642de0c3e9 ("selftests/bpf: Add csum helpers")
Signed-off-by: Alexis Lothoré (eBPF Foundation) 
---
 tools/testing/selftests/bpf/network_helpers.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/testing/selftests/bpf/network_helpers.h 
b/tools/testing/selftests/bpf/network_helpers.h
index 
c72c16e1aff825439896b38e59962ffafe92dc71..5764155b6d25188ed38e828e1e4a8a08f8a83934
 100644
--- a/tools/testing/selftests/bpf/network_helpers.h
+++ b/tools/testing/selftests/bpf/network_helpers.h
@@ -1,6 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __NETWORK_HELPERS_H
 #define __NETWORK_HELPERS_H
+#include 
 #include 
 #include 
 #include 

---
base-commit: 67a7c7b656cfc10a7280f71641fb9e88726e8a5d
change-id: 20241008-network_helpers_fix-bbb7d1589930

Best regards,
-- 
Alexis Lothoré, Bootlin
Embedded Linux and Kernel engineering
https://bootlin.com

[PATCH] Fix typo in vringh_test.c

2024-10-08 Thread Shivam Chaudhary

Corrected minor typo in tools/virtio/vringh_test.c:
- Fixed "retreives" to "retrieves"

Signed-off-by: Shivam Chaudhary 
---
 tools/virtio/vringh_test.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/virtio/vringh_test.c b/tools/virtio/vringh_test.c
index 43d3a6aa1dcf..b9591223437a 100644
--- a/tools/virtio/vringh_test.c
+++ b/tools/virtio/vringh_test.c
@@ -519,7 +519,7 @@ int main(int argc, char *argv[])
errx(1, "virtqueue_add_sgs: %i", err);
__kmalloc_fake = NULL;
 
-   /* Host retreives it. */
+   /* Host retrieves it. */
vringh_iov_init(&riov, host_riov, ARRAY_SIZE(host_riov));
vringh_iov_init(&wiov, host_wiov, ARRAY_SIZE(host_wiov));
 
-- 
2.34.1

[PATCH] selftest: hid: add the missing tests directory

2024-10-08 Thread Yun Lu

Commit 160c826b4dd0 ("selftest: hid: add missing run-hid-tools-tests.sh")
has added the run-hid-tools-tests.sh script for it to be installed, but
I forgot to add the tests directory together.

In fact, the run-hid-tools-tests.sh script uses the scripts in the tests
directory to run tests. The tests directory also needs to be added to be
installed.

Fixes: ffb85d5c9e80 ("selftests: hid: import hid-tools hid-core tests")
Cc: sta...@vger.kernel.org
Signed-off-by: Yun Lu 
---
 tools/testing/selftests/hid/Makefile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/testing/selftests/hid/Makefile 
b/tools/testing/selftests/hid/Makefile
index 38ae31bb07b5..662209f5fabc 100644
--- a/tools/testing/selftests/hid/Makefile
+++ b/tools/testing/selftests/hid/Makefile
@@ -18,6 +18,7 @@ TEST_PROGS += hid-usb_crash.sh
 TEST_PROGS += hid-wacom.sh
 
 TEST_FILES := run-hid-tools-tests.sh
+TEST_FILES += tests
 
 CXX ?= $(CROSS_COMPILE)g++
 
-- 
2.27.0

Re: [PATCH v10 7/7] remoteproc: stm32: Add support of an OP-TEE TA to load the firmware

2024-10-08 Thread kernel test robot

Hi Arnaud,

kernel test robot noticed the following build errors:

[auto build test ERROR on 9852d85ec9d492ebef56dc5f229416c925758edc]

url:
https://github.com/intel-lab-lkp/linux/commits/Arnaud-Pouliquen/remoteproc-core-Introduce-rproc_pa_to_va-helper/20241007-212358
base:   9852d85ec9d492ebef56dc5f229416c925758edc
patch link:
https://lore.kernel.org/r/20241007131620.2090104-8-arnaud.pouliquen%40foss.st.com
patch subject: [PATCH v10 7/7] remoteproc: stm32: Add support of an OP-TEE TA 
to load the firmware
config: parisc-randconfig-001-20241008 
(https://download.01.org/0day-ci/archive/20241008/202410081704.zo2k0szq-...@intel.com/config)
compiler: hppa-linux-gcc (GCC) 14.1.0
reproduce (this is a W=1 build): 
(https://download.01.org/0day-ci/archive/20241008/202410081704.zo2k0szq-...@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot 
| Closes: 
https://lore.kernel.org/oe-kbuild-all/202410081704.zo2k0szq-...@intel.com/

All errors (new ones prefixed by >>):

   hppa-linux-ld: drivers/remoteproc/remoteproc_tee.o: in function 
`tee_rproc_load_fw':
>> (.text+0xa8): undefined reference to `tee_shm_register_kernel_buf'
>> hppa-linux-ld: (.text+0x160): undefined reference to `tee_client_invoke_func'
>> hppa-linux-ld: (.text+0x178): undefined reference to `tee_shm_free'
   hppa-linux-ld: drivers/remoteproc/remoteproc_tee.o: in function 
`tee_rproc_register':
>> (.text+0x2f4): undefined reference to `tee_client_open_session'
   hppa-linux-ld: drivers/remoteproc/remoteproc_tee.o: in function 
`tee_rproc_unregister':
>> (.text+0x3d4): undefined reference to `tee_client_close_session'
   hppa-linux-ld: drivers/remoteproc/remoteproc_tee.o: in function 
`tee_rproc_probe':
>> (.text+0x478): undefined reference to `tee_client_open_context'
>> hppa-linux-ld: (.text+0x4f8): undefined reference to 
>> `tee_client_close_context'
   hppa-linux-ld: drivers/remoteproc/remoteproc_tee.o: in function 
`tee_rproc_remove':
   (.text+0x558): undefined reference to `tee_client_close_session'
   hppa-linux-ld: (.text+0x59c): undefined reference to 
`tee_client_close_context'
   hppa-linux-ld: drivers/remoteproc/remoteproc_tee.o: in function 
`tee_rproc_start':
>> (.text+0x68c): undefined reference to `tee_client_invoke_func'
   hppa-linux-ld: drivers/remoteproc/remoteproc_tee.o: in function 
`tee_rproc_stop':
   (.text+0x7c8): undefined reference to `tee_client_invoke_func'
   hppa-linux-ld: drivers/remoteproc/remoteproc_tee.o: in function 
`tee_rproc_get_loaded_rsc_table':
   (.text+0x92c): undefined reference to `tee_client_invoke_func'
   hppa-linux-ld: drivers/remoteproc/remoteproc_tee.o: in function 
`tee_rproc_release_fw':
   (.text+0xb18): undefined reference to `tee_client_invoke_func'
>> hppa-linux-ld: drivers/remoteproc/remoteproc_tee.o:(.data+0x8): undefined 
>> reference to `tee_bus_type'

Kconfig warnings: (for reference only)
   WARNING: unmet direct dependencies detected for REMOTEPROC_TEE
   Depends on [n]: REMOTEPROC [=y] && OPTEE [=n]
   Selected by [y]:
   - STM32_RPROC [=y] && (ARCH_STM32 || COMPILE_TEST [=y]) && REMOTEPROC [=y]

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

Re: [PATCH] mailbox, remoteproc: k3-m4+: fix compile testing

2024-10-08 Thread Mathieu Poirier

On Mon, Oct 07, 2024 at 01:23:57PM +, Arnd Bergmann wrote:
> From: Arnd Bergmann 
> 
> The k3-m4 remoteproc driver was merged with incorrect dependencies.
> Despite multiple people trying to fix this, the version 6.12-rc2
> remains broken and causes a build failure with CONFIG_TI_SCI_PROTOCOL=m
> when the driver is built-in.
>

(Sigh)

I will do my own testing and will get back to you.

> arm-linux-gnueabi-ld: drivers/remoteproc/ti_k3_m4_remoteproc.o: in function 
> `k3_m4_rproc_probe':
> ti_k3_m4_remoteproc.c:(.text.k3_m4_rproc_probe+0x76): undefined reference to 
> `devm_ti_sci_get_by_phandle'
> 
> Fix the dependency again to make it work in all configurations.
> The 'select OMAP2PLUS_MBOX' no longer matches what the other drivers
> do, so turn that into the same 'depends' to ensure we get no circular
> dependencies. The link failure can be avoided with a simple 'depends
> on TI_SCI_PROTOCOL', but the extra COMPILE_TEST alternative is what
> we use elsewhere. On the other hand, building for OMAP2PLUS makes
> no sense since the hardware only exists on K3.
> 
> Fixes: ebcf9008a895 ("remoteproc: k3-m4: Add a remoteproc driver for M4F 
> subsystem")
> Fixes: ba0c0cb56f22 ("remoteproc: k3-m4: use the proper dependencies")
> Fixes: 54595f2807d2 ("mailbox, remoteproc: omap2+: fix compile testing")
> Signed-off-by: Arnd Bergmann 
> ---
>  drivers/remoteproc/Kconfig | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig
> index 955e4e38477e..62f8548fb46a 100644
> --- a/drivers/remoteproc/Kconfig
> +++ b/drivers/remoteproc/Kconfig
> @@ -341,9 +341,9 @@ config TI_K3_DSP_REMOTEPROC
>  
>  config TI_K3_M4_REMOTEPROC
>   tristate "TI K3 M4 remoteproc support"
> - depends on ARCH_OMAP2PLUS || ARCH_K3
> - select MAILBOX
> - select OMAP2PLUS_MBOX
> + depends on ARCH_K3 || COMPILE_TEST
> + depends on TI_SCI_PROTOCOL || (COMPILE_TEST && TI_SCI_PROTOCOL=n)
> + depends on OMAP2PLUS_MBOX
>   help
> Say m here to support TI's M4 remote processor subsystems
> on various TI K3 family of SoCs through the remote processor
> -- 
> 2.39.2
>

Re: [PATCH v10 7/7] remoteproc: stm32: Add support of an OP-TEE TA to load the firmware

2024-10-08 Thread Mathieu Poirier

From here on, starting with this version, I will not review patchsets that
don't pass the compilation bots.

Mathieu

On Tue, Oct 08, 2024 at 07:07:40PM +0800, kernel test robot wrote:
> Hi Arnaud,
> 
> kernel test robot noticed the following build warnings:
> 
> [auto build test WARNING on 9852d85ec9d492ebef56dc5f229416c925758edc]
> 
> url:
> https://github.com/intel-lab-lkp/linux/commits/Arnaud-Pouliquen/remoteproc-core-Introduce-rproc_pa_to_va-helper/20241007-212358
> base:   9852d85ec9d492ebef56dc5f229416c925758edc
> patch link:
> https://lore.kernel.org/r/20241007131620.2090104-8-arnaud.pouliquen%40foss.st.com
> patch subject: [PATCH v10 7/7] remoteproc: stm32: Add support of an OP-TEE TA 
> to load the firmware
> config: alpha-kismet-CONFIG_REMOTEPROC_TEE-CONFIG_STM32_RPROC-0-0 
> (https://download.01.org/0day-ci/archive/20241008/202410081902.twqcmwjk-...@intel.com/config)
> reproduce: 
> (https://download.01.org/0day-ci/archive/20241008/202410081902.twqcmwjk-...@intel.com/reproduce)
> 
> If you fix the issue in a separate patch/commit (i.e. not just a new version 
> of
> the same patch/commit), kindly add following tags
> | Reported-by: kernel test robot 
> | Closes: 
> https://lore.kernel.org/oe-kbuild-all/202410081902.twqcmwjk-...@intel.com/
> 
> kismet warnings: (new ones prefixed by >>)
> >> kismet: WARNING: unmet direct dependencies detected for REMOTEPROC_TEE 
> >> when selected by STM32_RPROC
>WARNING: unmet direct dependencies detected for REMOTEPROC_TEE
>  Depends on [n]: REMOTEPROC [=y] && OPTEE [=n]
>  Selected by [y]:
>  - STM32_RPROC [=y] && (ARCH_STM32 || COMPILE_TEST [=y]) && REMOTEPROC 
> [=y]
> 
> -- 
> 0-DAY CI Kernel Test Service
> https://github.com/intel/lkp-tests/wiki

Re: [PATCH net-next v02 1/2] af_packet: allow fanout_add when socket is not RUNNING

2024-10-08 Thread Willem de Bruijn

Gur Stavi wrote:
> PACKET socket can retain its fanout membership through link down and up
> and leave a fanout while closed regardless of link state.
> However, socket was forbidden from joining a fanout while it was not
> RUNNING.
> 
> This patch allows PACKET socket to join fanout while not RUNNING.
> 
> Signed-off-by: Gur Stavi 
> ---
>  net/packet/af_packet.c | 10 +-
>  1 file changed, 5 insertions(+), 5 deletions(-)
> 
> diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
> index f8942062f776..fb2cca73d953 100644
> --- a/net/packet/af_packet.c
> +++ b/net/packet/af_packet.c
> @@ -1846,21 +1846,21 @@ static int fanout_add(struct sock *sk, struct 
> fanout_args *args)
>   err = -EINVAL;
>  
>   spin_lock(&po->bind_lock);
> - if (packet_sock_flag(po, PACKET_SOCK_RUNNING) &&
> - match->type == type &&
> + if (match->type == type &&
>   match->prot_hook.type == po->prot_hook.type &&
>   match->prot_hook.dev == po->prot_hook.dev) {

Remaining unaddressed issue is that the socket can now be added
before being bound. See comment in v1.

>   err = -ENOSPC;
>   if (refcount_read(&match->sk_ref) < match->max_num_members) {
> - __dev_remove_pack(&po->prot_hook);
> -
>   /* Paired with packet_setsockopt(PACKET_FANOUT_DATA) */
>   WRITE_ONCE(po->fanout, match);
>  
>   po->rollover = rollover;
>   rollover = NULL;
>   refcount_set(&match->sk_ref, 
> refcount_read(&match->sk_ref) + 1);
> - __fanout_link(sk, po);
> + if (packet_sock_flag(po, PACKET_SOCK_RUNNING)) {
> + __dev_remove_pack(&po->prot_hook);
> + __fanout_link(sk, po);
> + }
>   err = 0;
>   }
>   }
> -- 
> 2.45.2
>

Re: [PATCH net-next v8 03/24] ovpn: add basic netlink support

On 08/10/2024 14:52, Jiri Pirko wrote:

Tue, Oct 08, 2024 at 11:16:01AM CEST, anto...@openvpn.net wrote:

On 08/10/2024 10:58, Jiri Pirko wrote:

Tue, Oct 08, 2024 at 10:01:40AM CEST, anto...@openvpn.net wrote:

Hi,

On 07/10/24 17:32, Jiri Pirko wrote:

Wed, Oct 02, 2024 at 11:02:17AM CEST, anto...@openvpn.net wrote:

[...]

+operations:
+ list:
+-
+ name: dev-new
+ attribute-set: ovpn
+ flags: [ admin-perm ]
+ doc: Create a new interface of type ovpn
+ do:
+request:
+ attributes:
+- ifname
+- mode
+reply:
+ attributes:
+- ifname
+- ifindex
+-
+ name: dev-del

Why do you expose new and del here in the ovpn-specific generic netlink iface?
Why can't you use the existing RTNL api which is used for creation and
destruction of other types of devices?

That was my original approach in v1, but it was argued that an ovpn interface
needs a userspace program to be configured and used in a meaningful way,
therefore it was decided to concentrate all iface mgmt APIs along with the
others in the netlink family and to not expose any RTNL ops.

Can you please point me to the message id?

from
Sergey and subsequent replies.
RTNL vs NL topic starts right after the definition of 'ovpn_link_ops'

Yeah, does not make sense to me. All devices should implement common
rtnl ops, the extra-config, if needed, could be on a separate channel.
I don't find Sergey's argumentation valid.

Thanks a lot for taking the time to read our conversation.

Ok, considering all points we have discussed so far (including future
developments already on the roadmap), I think it makes sense to go back
to RTNL and drop the new/del-dev ops from the netlink family.

Will do that in v9.

Regards,

Recently Kuniyuki commented on this topic as well in:
<20240919055259.17622-1-kun...@amazon.com>
and that is why I added a default dellink implementation.

Having dellink without newlink implemented is just wrong.

However, recently we decided to add a dellink implementation for better
integration with network namespaces and to allow the user to wipe a dangling
interface.

Hmm, one more argument to have symmetric add/del implementation in RTNL

In the future we are planning to also add the possibility to create a
"persistent interface", that is an interface created before launching any
userspace program and that survives when the latter is stopped.
I can guess this functionality may be better suited for RTNL, but I am not
sure yet.

That would be quite confusing to have RTNL and genetlink iface to
add/del device. From what you described above, makes more sense to have
it just in RTNL

All in all I tend to agree.

@Jiri: do you have any particular opinion why we should use RTNL ops and not
netlink for creating/destroying interfaces? I feel this is mostly a matter of
taste, but maybe there are technical reasons we should consider.

Well, technically, you can probably do both. But it is quite common
that you can add/delete these kind of devices over RTNL. Lots of
examples. People are used to it, aligns with existing flows.

The only counterargument I see is the one brought by Sergey: "the ovpn
interface is not usable after creation, if no openvpn process is running".

However, allowing to create "persistent interfaces" will define a use-case
for having an ovpn device without any userspace process.

@Sergey what is your opinion here? I am not sure persistent interfaces were
discussed at the time you brought your point about RTNL vs NL.

Regards,

Thanks a lot for your contribution.

Regards,

ip link add [link DEV | parentdev NAME] [ name ] NAME
[ txqueuelen PACKETS ]
[ address LLADDR ]
[ broadcast LLADDR ]
[ mtu MTU ] [index IDX ]
[ numtxqueues QUEUE_COUNT ]
[ numrxqueues QUEUE_COUNT ]
[ netns { PID | NETNSNAME | NETNSFILE } ]
type TYPE [ ARGS ]

ip link delete { DEVICE | dev DEVICE | group DEVGROUP } type TYPE [ ARGS ]

Lots of examples of existing types creation is for example here:
https://developers.redhat.com/blog/2018/10/22/introduction-to-linux-interfaces-for-virtual-networking

+ attribute-set: ovpn
+ flags: [ admin-perm ]
+ doc: Delete existing interface of type ovpn
+ do:
+pre: ovpn-nl-pre-doit
+post: ovpn-nl-post-doit
+request:
+ attributes:
+- ifindex

[...]

--
Antonio Quartulli
OpenVPN Inc.

Re: [PATCH v4 0/2] virtiofs: fix the warning for kernel direct IO

2024-10-08 Thread Miklos Szeredi

On Sat, 31 Aug 2024 at 11:38, Hou Tao  wrote:
>
> From: Hou Tao 
>
> Hi,
>
> The patch set aims to fix the warning related to an abnormal size
> parameter of kmalloc() in virtiofs. Patch #1 fixes it by introducing
> use_pages_for_kvec_io option in fuse_conn and enabling it in virtiofs.
> Beside the abnormal size parameter for kmalloc, the gfp parameter is
> also questionable: GFP_ATOMIC is used even when the allocation occurs
> in a kworker context. Patch #2 fixes it by using GFP_NOFS when the
> allocation is initiated by the kworker. For more details, please check
> the individual patches.

Applied, thanks.

Miklos

[PATCH net-next v20 01/14] mm: page_frag: add a test module for page_frag

The testing is done by ensuring that the fragment allocated
from a page_frag_cache instance is pushed into a ptr_ring
instance in a kthread bound to a specified cpu, and a kthread
bound to a specified cpu will pop the fragment from the
ptr_ring and free the fragment.

CC: Alexander Duyck 
Signed-off-by: Yunsheng Lin 
Reviewed-by: Alexander Duyck 
---
 tools/testing/selftests/mm/Makefile   |   3 +
 tools/testing/selftests/mm/page_frag/Makefile |  18 ++
 .../selftests/mm/page_frag/page_frag_test.c   | 173 ++
 tools/testing/selftests/mm/run_vmtests.sh |   8 +
 tools/testing/selftests/mm/test_page_frag.sh  | 171 +
 5 files changed, 373 insertions(+)
 create mode 100644 tools/testing/selftests/mm/page_frag/Makefile
 create mode 100644 tools/testing/selftests/mm/page_frag/page_frag_test.c
 create mode 100755 tools/testing/selftests/mm/test_page_frag.sh

diff --git a/tools/testing/selftests/mm/Makefile 
b/tools/testing/selftests/mm/Makefile
index 02e1204971b0..acec529baaca 100644
--- a/tools/testing/selftests/mm/Makefile
+++ b/tools/testing/selftests/mm/Makefile
@@ -36,6 +36,8 @@ MAKEFLAGS += --no-builtin-rules
 CFLAGS = -Wall -I $(top_srcdir) $(EXTRA_CFLAGS) $(KHDR_INCLUDES) 
$(TOOLS_INCLUDES)
 LDLIBS = -lrt -lpthread -lm
 
+TEST_GEN_MODS_DIR := page_frag
+
 TEST_GEN_FILES = cow
 TEST_GEN_FILES += compaction_test
 TEST_GEN_FILES += gup_longterm
@@ -126,6 +128,7 @@ TEST_FILES += test_hmm.sh
 TEST_FILES += va_high_addr_switch.sh
 TEST_FILES += charge_reserved_hugetlb.sh
 TEST_FILES += hugetlb_reparenting_test.sh
+TEST_FILES += test_page_frag.sh
 
 # required by charge_reserved_hugetlb.sh
 TEST_FILES += write_hugetlb_memory.sh
diff --git a/tools/testing/selftests/mm/page_frag/Makefile 
b/tools/testing/selftests/mm/page_frag/Makefile
new file mode 100644
index ..58dda74d50a3
--- /dev/null
+++ b/tools/testing/selftests/mm/page_frag/Makefile
@@ -0,0 +1,18 @@
+PAGE_FRAG_TEST_DIR := $(realpath $(dir $(abspath $(lastword 
$(MAKEFILE_LIST)))))
+KDIR ?= $(abspath $(PAGE_FRAG_TEST_DIR)/../../../../..)
+
+ifeq ($(V),1)
+Q =
+else
+Q = @
+endif
+
+MODULES = page_frag_test.ko
+
+obj-m += page_frag_test.o
+
+all:
+   +$(Q)make -C $(KDIR) M=$(PAGE_FRAG_TEST_DIR) modules
+
+clean:
+   +$(Q)make -C $(KDIR) M=$(PAGE_FRAG_TEST_DIR) clean
diff --git a/tools/testing/selftests/mm/page_frag/page_frag_test.c 
b/tools/testing/selftests/mm/page_frag/page_frag_test.c
new file mode 100644
index ..eeb2b6bc681a
--- /dev/null
+++ b/tools/testing/selftests/mm/page_frag/page_frag_test.c
@@ -0,0 +1,173 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Test module for page_frag cache
+ *
+ * Copyright (C) 2024 Yunsheng Lin 
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+static struct ptr_ring ptr_ring;
+static int nr_objs = 512;
+static atomic_t nthreads;
+static struct completion wait;
+static struct page_frag_cache test_nc;
+static int test_popped;
+static int test_pushed;
+
+static int nr_test = 200;
+module_param(nr_test, int, 0);
+MODULE_PARM_DESC(nr_test, "number of iterations to test");
+
+static bool test_align;
+module_param(test_align, bool, 0);
+MODULE_PARM_DESC(test_align, "use align API for testing");
+
+static int test_alloc_len = 2048;
+module_param(test_alloc_len, int, 0);
+MODULE_PARM_DESC(test_alloc_len, "alloc len for testing");
+
+static int test_push_cpu;
+module_param(test_push_cpu, int, 0);
+MODULE_PARM_DESC(test_push_cpu, "test cpu for pushing fragment");
+
+static int test_pop_cpu;
+module_param(test_pop_cpu, int, 0);
+MODULE_PARM_DESC(test_pop_cpu, "test cpu for popping fragment");
+
+static int page_frag_pop_thread(void *arg)
+{
+   struct ptr_ring *ring = arg;
+
+   pr_info("page_frag pop test thread begins on cpu %d\n",
+   smp_processor_id());
+
+   while (test_popped < nr_test) {
+   void *obj = __ptr_ring_consume(ring);
+
+   if (obj) {
+   test_popped++;
+   page_frag_free(obj);
+   } else {
+   cond_resched();
+   }
+   }
+
+   if (atomic_dec_and_test(&nthreads))
+   complete(&wait);
+
+   pr_info("page_frag pop test thread exits on cpu %d\n",
+   smp_processor_id());
+
+   return 0;
+}
+
+static int page_frag_push_thread(void *arg)
+{
+   struct ptr_ring *ring = arg;
+
+   pr_info("page_frag push test thread begins on cpu %d\n",
+   smp_processor_id());
+
+   while (test_pushed < nr_test) {
+   void *va;
+   int ret;
+
+   if (test_align) {
+   va = page_frag_alloc_align(&test_nc, test_alloc_len,
+  GFP_KERNEL, SMP_CACHE_BYTES);
+
+   WARN_ONCE((unsigned long)va & (SMP_CACHE_BYTES - 1),
+ "unaligned va returned\n");
+   } else {
+

[PATCH net-next v20 02/14] mm: move the page fragment allocator from page_alloc into its own file

Inspired by [1], move the page fragment allocator from page_alloc
into its own C file and header file, as we are about to make more
changes to it so that it can replace another page_frag
implementation in sock.c.

As this patchset is going to replace 'struct page_frag' with
'struct page_frag_cache' in sched.h, including page_frag_cache.h
in sched.h causes a compiler error due to the interdependence between
mm_types.h and mm.h for asm-offsets.c, see [2]. So avoid the compiler
error by moving 'struct page_frag_cache' to mm_types_task.h as
suggested by Alexander, see [3].

1. https://lore.kernel.org/all/20230411160902.4134381-3-dhowe...@redhat.com/
2. https://lore.kernel.org/all/15623dac-9358-4597-b3ee-3694a5956...@gmail.com/
3. 
https://lore.kernel.org/all/CAKgT0UdH1yD=LSCXFJ=ym_aia4oomd-2wxyko42bizawmt_...@mail.gmail.com/
CC: David Howells 
CC: Alexander Duyck 
Signed-off-by: Yunsheng Lin 
Acked-by: Andrew Morton 
Reviewed-by: Alexander Duyck 
---
 include/linux/gfp.h   |  22 ---
 include/linux/mm_types.h  |  18 ---
 include/linux/mm_types_task.h |  18 +++
 include/linux/page_frag_cache.h   |  31 
 include/linux/skbuff.h|   1 +
 mm/Makefile   |   1 +
 mm/page_alloc.c   | 136 
 mm/page_frag_cache.c  | 145 ++
 .../selftests/mm/page_frag/page_frag_test.c   |   2 +-
 9 files changed, 197 insertions(+), 177 deletions(-)
 create mode 100644 include/linux/page_frag_cache.h
 create mode 100644 mm/page_frag_cache.c

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index a951de920e20..a0a6d25f883f 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -371,28 +371,6 @@ __meminit void *alloc_pages_exact_nid_noprof(int nid, 
size_t size, gfp_t gfp_mas
 extern void __free_pages(struct page *page, unsigned int order);
 extern void free_pages(unsigned long addr, unsigned int order);
 
-struct page_frag_cache;
-void page_frag_cache_drain(struct page_frag_cache *nc);
-extern void __page_frag_cache_drain(struct page *page, unsigned int count);
-void *__page_frag_alloc_align(struct page_frag_cache *nc, unsigned int fragsz,
- gfp_t gfp_mask, unsigned int align_mask);
-
-static inline void *page_frag_alloc_align(struct page_frag_cache *nc,
- unsigned int fragsz, gfp_t gfp_mask,
- unsigned int align)
-{
-   WARN_ON_ONCE(!is_power_of_2(align));
-   return __page_frag_alloc_align(nc, fragsz, gfp_mask, -align);
-}
-
-static inline void *page_frag_alloc(struct page_frag_cache *nc,
-unsigned int fragsz, gfp_t gfp_mask)
-{
-   return __page_frag_alloc_align(nc, fragsz, gfp_mask, ~0u);
-}
-
-extern void page_frag_free(void *addr);
-
 #define __free_page(page) __free_pages((page), 0)
 #define free_page(addr) free_pages((addr), 0)
 
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 6e3bdf8e38bc..92314ef2d978 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -521,9 +521,6 @@ static_assert(sizeof(struct ptdesc) <= sizeof(struct page));
  */
 #define STRUCT_PAGE_MAX_SHIFT  (order_base_2(sizeof(struct page)))
 
-#define PAGE_FRAG_CACHE_MAX_SIZE   __ALIGN_MASK(32768, ~PAGE_MASK)
-#define PAGE_FRAG_CACHE_MAX_ORDER  get_order(PAGE_FRAG_CACHE_MAX_SIZE)
-
 /*
  * page_private can be used on tail pages.  However, PagePrivate is only
  * checked by the VM on the head page.  So page_private on the tail pages
@@ -542,21 +539,6 @@ static inline void *folio_get_private(struct folio *folio)
return folio->private;
 }
 
-struct page_frag_cache {
-   void * va;
-#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
-   __u16 offset;
-   __u16 size;
-#else
-   __u32 offset;
-#endif
-   /* we maintain a pagecount bias, so that we dont dirty cache line
-* containing page->_refcount every time we allocate a fragment.
-*/
-   unsigned intpagecnt_bias;
-   bool pfmemalloc;
-};
-
 typedef unsigned long vm_flags_t;
 
 /*
diff --git a/include/linux/mm_types_task.h b/include/linux/mm_types_task.h
index bff5706b76e1..0ac6daebdd5c 100644
--- a/include/linux/mm_types_task.h
+++ b/include/linux/mm_types_task.h
@@ -8,6 +8,7 @@
  * (These are defined separately to decouple sched.h from mm_types.h as much 
as possible.)
  */
 
+#include 
 #include 
 
 #include 
@@ -43,6 +44,23 @@ struct page_frag {
 #endif
 };
 
+#define PAGE_FRAG_CACHE_MAX_SIZE   __ALIGN_MASK(32768, ~PAGE_MASK)
+#define PAGE_FRAG_CACHE_MAX_ORDER  get_order(PAGE_FRAG_CACHE_MAX_SIZE)
+struct page_frag_cache {
+   void *va;
+#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
+   __u16 offset;
+   __u16 size;
+#else
+   __u32 offset;
+#endif
+   /* we maintain a pagecount bias, so that we dont dirty cache line
+* containing pa

[PATCH net-next v20 04/14] mm: page_frag: avoid caller accessing 'page_frag_cache' directly

Use the appropriate page_frag API instead of having callers access
'page_frag_cache' directly.

CC: Alexander Duyck 
Signed-off-by: Yunsheng Lin 
Reviewed-by: Alexander Duyck 
Acked-by: Chuck Lever 
---
 drivers/vhost/net.c   |  2 +-
 include/linux/page_frag_cache.h   | 10 ++
 net/core/skbuff.c |  6 +++---
 net/rxrpc/conn_object.c   |  4 +---
 net/rxrpc/local_object.c  |  4 +---
 net/sunrpc/svcsock.c  |  6 ++
 tools/testing/selftests/mm/page_frag/page_frag_test.c |  2 +-
 7 files changed, 19 insertions(+), 15 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index f16279351db5..9ad37c012189 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -1325,7 +1325,7 @@ static int vhost_net_open(struct inode *inode, struct 
file *f)
vqs[VHOST_NET_VQ_RX]);
 
f->private_data = n;
-   n->pf_cache.va = NULL;
+   page_frag_cache_init(&n->pf_cache);
 
return 0;
 }
diff --git a/include/linux/page_frag_cache.h b/include/linux/page_frag_cache.h
index 67ac8626ed9b..0a52f7a179c8 100644
--- a/include/linux/page_frag_cache.h
+++ b/include/linux/page_frag_cache.h
@@ -7,6 +7,16 @@
 #include 
 #include 
 
+static inline void page_frag_cache_init(struct page_frag_cache *nc)
+{
+   nc->va = NULL;
+}
+
+static inline bool page_frag_cache_is_pfmemalloc(struct page_frag_cache *nc)
+{
+   return !!nc->pfmemalloc;
+}
+
 void page_frag_cache_drain(struct page_frag_cache *nc);
 void __page_frag_cache_drain(struct page *page, unsigned int count);
 void *__page_frag_alloc_align(struct page_frag_cache *nc, unsigned int fragsz,
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 74149dc4ee31..ca01880c7ad0 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -753,14 +753,14 @@ struct sk_buff *__netdev_alloc_skb(struct net_device 
*dev, unsigned int len,
if (in_hardirq() || irqs_disabled()) {
nc = this_cpu_ptr(&netdev_alloc_cache);
data = page_frag_alloc(nc, len, gfp_mask);
-   pfmemalloc = nc->pfmemalloc;
+   pfmemalloc = page_frag_cache_is_pfmemalloc(nc);
} else {
local_bh_disable();
local_lock_nested_bh(&napi_alloc_cache.bh_lock);
 
nc = this_cpu_ptr(&napi_alloc_cache.page);
data = page_frag_alloc(nc, len, gfp_mask);
-   pfmemalloc = nc->pfmemalloc;
+   pfmemalloc = page_frag_cache_is_pfmemalloc(nc);
 
local_unlock_nested_bh(&napi_alloc_cache.bh_lock);
local_bh_enable();
@@ -850,7 +850,7 @@ struct sk_buff *napi_alloc_skb(struct napi_struct *napi, 
unsigned int len)
len = SKB_HEAD_ALIGN(len);
 
data = page_frag_alloc(&nc->page, len, gfp_mask);
-   pfmemalloc = nc->page.pfmemalloc;
+   pfmemalloc = page_frag_cache_is_pfmemalloc(&nc->page);
}
local_unlock_nested_bh(&napi_alloc_cache.bh_lock);
 
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index 1539d315afe7..694c4df7a1a3 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -337,9 +337,7 @@ static void rxrpc_clean_up_connection(struct work_struct 
*work)
 */
rxrpc_purge_queue(&conn->rx_queue);
 
-   if (conn->tx_data_alloc.va)
-   __page_frag_cache_drain(virt_to_page(conn->tx_data_alloc.va),
-   conn->tx_data_alloc.pagecnt_bias);
+   page_frag_cache_drain(&conn->tx_data_alloc);
call_rcu(&conn->rcu, rxrpc_rcu_free_connection);
 }
 
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index 504453c688d7..a8cffe47cf01 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -452,9 +452,7 @@ void rxrpc_destroy_local(struct rxrpc_local *local)
 #endif
rxrpc_purge_queue(&local->rx_queue);
rxrpc_purge_client_connections(local);
-   if (local->tx_alloc.va)
-   __page_frag_cache_drain(virt_to_page(local->tx_alloc.va),
-   local->tx_alloc.pagecnt_bias);
+   page_frag_cache_drain(&local->tx_alloc);
 }
 
 /*
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 825ec5357691..b785425c3315 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1608,7 +1608,6 @@ static void svc_tcp_sock_detach(struct svc_xprt *xprt)
 static void svc_sock_free(struct svc_xprt *xprt)
 {
struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
-   struct page_frag_cache *pfc = &svsk->sk_frag_cache;
struct socket *sock = svsk->sk_sock;
 
trace_svcsock_free(svsk, sock);
@@ -1618,8 +1617,7 @@ static void svc_sock_free(struct svc_xprt *xprt)
sockfd_put(sock);
else
sock_release(sock);
-   if (pfc-

Re: [PATCH net-next v8 03/24] ovpn: add basic netlink support

2024-10-08 Thread Jiri Pirko

Tue, Oct 08, 2024 at 10:01:40AM CEST, anto...@openvpn.net wrote:
>Hi,
>
>On 07/10/24 17:32, Jiri Pirko wrote:
>> Wed, Oct 02, 2024 at 11:02:17AM CEST, anto...@openvpn.net wrote:
>> 
>> [...]
>> 
>> 
>> > +operations:
>> > +  list:
>> > +-
>> > +  name: dev-new
>> > +  attribute-set: ovpn
>> > +  flags: [ admin-perm ]
>> > +  doc: Create a new interface of type ovpn
>> > +  do:
>> > +request:
>> > +  attributes:
>> > +- ifname
>> > +- mode
>> > +reply:
>> > +  attributes:
>> > +- ifname
>> > +- ifindex
>> > +-
>> > +  name: dev-del
>> 
>> Why do you expose new and del here in the ovpn-specific generic netlink iface?
>> Why can't you use the existing RTNL API, which is used for creation and
>> destruction of other types of devices?
>
>That was my original approach in v1, but it was argued that an ovpn interface
>needs a userspace program to be configured and used in a meaningful way,
>therefore it was decided to concentrate all iface mgmt APIs along with the
>others in the netlink family and to not expose any RTNL ops.

Can you please point me to the message id?


>
>However, recently we decided to add a dellink implementation for better
>integration with network namespaces and to allow the user to wipe a dangling
>interface.

Hmm, one more argument for having a symmetric add/del implementation in RTNL.


>
>In the future we are planning to also add the possibility to create a
>"persistent interface", that is an interface created before launching any
>userspace program and that survives when the latter is stopped.
>I can guess this functionality may be better suited for RTNL, but I am not
>sure yet.

It would be quite confusing to have both an RTNL and a genetlink iface to
add/del a device. From what you described above, it makes more sense to have
it just in RTNL.

>
>@Jiri: do you have any particular opinion why we should use RTNL ops and not
>netlink for creating/destroying interfaces? I feel this is mostly a matter of
>taste, but maybe there are technical reasons we should consider.

Well, technically, you can probably do both. But it is quite common
that you can add/delete these kinds of devices over RTNL. There are lots of
examples. People are used to it, and it aligns with existing flows.

>
>Thanks a lot for your contribution.
>
>Regards,
>
>
>> 
>> 
>> ip link add [link DEV | parentdev NAME] [ name ] NAME
>>  [ txqueuelen PACKETS ]
>>  [ address LLADDR ]
>>  [ broadcast LLADDR ]
>>  [ mtu MTU ] [index IDX ]
>>  [ numtxqueues QUEUE_COUNT ]
>>  [ numrxqueues QUEUE_COUNT ]
>>  [ netns { PID | NETNSNAME | NETNSFILE } ]
>>  type TYPE [ ARGS ]
>> 
>> ip link delete { DEVICE | dev DEVICE | group DEVGROUP } type TYPE [ ARGS ]
>> 
>> Lots of examples of existing types creation is for example here:
>> https://developers.redhat.com/blog/2018/10/22/introduction-to-linux-interfaces-for-virtual-networking
>> 
>> 
>> 
>> > +  attribute-set: ovpn
>> > +  flags: [ admin-perm ]
>> > +  doc: Delete existing interface of type ovpn
>> > +  do:
>> > +pre: ovpn-nl-pre-doit
>> > +post: ovpn-nl-post-doit
>> > +request:
>> > +  attributes:
>> > +- ifindex
>> 
>> [...]
>
>-- 
>Antonio Quartulli
>OpenVPN Inc.

[PATCH bpf v7 1/2] bpf: fix unpopulated name_len field in perf_event link info

2024-10-08 Thread Tyrone Wu

Previously when retrieving `bpf_link_info.perf_event` for
kprobe/uprobe/tracepoint, the `name_len` field was not populated by the
kernel, leaving it to reflect the value initially set by the user. This
behavior was inconsistent with how other input/output string buffer
fields function (e.g. `raw_tracepoint.tp_name_len`).

This patch fills `name_len` with the actual size of the string name.

Fixes: 1b715e1b0ec5 ("bpf: Support ->fill_link_info for perf_event")
Signed-off-by: Tyrone Wu 
Acked-by: Jiri Olsa 
Acked-by: Yafang Shao 
---
V6 -> V7:
- Use *ulenp suggestion from Jiri for user name_len
- Set user name_len to 1 when buf returned from bpf_get_perf_event_info() is 
NULL

V5 -> V6:
- Use simpler buf check while keeping V4
- Fix netdev/checkpatch warning for 80 cols exceeded
- Fix Signed-off-by to use real name instead of git username

V4 -> V5:
- Check that buf is not NULL before retrieving/using its length

V3 -> V4:
- Split patch into separate kernel and selftest change

V2 -> V3:
- Use clearer variable name for user set/inputted name len (name_len -> 
input_len)
- Change (name_len -> input_len) type from size_t to u32 since it's only 
received and used as u32

V1 -> V2:
- Use user set *ulen in bpf_copy_to_user before overwriting *ulen

 kernel/bpf/syscall.c | 29 ++---
 1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index a8f1808a1ca5..8cfa7183d2ef 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3565,15 +3565,16 @@ static void bpf_perf_link_dealloc(struct bpf_link *link)
 }
 
 static int bpf_perf_link_fill_common(const struct perf_event *event,
-char __user *uname, u32 ulen,
+char __user *uname, u32 *ulenp,
 u64 *probe_offset, u64 *probe_addr,
 u32 *fd_type, unsigned long *missed)
 {
const char *buf;
-   u32 prog_id;
+   u32 prog_id, ulen;
size_t len;
int err;
 
+   ulen = *ulenp;
if (!ulen ^ !uname)
return -EINVAL;
 
@@ -3581,10 +3582,17 @@ static int bpf_perf_link_fill_common(const struct 
perf_event *event,
  probe_offset, probe_addr, missed);
if (err)
return err;
+
+   if (buf) {
+   len = strlen(buf);
+   *ulenp = len + 1;
+   } else {
+   *ulenp = 1;
+   }
if (!uname)
return 0;
+
if (buf) {
-   len = strlen(buf);
err = bpf_copy_to_user(uname, buf, ulen, len);
if (err)
return err;
@@ -3609,7 +3617,7 @@ static int bpf_perf_link_fill_kprobe(const struct 
perf_event *event,
 
uname = u64_to_user_ptr(info->perf_event.kprobe.func_name);
ulen = info->perf_event.kprobe.name_len;
-   err = bpf_perf_link_fill_common(event, uname, ulen, &offset, &addr,
+   err = bpf_perf_link_fill_common(event, uname, &ulen, &offset, &addr,
&type, &missed);
if (err)
return err;
@@ -3617,7 +3625,7 @@ static int bpf_perf_link_fill_kprobe(const struct 
perf_event *event,
info->perf_event.type = BPF_PERF_EVENT_KRETPROBE;
else
info->perf_event.type = BPF_PERF_EVENT_KPROBE;
-
+   info->perf_event.kprobe.name_len = ulen;
info->perf_event.kprobe.offset = offset;
info->perf_event.kprobe.missed = missed;
if (!kallsyms_show_value(current_cred()))
@@ -3639,7 +3647,7 @@ static int bpf_perf_link_fill_uprobe(const struct 
perf_event *event,
 
uname = u64_to_user_ptr(info->perf_event.uprobe.file_name);
ulen = info->perf_event.uprobe.name_len;
-   err = bpf_perf_link_fill_common(event, uname, ulen, &offset, &addr,
+   err = bpf_perf_link_fill_common(event, uname, &ulen, &offset, &addr,
&type, NULL);
if (err)
return err;
@@ -3648,6 +3656,7 @@ static int bpf_perf_link_fill_uprobe(const struct 
perf_event *event,
info->perf_event.type = BPF_PERF_EVENT_URETPROBE;
else
info->perf_event.type = BPF_PERF_EVENT_UPROBE;
+   info->perf_event.uprobe.name_len = ulen;
info->perf_event.uprobe.offset = offset;
info->perf_event.uprobe.cookie = event->bpf_cookie;
return 0;
@@ -3673,12 +3682,18 @@ static int bpf_perf_link_fill_tracepoint(const struct 
perf_event *event,
 {
char __user *uname;
u32 ulen;
+   int err;
 
uname = u64_to_user_ptr(info->perf_event.tracepoint.tp_name);
ulen = info->perf_event.tracepoint.name_len;
+   err = bpf_perf_link_fill_common(event, uname, &ulen, NULL, NULL, NULL, 
NULL);
+   if (err)
+   return err;
+
info->perf_event.type = BPF_PERF_EVENT_TRACEPOINT;
+

[PATCH bpf v7 2/2] selftests/bpf: fix perf_event link info name_len assertion

2024-10-08 Thread Tyrone Wu

Fix `name_len` field assertions in `bpf_link_info.perf_event` for
kprobe/uprobe/tracepoint to validate correct name size instead of 0.

Fixes: 23cf7aa539dc ("selftests/bpf: Add selftest for fill_link_info")
Signed-off-by: Tyrone Wu 
Acked-by: Jiri Olsa 
Acked-by: Yafang Shao 
---
V6 -> V7: no change

V5 -> V6:
- Fix netdev/checkpatch warning for 80 cols exceeded
- Fix Signed-off-by to use real name instead of git username

V4 -> V5: no change

V3 -> V4:
- Split patch into separate kernel and selftest change

 tools/testing/selftests/bpf/prog_tests/fill_link_info.c | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c 
b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c
index f3932941bbaa..745c5ada4c4b 100644
--- a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c
+++ b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c
@@ -67,8 +67,9 @@ static int verify_perf_link_info(int fd, enum 
bpf_perf_event_type type, long add
 
ASSERT_EQ(info.perf_event.kprobe.cookie, PERF_EVENT_COOKIE, 
"kprobe_cookie");
 
+   ASSERT_EQ(info.perf_event.kprobe.name_len, strlen(KPROBE_FUNC) 
+ 1,
+ "name_len");
if (!info.perf_event.kprobe.func_name) {
-   ASSERT_EQ(info.perf_event.kprobe.name_len, 0, 
"name_len");
info.perf_event.kprobe.func_name = ptr_to_u64(&buf);
info.perf_event.kprobe.name_len = sizeof(buf);
goto again;
@@ -79,8 +80,9 @@ static int verify_perf_link_info(int fd, enum 
bpf_perf_event_type type, long add
ASSERT_EQ(err, 0, "cmp_kprobe_func_name");
break;
case BPF_PERF_EVENT_TRACEPOINT:
+   ASSERT_EQ(info.perf_event.tracepoint.name_len, strlen(TP_NAME) 
+ 1,
+ "name_len");
if (!info.perf_event.tracepoint.tp_name) {
-   ASSERT_EQ(info.perf_event.tracepoint.name_len, 0, 
"name_len");
info.perf_event.tracepoint.tp_name = ptr_to_u64(&buf);
info.perf_event.tracepoint.name_len = sizeof(buf);
goto again;
@@ -96,8 +98,9 @@ static int verify_perf_link_info(int fd, enum 
bpf_perf_event_type type, long add
case BPF_PERF_EVENT_URETPROBE:
ASSERT_EQ(info.perf_event.uprobe.offset, offset, 
"uprobe_offset");
 
+   ASSERT_EQ(info.perf_event.uprobe.name_len, strlen(UPROBE_FILE) 
+ 1,
+ "name_len");
if (!info.perf_event.uprobe.file_name) {
-   ASSERT_EQ(info.perf_event.uprobe.name_len, 0, 
"name_len");
info.perf_event.uprobe.file_name = ptr_to_u64(&buf);
info.perf_event.uprobe.name_len = sizeof(buf);
goto again;
-- 
2.43.0

Re: [PATCH bpf] selftests/bpf: add missing header include for htons

2024-10-08 Thread patchwork-bot+netdevbpf

Hello:

This patch was applied to bpf/bpf-next.git (net)
by Martin KaFai Lau :

On Tue, 08 Oct 2024 16:50:57 +0200 you wrote:
> Including the network_helpers.h header in tests can lead to the following
> build error:
> 
> ./network_helpers.h: In function ‘csum_tcpudp_magic’:
> ./network_helpers.h:116:14: error: implicit declaration of function \
>   ‘htons’ [-Werror=implicit-function-declaration]
>   116 | s += htons(proto + len);
> 
> [...]

Here is the summary with links:
  - [bpf] selftests/bpf: add missing header include for htons
https://git.kernel.org/bpf/bpf-next/c/bc9b3fb827fc

You are awesome, thank you!
-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html

[PATCH v4 14/19] gendwarfksyms: Add support for kABI rules

Distributions that want to maintain a stable kABI need the ability
to make ABI compatible changes to kernel without affecting symbol
versions, either because of LTS updates or backports.

With genksyms, developers would typically hide these changes from
version calculation with #ifndef __GENKSYMS__, which would result
in the symbol version not changing even though the actual type has
changed.  When we process precompiled object files, this isn't an
option.

To support this use case, add a --stable command line flag that
gates kABI stability features that are not needed in mainline
kernels, but can be useful for distributions, and add support for
kABI rules, which can be used to restrict gendwarfksyms output.

The rules are specified as a set of null-terminated strings stored
in the .discard.gendwarfksyms.kabi_rules section. Each rule consists
of four strings as follows:

  "version\0type\0target\0value"

The version string ensures the structure can be changed in a
backwards compatible way. The type string indicates the type of the
rule, and target and value strings contain rule-specific data.

Initially support two simple rules:

  1. Declaration-only structures

 A structure declaration can change into a full definition when
 additional includes are pulled in to the TU, which changes the
 versions of any symbol that references the struct. Add support
 for defining declaration-only structs whose definition is not
 expanded during versioning.

  2. Ignored enum fields

 It's possible to add new enum fields without changing the ABI,
 but as the fields are included in symbol versioning, this would
 change the versions. Add support for ignoring specific fields.

Add examples for using the rules under the examples/ directory.

Signed-off-by: Sami Tolvanen 
Acked-by: Neal Gompa 
---
 scripts/gendwarfksyms/Makefile  |   1 +
 scripts/gendwarfksyms/dwarf.c   |  19 +-
 scripts/gendwarfksyms/examples/kabi.h   |  61 ++
 scripts/gendwarfksyms/examples/kabi_rules.c |  56 +
 scripts/gendwarfksyms/gendwarfksyms.c   |  11 +-
 scripts/gendwarfksyms/gendwarfksyms.h   |  57 ++
 scripts/gendwarfksyms/kabi.c| 214 
 7 files changed, 415 insertions(+), 4 deletions(-)
 create mode 100644 scripts/gendwarfksyms/examples/kabi.h
 create mode 100644 scripts/gendwarfksyms/examples/kabi_rules.c
 create mode 100644 scripts/gendwarfksyms/kabi.c

diff --git a/scripts/gendwarfksyms/Makefile b/scripts/gendwarfksyms/Makefile
index 6540282dc746..27258c31e839 100644
--- a/scripts/gendwarfksyms/Makefile
+++ b/scripts/gendwarfksyms/Makefile
@@ -5,6 +5,7 @@ gendwarfksyms-objs += gendwarfksyms.o
 gendwarfksyms-objs += cache.o
 gendwarfksyms-objs += die.o
 gendwarfksyms-objs += dwarf.o
+gendwarfksyms-objs += kabi.o
 gendwarfksyms-objs += symbols.o
 gendwarfksyms-objs += types.o
 
diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c
index a47a3a0f7a69..b15f1a5db452 100644
--- a/scripts/gendwarfksyms/dwarf.c
+++ b/scripts/gendwarfksyms/dwarf.c
@@ -80,11 +80,12 @@ static bool match_export_symbol(struct state *state, 
Dwarf_Die *die)
return !!state->sym;
 }
 
-static bool is_declaration(Dwarf_Die *die)
+static bool is_declaration(struct die *cache, Dwarf_Die *die)
 {
bool value;
 
-   return get_flag_attr(die, DW_AT_declaration, &value) && value;
+   return (get_flag_attr(die, DW_AT_declaration, &value) && value) ||
+  kabi_is_struct_declonly(cache->fqn);
 }
 
 /*
@@ -472,10 +473,11 @@ static void __process_structure_type(struct state *state, 
struct die *cache,
process(cache, " {");
process_linebreak(cache, 1);
 
-   is_decl = is_declaration(die);
+   is_decl = is_declaration(cache, die);
 
if (!is_decl && state->expand.expand) {
cache_mark_expanded(&state->expansion_cache, die->addr);
+   state->expand.current_fqn = cache->fqn;
check(process_die_container(state, cache, die, process_func,
match_func));
}
@@ -508,6 +510,15 @@ static void process_enumerator_type(struct state *state, 
struct die *cache,
 {
Dwarf_Word value;
 
+   if (stable) {
+   /* Get the fqn before we process anything */
+   update_fqn(cache, die);
+
+   if (kabi_is_enumerator_ignored(state->expand.current_fqn,
+  cache->fqn))
+   return;
+   }
+
process_list_comma(state, cache);
process(cache, "enumerator");
process_fqn(cache, die);
@@ -580,6 +591,7 @@ static void state_init(struct state *state)
state->expand.expand = true;
state->expand.ptr_depth = 0;
state->expand.ptr_expansion_depth = 0;
+   state->expand.current_fqn = NULL;
hash_init(state->expansion_cache.cache);
 }
 
@@ -589,6 +601,7 @@ static void expansion_s

[PATCH v4 12/19] gendwarfksyms: Add symtypes output

Add support for producing genksyms-style symtypes files. Process
die_map to find the longest expansions for each type, and use symtypes
references in type definitions. The basic file format is similar to
genksyms, with two notable exceptions:

  1. Type names with spaces (common with Rust) in references are
 wrapped in single quotes. E.g.:

 s#'core::result::Result'

  2. The actual type definition is the simple parsed DWARF format we
 output with --dump-dies, not the preprocessed C-style format
 genksyms produces.

Signed-off-by: Sami Tolvanen 
Acked-by: Neal Gompa 
---
 scripts/gendwarfksyms/Makefile|   1 +
 scripts/gendwarfksyms/die.c   |  11 +
 scripts/gendwarfksyms/dwarf.c |   1 +
 scripts/gendwarfksyms/gendwarfksyms.c |  36 ++-
 scripts/gendwarfksyms/gendwarfksyms.h |  19 ++
 scripts/gendwarfksyms/symbols.c   |   4 +-
 scripts/gendwarfksyms/types.c | 359 ++
 7 files changed, 428 insertions(+), 3 deletions(-)
 create mode 100644 scripts/gendwarfksyms/types.c

diff --git a/scripts/gendwarfksyms/Makefile b/scripts/gendwarfksyms/Makefile
index c06145d84df8..6540282dc746 100644
--- a/scripts/gendwarfksyms/Makefile
+++ b/scripts/gendwarfksyms/Makefile
@@ -6,5 +6,6 @@ gendwarfksyms-objs += cache.o
 gendwarfksyms-objs += die.o
 gendwarfksyms-objs += dwarf.o
 gendwarfksyms-objs += symbols.o
+gendwarfksyms-objs += types.o
 
 HOSTLDLIBS_gendwarfksyms := -ldw -lelf
diff --git a/scripts/gendwarfksyms/die.c b/scripts/gendwarfksyms/die.c
index 2829387fd815..df1ca3a032bb 100644
--- a/scripts/gendwarfksyms/die.c
+++ b/scripts/gendwarfksyms/die.c
@@ -22,6 +22,7 @@ static inline unsigned int die_hash(uintptr_t addr, enum 
die_state state)
 static void init_die(struct die *cd)
 {
cd->state = DIE_INCOMPLETE;
+   cd->mapped = false;
cd->fqn = NULL;
cd->tag = -1;
cd->addr = 0;
@@ -83,6 +84,16 @@ static void reset_die(struct die *cd)
init_die(cd);
 }
 
+void die_map_for_each(die_map_callback_t func, void *arg)
+{
+   struct hlist_node *tmp;
+   struct die *cd;
+
+   hash_for_each_safe(die_map, cd, tmp, hash) {
+   func(cd, arg);
+   }
+}
+
 void die_map_free(void)
 {
struct hlist_node *tmp;
diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c
index d67cac16f8fb..e1a9e9061b1d 100644
--- a/scripts/gendwarfksyms/dwarf.c
+++ b/scripts/gendwarfksyms/dwarf.c
@@ -728,6 +728,7 @@ static void process_symbol(struct state *state, Dwarf_Die 
*die,
 {
debug("%s", state->sym->name);
check(process_func(state, NULL, die));
+   state->sym->state = SYMBOL_MAPPED;
if (dump_dies)
fputs("\n", stderr);
 }
diff --git a/scripts/gendwarfksyms/gendwarfksyms.c 
b/scripts/gendwarfksyms/gendwarfksyms.c
index d40692a703bb..24c87523fc3a 100644
--- a/scripts/gendwarfksyms/gendwarfksyms.c
+++ b/scripts/gendwarfksyms/gendwarfksyms.c
@@ -21,6 +21,11 @@ int debug;
 int dump_dies;
 /* Print debugging information about die_map changes */
 int dump_die_map;
+/* Print out type strings (i.e. type_map) */
+int dump_types;
+/* Write a symtypes file */
+int symtypes;
+static const char *symtypes_file;
 
 static void usage(void)
 {
@@ -29,6 +34,8 @@ static void usage(void)
  "  -d, --debug  Print debugging information\n"
  "  --dump-dies  Dump DWARF DIE contents\n"
  "  --dump-die-map   Print debugging information about die_map 
changes\n"
+ "  --dump-types Dump type strings\n"
+ "  -T, --symtypes file  Write a symtypes file\n"
  "  -h, --help   Print this message\n"
  "\n",
  stderr);
@@ -41,6 +48,7 @@ static int process_module(Dwfl_Module *mod, void **userdata, 
const char *name,
Dwarf_Die cudie;
Dwarf_CU *cu = NULL;
Dwarf *dbg;
+   FILE *symfile = arg;
int res;
 
debug("%s", name);
@@ -60,6 +68,10 @@ static int process_module(Dwfl_Module *mod, void **userdata, 
const char *name,
process_cu(&cudie);
} while (cu);
 
+   /*
+* Use die_map to expand type strings and write them to `symfile`.
+*/
+   generate_symtypes(symfile);
die_map_free();
 
return DWARF_CB_OK;
@@ -72,22 +84,29 @@ static const Dwfl_Callbacks callbacks = {
 
 int main(int argc, char **argv)
 {
+   FILE *symfile = NULL;
unsigned int n;
int opt;
 
struct option opts[] = { { "debug", 0, NULL, 'd' },
 { "dump-dies", 0, &dump_dies, 1 },
 { "dump-die-map", 0, &dump_die_map, 1 },
+{ "dump-types", 0, &dump_types, 1 },
+{ "symtypes", 1, NULL, 'T' },
 { "help", 0, NULL, 'h' },
 { 0, 0, NULL, 0 } };
 
-   while ((opt = getopt_long(argc, ar

[PATCH v4 13/19] gendwarfksyms: Add symbol versioning

Calculate symbol versions from the fully expanded type strings in
type_map, and output the versions in a genksyms-compatible format.

Signed-off-by: Sami Tolvanen 
Acked-by: Neal Gompa 
---
 scripts/gendwarfksyms/dwarf.c |  25 +-
 scripts/gendwarfksyms/gendwarfksyms.c |  11 ++-
 scripts/gendwarfksyms/gendwarfksyms.h |  13 ++-
 scripts/gendwarfksyms/symbols.c   |  59 +
 scripts/gendwarfksyms/types.c | 122 +-
 5 files changed, 222 insertions(+), 8 deletions(-)

diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c
index e1a9e9061b1d..a47a3a0f7a69 100644
--- a/scripts/gendwarfksyms/dwarf.c
+++ b/scripts/gendwarfksyms/dwarf.c
@@ -723,12 +723,33 @@ static int process_type(struct state *state, struct die 
*parent, Dwarf_Die *die)
 /*
  * Exported symbol processing
  */
+static struct die *get_symbol_cache(struct state *state, Dwarf_Die *die)
+{
+   struct die *cache;
+
+   cache = die_map_get(die, DIE_SYMBOL);
+
+   if (cache->state != DIE_INCOMPLETE)
+   return NULL; /* We already processed a symbol for this DIE */
+
+   cache->tag = dwarf_tag(die);
+   return cache;
+}
+
 static void process_symbol(struct state *state, Dwarf_Die *die,
   die_callback_t process_func)
 {
+   struct die *cache;
+
+   symbol_set_die(state->sym, die);
+
+   cache = get_symbol_cache(state, die);
+   if (!cache)
+   return;
+
debug("%s", state->sym->name);
-   check(process_func(state, NULL, die));
-   state->sym->state = SYMBOL_MAPPED;
+   check(process_func(state, cache, die));
+   cache->state = DIE_SYMBOL;
if (dump_dies)
fputs("\n", stderr);
 }
diff --git a/scripts/gendwarfksyms/gendwarfksyms.c 
b/scripts/gendwarfksyms/gendwarfksyms.c
index 24c87523fc3a..e90d909d259b 100644
--- a/scripts/gendwarfksyms/gendwarfksyms.c
+++ b/scripts/gendwarfksyms/gendwarfksyms.c
@@ -23,6 +23,8 @@ int dump_dies;
 int dump_die_map;
 /* Print out type strings (i.e. type_map) */
 int dump_types;
+/* Print out expanded type strings used for symbol versions */
+int dump_versions;
 /* Write a symtypes file */
 int symtypes;
 static const char *symtypes_file;
@@ -35,6 +37,7 @@ static void usage(void)
  "  --dump-dies  Dump DWARF DIE contents\n"
  "  --dump-die-map   Print debugging information about die_map 
changes\n"
  "  --dump-types Dump type strings\n"
+ "  --dump-versions  Dump expanded type strings used for 
symbol versions\n"
  "  -T, --symtypes file  Write a symtypes file\n"
  "  -h, --help   Print this message\n"
  "\n",
@@ -69,9 +72,10 @@ static int process_module(Dwfl_Module *mod, void **userdata, 
const char *name,
} while (cu);
 
/*
-* Use die_map to expand type strings and write them to `symfile`.
+* Use die_map to expand type strings, write them to `symfile`, and
+* calculate symbol versions.
 */
-   generate_symtypes(symfile);
+   generate_symtypes_and_versions(symfile);
die_map_free();
 
return DWARF_CB_OK;
@@ -92,6 +96,7 @@ int main(int argc, char **argv)
 { "dump-dies", 0, &dump_dies, 1 },
 { "dump-die-map", 0, &dump_die_map, 1 },
 { "dump-types", 0, &dump_types, 1 },
+{ "dump-versions", 0, &dump_versions, 1 },
 { "symtypes", 1, NULL, 'T' },
 { "help", 0, NULL, 'h' },
 { 0, 0, NULL, 0 } };
@@ -167,5 +172,7 @@ int main(int argc, char **argv)
if (symfile)
check(fclose(symfile));
 
+   symbol_print_versions();
+
return 0;
 }
diff --git a/scripts/gendwarfksyms/gendwarfksyms.h 
b/scripts/gendwarfksyms/gendwarfksyms.h
index e47b5e967520..814f53ef799e 100644
--- a/scripts/gendwarfksyms/gendwarfksyms.h
+++ b/scripts/gendwarfksyms/gendwarfksyms.h
@@ -26,6 +26,7 @@ extern int debug;
 extern int dump_dies;
 extern int dump_die_map;
 extern int dump_types;
+extern int dump_versions;
 extern int symtypes;
 
 /*
@@ -98,6 +99,7 @@ static inline unsigned int addr_hash(uintptr_t addr)
 enum symbol_state {
SYMBOL_UNPROCESSED,
SYMBOL_MAPPED,
+   SYMBOL_PROCESSED
 };
 
 struct symbol_addr {
@@ -112,6 +114,7 @@ struct symbol {
struct hlist_node name_hash;
enum symbol_state state;
uintptr_t die_addr;
+   unsigned long crc;
 };
 
 typedef void (*symbol_callback_t)(struct symbol *, void *arg);
@@ -119,6 +122,10 @@ typedef void (*symbol_callback_t)(struct symbol *, void 
*arg);
 void symbol_read_exports(FILE *file);
 void symbol_read_symtab(int fd);
 struct symbol *symbol_get(const char *name);
+void symbol_set_die(struct symbol *sym, Dwarf_Die *die);
+void symbol_s

[PATCH v4 10/19] gendwarfksyms: Limit structure expansion

Expand each structure type only once per exported symbol. This
is necessary to support self-referential structures, which would
otherwise result in infinite recursion, but is still sufficient for
catching ABI changes.

For pointers, limit structure expansion after the first pointer
in the symbol type. This should be plenty for detecting ABI
differences, but it stops us from pulling in half the kernel for
types that contain pointers to large kernel data structures, like
task_struct, for example.

Signed-off-by: Sami Tolvanen 
Acked-by: Neal Gompa 
---
 scripts/gendwarfksyms/Makefile|   1 +
 scripts/gendwarfksyms/cache.c |  44 +++
 scripts/gendwarfksyms/dwarf.c | 109 +++---
 scripts/gendwarfksyms/gendwarfksyms.h |  37 +
 4 files changed, 182 insertions(+), 9 deletions(-)
 create mode 100644 scripts/gendwarfksyms/cache.c

diff --git a/scripts/gendwarfksyms/Makefile b/scripts/gendwarfksyms/Makefile
index c0d4ce50fc27..c06145d84df8 100644
--- a/scripts/gendwarfksyms/Makefile
+++ b/scripts/gendwarfksyms/Makefile
@@ -2,6 +2,7 @@
 hostprogs-always-y += gendwarfksyms
 
 gendwarfksyms-objs += gendwarfksyms.o
+gendwarfksyms-objs += cache.o
 gendwarfksyms-objs += die.o
 gendwarfksyms-objs += dwarf.o
 gendwarfksyms-objs += symbols.o
diff --git a/scripts/gendwarfksyms/cache.c b/scripts/gendwarfksyms/cache.c
new file mode 100644
index 000000000000..2f1517133a20
--- /dev/null
+++ b/scripts/gendwarfksyms/cache.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024 Google LLC
+ */
+
+#include "gendwarfksyms.h"
+
+struct expanded {
+   uintptr_t addr;
+   struct hlist_node hash;
+};
+
+void __cache_mark_expanded(struct expansion_cache *ec, uintptr_t addr)
+{
+   struct expanded *es;
+
+   es = xmalloc(sizeof(struct expanded));
+   es->addr = addr;
+   hash_add(ec->cache, &es->hash, addr_hash(addr));
+}
+
+bool __cache_was_expanded(struct expansion_cache *ec, uintptr_t addr)
+{
+   struct expanded *es;
+
+   hash_for_each_possible(ec->cache, es, hash, addr_hash(addr)) {
+   if (es->addr == addr)
+   return true;
+   }
+
+   return false;
+}
+
+void cache_clear_expanded(struct expansion_cache *ec)
+{
+   struct hlist_node *tmp;
+   struct expanded *es;
+
+   hash_for_each_safe(ec->cache, es, tmp, hash) {
+   free(es);
+   }
+
+   hash_init(ec->cache);
+}
diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c
index f5cebbdcc212..51dd8e82f9e7 100644
--- a/scripts/gendwarfksyms/dwarf.c
+++ b/scripts/gendwarfksyms/dwarf.c
@@ -26,6 +26,7 @@ static void process_linebreak(struct die *cache, int n)
   !dwarf_form##attr(&da, value);  \
}
 
+DEFINE_GET_ATTR(flag, bool)
 DEFINE_GET_ATTR(udata, Dwarf_Word)
 
 static bool get_ref_die_attr(Dwarf_Die *die, unsigned int id, Dwarf_Die *value)
@@ -79,6 +80,13 @@ static bool match_export_symbol(struct state *state, 
Dwarf_Die *die)
return !!state->sym;
 }
 
+static bool is_declaration(Dwarf_Die *die)
+{
+   bool value;
+
+   return get_flag_attr(die, DW_AT_declaration, &value) && value;
+}
+
 /*
  * Type string processing
  */
@@ -455,19 +463,28 @@ static void __process_structure_type(struct state *state, 
struct die *cache,
 die_callback_t process_func,
 die_match_callback_t match_func)
 {
+   bool is_decl;
+
process(cache, type);
process_fqn(cache, die);
process(cache, " {");
process_linebreak(cache, 1);
 
-   check(process_die_container(state, cache, die, process_func,
-   match_func));
+   is_decl = is_declaration(die);
+
+   if (!is_decl && state->expand.expand) {
+   cache_mark_expanded(&state->expansion_cache, die->addr);
+   check(process_die_container(state, cache, die, process_func,
+   match_func));
+   }
 
process_linebreak(cache, -1);
process(cache, "}");
 
-   process_byte_size_attr(cache, die);
-   process_alignment_attr(cache, die);
+   if (!is_decl && state->expand.expand) {
+   process_byte_size_attr(cache, die);
+   process_alignment_attr(cache, die);
+   }
 }
 
 #define DEFINE_PROCESS_STRUCTURE_TYPE(structure)\
@@ -520,7 +537,7 @@ static void process_unspecified_type(struct state *state, 
struct die *cache,
 Dwarf_Die *die)
 {
/*
-* These can be emitted for stand-elone assembly code, which means we
+* These can be emitted for stand-alone assembly code, which means we
 * might run into them in vmlinux.o.
 */
process(cache, "unspecified_type");
@@ -552,6 +569,42 @@ static void process_cached(struct state *state, struct die 
*cache,

[PATCH v4 11/19] gendwarfksyms: Add die_map debugging

Debugging the DWARF processing can be somewhat challenging, so add
more detailed debugging output for die_map operations. Add the
--dump-die-map flag, which adds color coded tags to the output for
die_map changes.

Signed-off-by: Sami Tolvanen 
Acked-by: Neal Gompa 
---
 scripts/gendwarfksyms/dwarf.c | 15 +++
 scripts/gendwarfksyms/gendwarfksyms.c |  7 +++
 scripts/gendwarfksyms/gendwarfksyms.h | 13 +
 3 files changed, 35 insertions(+)

diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c
index 51dd8e82f9e7..d67cac16f8fb 100644
--- a/scripts/gendwarfksyms/dwarf.c
+++ b/scripts/gendwarfksyms/dwarf.c
@@ -103,6 +103,8 @@ static void process(struct die *cache, const char *s)
if (dump_dies)
fputs(s, stderr);
 
+   if (cache)
+   die_debug_r("cache %p string '%s'", cache, s);
die_map_add_string(cache, s);
 }
 
@@ -552,6 +554,8 @@ static void process_cached(struct state *state, struct die 
*cache,
list_for_each_entry(df, &cache->fragments, list) {
switch (df->type) {
case FRAGMENT_STRING:
+   die_debug_b("cache %p STRING '%s'", cache,
+   df->data.str);
process(NULL, df->data.str);
break;
case FRAGMENT_LINEBREAK:
@@ -561,6 +565,8 @@ static void process_cached(struct state *state, struct die 
*cache,
if (!dwarf_die_addr_die(dwarf_cu_getdwarf(die->cu),
(void *)df->data.addr, &child))
error("dwarf_die_addr_die failed");
+   die_debug_b("cache %p DIE addr %" PRIxPTR " tag %x",
+   cache, df->data.addr, dwarf_tag(&child));
check(process_type(state, NULL, &child));
break;
default:
@@ -651,6 +657,9 @@ static int process_type(struct state *state, struct die 
*parent, Dwarf_Die *die)
cache = die_map_get(die, want_state);
 
if (cache->state == want_state) {
+   die_debug_g("cached addr %p tag %x -- %s", die->addr, tag,
+   die_state_name(cache->state));
+
if (want_state == DIE_COMPLETE && is_expanded_type(tag))
cache_mark_expanded(&state->expansion_cache, die->addr);
 
@@ -661,6 +670,9 @@ static int process_type(struct state *state, struct die 
*parent, Dwarf_Die *die)
return 0;
}
 
+   die_debug_g("addr %p tag %x -- %s -> %s", die->addr, tag,
+   die_state_name(cache->state), die_state_name(want_state));
+
switch (tag) {
/* Type modifiers */
PROCESS_TYPE(atomic)
@@ -696,6 +708,9 @@ static int process_type(struct state *state, struct die 
*parent, Dwarf_Die *die)
error("unexpected type: %x", tag);
}
 
+   die_debug_r("parent %p cache %p die addr %p tag %x", parent, cache,
+   die->addr, tag);
+
/* Update cache state and append to the parent (if any) */
cache->tag = tag;
cache->state = want_state;
diff --git a/scripts/gendwarfksyms/gendwarfksyms.c 
b/scripts/gendwarfksyms/gendwarfksyms.c
index 310cc9257d6e..d40692a703bb 100644
--- a/scripts/gendwarfksyms/gendwarfksyms.c
+++ b/scripts/gendwarfksyms/gendwarfksyms.c
@@ -19,6 +19,8 @@
 int debug;
 /* Dump DIE contents */
 int dump_dies;
+/* Print debugging information about die_map changes */
+int dump_die_map;
 
 static void usage(void)
 {
@@ -26,6 +28,7 @@ static void usage(void)
  "Options:\n"
  "  -d, --debug  Print debugging information\n"
  "  --dump-dies  Dump DWARF DIE contents\n"
+ "  --dump-die-map   Print debugging information about die_map 
changes\n"
  "  -h, --help   Print this message\n"
  "\n",
  stderr);
@@ -74,6 +77,7 @@ int main(int argc, char **argv)
 
struct option opts[] = { { "debug", 0, NULL, 'd' },
 { "dump-dies", 0, &dump_dies, 1 },
+{ "dump-die-map", 0, &dump_die_map, 1 },
 { "help", 0, NULL, 'h' },
 { 0, 0, NULL, 0 } };
 
@@ -93,6 +97,9 @@ int main(int argc, char **argv)
}
}
 
+   if (dump_die_map)
+   dump_dies = 1;
+
if (optind >= argc) {
usage();
error("no input files?");
diff --git a/scripts/gendwarfksyms/gendwarfksyms.h 
b/scripts/gendwarfksyms/gendwarfksyms.h
index 6147859ae2af..126916e85ee0 100644
--- a/scripts/gendwarfksyms/gendwarfksyms.h
+++ b/scripts/gendwarfksyms/gendwarfksyms.h
@@ -24,6 +24,7 @@
  */
 extern int debug;
 extern int dump_dies;
+extern int dump_die_map;
 
 /*
  * Output helpers
@@ -46,6 +47,18 @@ extern in

[PATCH v4 08/19] gendwarfksyms: Expand array_type

Add support for expanding DW_TAG_array_type, and the subrange type
indicating array size.

Example source code:

  const char *s[34];

Output with --dump-dies:

  variable array_type[34] {
pointer_type {
  const_type {
base_type char byte_size(1) encoding(6)
  }
} byte_size(8)
  }

Signed-off-by: Sami Tolvanen 
Acked-by: Neal Gompa 
Reviewed-by: Petr Pavlu 
---
 scripts/gendwarfksyms/dwarf.c | 30 ++
 1 file changed, 30 insertions(+)

diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c
index 7e6b477d7c12..ade9b3b7b119 100644
--- a/scripts/gendwarfksyms/dwarf.c
+++ b/scripts/gendwarfksyms/dwarf.c
@@ -219,6 +219,7 @@ DEFINE_PROCESS_UDATA_ATTRIBUTE(encoding)
}
 
 DEFINE_MATCH(formal_parameter)
+DEFINE_MATCH(subrange)
 
 bool match_all(Dwarf_Die *die)
 {
@@ -341,6 +342,33 @@ DEFINE_PROCESS_TYPE(shared)
 DEFINE_PROCESS_TYPE(volatile)
 DEFINE_PROCESS_TYPE(typedef)
 
+static void process_subrange_type(struct state *state, struct die *cache,
+ Dwarf_Die *die)
+{
+   Dwarf_Word count = 0;
+
+   if (get_udata_attr(die, DW_AT_count, &count))
+   process_fmt(cache, "[%" PRIu64 "]", count);
+   else if (get_udata_attr(die, DW_AT_upper_bound, &count))
+   process_fmt(cache, "[%" PRIu64 "]", count + 1);
+   else
+   process(cache, "[]");
+}
+
+static void process_array_type(struct state *state, struct die *cache,
+  Dwarf_Die *die)
+{
+   process(cache, "array_type");
+   /* Array size */
+   check(process_die_container(state, cache, die, process_type,
+   match_subrange_type));
+   process(cache, " {");
+   process_linebreak(cache, 1);
+   process_type_attr(state, cache, die);
+   process_linebreak(cache, -1);
+   process(cache, "}");
+}
+
 static void __process_subroutine_type(struct state *state, struct die *cache,
  Dwarf_Die *die, const char *type)
 {
@@ -436,7 +464,9 @@ static int process_type(struct state *state, struct die 
*parent, Dwarf_Die *die)
PROCESS_TYPE(volatile)
/* Subtypes */
PROCESS_TYPE(formal_parameter)
+   PROCESS_TYPE(subrange)
/* Other types */
+   PROCESS_TYPE(array)
PROCESS_TYPE(base)
PROCESS_TYPE(subroutine)
PROCESS_TYPE(typedef)
-- 
2.47.0.rc0.187.ge670bccf7e-goog

[PATCH v4 15/19] gendwarfksyms: Add support for reserved and ignored fields

Distributions that want to maintain a stable kABI need the ability
to make ABI compatible changes to kernel data structures without
affecting symbol versions, either because of LTS updates or backports.

With genksyms, developers would typically hide these changes from
version calculation with #ifndef __GENKSYMS__, which would result
in the symbol version not changing even though the actual type has
changed.  When we process precompiled object files, this isn't an
option.

Change union processing to recognize field name prefixes that allow
the user to ignore the union completely during symbol versioning with
a __kabi_ignored prefix in a field name, or to replace the type of a
placeholder field using a __kabi_reserved field name prefix.

For example, assume we want to add a new field to an existing
alignment hole in a data structure, and ignore the new field when
calculating symbol versions:

  struct struct1 {
int a;
/* a 4-byte alignment hole */
unsigned long b;
  };

To add `int n` to the alignment hole, we can add a union that includes
a __kabi_ignored field that causes gendwarfksyms to ignore the entire
union:

  struct struct1 {
int a;
union {
  char __kabi_ignored_0;
  int n;
};
unsigned long b;
  };

With --stable, both structs produce the same symbol version.

Alternatively, when a distribution expects future modification to a
data structure, they can explicitly add reserved fields:

  struct struct2 {
long a;
long __kabi_reserved_0; /* reserved for future use */
  };

To take the field into use, we can again replace it with a union, with
one of the fields keeping the __kabi_reserved name prefix to indicate
the original type:

  struct struct2 {
long a;
union {
  long __kabi_reserved_0;
  struct {
  int b;
  int v;
  };
};
  };

Here gendwarfksyms --stable replaces the union with the type of the
placeholder field when calculating versions.

Signed-off-by: Sami Tolvanen 
Acked-by: Neal Gompa 
---
 scripts/gendwarfksyms/dwarf.c | 202 +-
 scripts/gendwarfksyms/examples/kabi.h |  80 +
 scripts/gendwarfksyms/examples/kabi_ex0.c |  86 +
 scripts/gendwarfksyms/examples/kabi_ex1.c |  89 ++
 scripts/gendwarfksyms/examples/kabi_ex2.c |  98 +++
 scripts/gendwarfksyms/gendwarfksyms.h |  29 
 6 files changed, 583 insertions(+), 1 deletion(-)
 create mode 100644 scripts/gendwarfksyms/examples/kabi_ex0.c
 create mode 100644 scripts/gendwarfksyms/examples/kabi_ex1.c
 create mode 100644 scripts/gendwarfksyms/examples/kabi_ex2.c

diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c
index b15f1a5db452..72e24140b6e3 100644
--- a/scripts/gendwarfksyms/dwarf.c
+++ b/scripts/gendwarfksyms/dwarf.c
@@ -308,6 +308,9 @@ static void __process_list_type(struct state *state, struct 
die *cache,
 {
const char *name = get_name_attr(die);
 
+   if (stable && is_kabi_prefix(name))
+   name = NULL;
+
process_list_comma(state, cache);
process(cache, type);
process_type_attr(state, cache, die);
@@ -441,11 +444,193 @@ static void process_variant_part_type(struct state 
*state, struct die *cache,
process(cache, "}");
 }
 
+static int get_kabi_status(Dwarf_Die *die)
+{
+   const char *name = get_name_attr(die);
+
+   if (is_kabi_prefix(name)) {
+   name += KABI_PREFIX_LEN;
+
+   if (!strncmp(name, KABI_RESERVED_PREFIX,
+KABI_RESERVED_PREFIX_LEN))
+   return KABI_RESERVED;
+   if (!strncmp(name, KABI_IGNORED_PREFIX,
+KABI_IGNORED_PREFIX_LEN))
+   return KABI_IGNORED;
+   }
+
+   return KABI_NORMAL;
+}
+
+static int check_struct_member_kabi_status(struct state *state,
+  struct die *__unused, Dwarf_Die *die)
+{
+   int res;
+
+   if (dwarf_tag(die) != DW_TAG_member_type)
+   error("expected a member");
+
+   /*
+* If the union member is a struct, expect the __kabi field to
+* be the first member of the structure, i.e.:
+*
+* union {
+*  type new_member;
+*  struct {
+*  type __kabi_field;
+*  }
+* };
+*/
+   res = get_kabi_status(die);
+
+   if (res == KABI_RESERVED &&
+   !get_ref_die_attr(die, DW_AT_type, &state->kabi.placeholder))
+   error("structure member missing a type?");
+
+   return res;
+}
+
+static int check_union_member_kabi_status(struct state *state,
+ struct die *__unused, Dwarf_Die *die)
+{
+   Dwarf_Die type;
+   int res;
+
+   if (dwarf_tag(die) != DW_TAG_member_type)
+   error("expected a member");
+
+   if (!get_ref_die_attr(die, DW_AT_type, &type))
+   error("union mem

[PATCH v4 19/19] Documentation/kbuild: Add DWARF module versioning

Add documentation for gendwarfksyms changes, and the kABI stability
features that can be useful for distributions even though they're not
used in mainline kernels.

Signed-off-by: Sami Tolvanen 
Acked-by: Neal Gompa 
---
 Documentation/kbuild/gendwarfksyms.rst | 274 +
 Documentation/kbuild/index.rst |   1 +
 2 files changed, 275 insertions(+)
 create mode 100644 Documentation/kbuild/gendwarfksyms.rst

diff --git a/Documentation/kbuild/gendwarfksyms.rst 
b/Documentation/kbuild/gendwarfksyms.rst
new file mode 100644
index 000000000000..4b89743d2a88
--- /dev/null
+++ b/Documentation/kbuild/gendwarfksyms.rst
@@ -0,0 +1,274 @@
+=======================
+DWARF module versioning
+=======================
+
+1. Introduction
+===============
+
+When CONFIG_MODVERSIONS is enabled, symbol versions for modules
+are typically calculated from preprocessed source code using the
+**genksyms** tool.  However, this is incompatible with languages such
+as Rust, where the source code has insufficient information about
+the resulting ABI. With CONFIG_GENDWARFKSYMS (and CONFIG_DEBUG_INFO)
+selected, **gendwarfksyms** is used instead to calculate symbol versions
+from the DWARF debugging information, which contains the necessary
+details about the final module ABI.
+
+1.1. Usage
+==========
+
+gendwarfksyms accepts a list of object files on the command line, and a
+list of symbol names (one per line) in standard input::
+
+Usage: gendwarfksyms [options] elf-object-file ... < symbol-list
+
+Options:
+  -d, --debug  Print debugging information
+  --dump-dies  Dump DWARF DIE contents
+  --dump-die-map   Print debugging information about die_map 
changes
+  --dump-types Dump type strings
+  --dump-versions  Dump expanded type strings used for symbol 
versions
+  -s, --stable Support kABI stability features
+  -T, --symtypes file  Write a symtypes file
+  -h, --help   Print this message
+
+
+2. Type information availability
+================================
+
+While symbols are typically exported in the same translation unit (TU)
+where they're defined, it's also perfectly fine for a TU to export
+external symbols. For example, this is done when calculating symbol
+versions for exports in stand-alone assembly code.
+
+To ensure the compiler emits the necessary DWARF type information in the
+TU where symbols are actually exported, gendwarfksyms adds a pointer
+to exported symbols in the `EXPORT_SYMBOL()` macro using the following
+macro::
+
+#define __GENDWARFKSYMS_EXPORT(sym) \
+static typeof(sym) *__gendwarfksyms_ptr_##sym __used\
+__section(".discard.gendwarfksyms") = &sym;
+
+
+When a symbol pointer is found in DWARF, gendwarfksyms can use its
+type for calculating symbol versions even if the symbol is defined
+elsewhere. The name of the symbol pointer is expected to start with
+`__gendwarfksyms_ptr_`, followed by the name of the exported symbol.
+
+3. Symtypes output format
+=========================
+
+Similarly to genksyms, gendwarfksyms supports writing a symtypes file
+for each processed object that contains types for exported symbols and
+each referenced type that was used in calculating symbol versions. These
+files can be useful when trying to determine what exactly caused symbol
+versions to change between builds.
+
+Matching the existing format, the first column of each line contains
+either a type reference or a symbol name. Type references have a
+one-letter prefix followed by "#" and the name of the type. Four
+reference types are supported::
+
+e# = enum
+s# = struct
+t# = typedef
+u# = union
+
+Type names with spaces in them are wrapped in single quotes, e.g.::
+
+s#'core::result::Result'
+
+The rest of the line contains a type string. Unlike with genksyms that
+produces C-style type strings, gendwarfksyms uses the same simple parsed
+DWARF format produced by **--dump-dies**, but with type references
+instead of fully expanded strings.
+
+4. Maintaining a stable kABI
+============================
+
+Distribution maintainers often need the ability to make ABI compatible
+changes to kernel data structures due to LTS updates or backports. Using
+the traditional `#ifndef __GENKSYMS__` to hide these changes from symbol
+versioning won't work when processing object files. To support this
+use case, gendwarfksyms provides kABI stability features designed to
+hide changes that won't affect the ABI when calculating versions. These
+features are all gated behind the **--stable** command line flag and are
+not used in the mainline kernel.
+
+Examples for using these features are provided in the
+**scripts/gendwarfksyms/examples** directory, including helper macros
+for source code annotation. Note that as these features are only used to
+transform the inp

[PATCH v4 16/19] gendwarfksyms: Add support for symbol type pointers

The compiler may choose not to emit type information in DWARF for
external symbols. Clang, for example, does this for symbols not
defined in the current TU.

To provide a way to work around this issue, add support for
__gendwarfksyms_ptr_ pointers that force the compiler to emit
the necessary type information in DWARF also for the missing symbols.

Example usage:

  #define GENDWARFKSYMS_PTR(sym) \
  static typeof(sym) *__gendwarfksyms_ptr_##sym __used  \
  __section(".discard.gendwarfksyms") = &sym;

  extern int external_symbol(void);
  GENDWARFKSYMS_PTR(external_symbol);

Signed-off-by: Sami Tolvanen 
Acked-by: Neal Gompa 
---
 scripts/gendwarfksyms/dwarf.c  | 55 +-
 scripts/gendwarfksyms/examples/symbolptr.c | 33 +
 scripts/gendwarfksyms/gendwarfksyms.h  |  7 +++
 scripts/gendwarfksyms/symbols.c| 27 +++
 4 files changed, 121 insertions(+), 1 deletion(-)
 create mode 100644 scripts/gendwarfksyms/examples/symbolptr.c

diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c
index 72e24140b6e3..0112b5e8fbf5 100644
--- a/scripts/gendwarfksyms/dwarf.c
+++ b/scripts/gendwarfksyms/dwarf.c
@@ -992,6 +992,31 @@ static void process_variable(struct state *state, 
Dwarf_Die *die)
process_symbol(state, die, __process_variable);
 }
 
+static void save_symbol_ptr(struct state *state)
+{
+   Dwarf_Die ptr_type;
+   Dwarf_Die type;
+
+   if (!get_ref_die_attr(&state->die, DW_AT_type, &ptr_type) ||
+   dwarf_tag(&ptr_type) != DW_TAG_pointer_type)
+   error("%s must be a pointer type!",
+ get_symbol_name(&state->die));
+
+   if (!get_ref_die_attr(&ptr_type, DW_AT_type, &type))
+   error("%s pointer missing a type attribute?",
+ get_symbol_name(&state->die));
+
+   /*
+* Save the symbol pointer DIE in case the actual symbol is
+* missing from the DWARF. Clang, for example, intentionally
+* omits external symbols from the debugging information.
+*/
+   if (dwarf_tag(&type) == DW_TAG_subroutine_type)
+   symbol_set_ptr(state->sym, &type);
+   else
+   symbol_set_ptr(state->sym, &ptr_type);
+}
+
 static int process_exported_symbols(struct state *unused, struct die *cache,
Dwarf_Die *die)
 {
@@ -1015,7 +1040,9 @@ static int process_exported_symbols(struct state *unused, 
struct die *cache,
 
state_init(&state);
 
-   if (tag == DW_TAG_subprogram)
+   if (is_symbol_ptr(get_symbol_name(&state.die)))
+   save_symbol_ptr(&state);
+   else if (tag == DW_TAG_subprogram)
process_subprogram(&state, &state.die);
else
process_variable(&state, &state.die);
@@ -1028,8 +1055,34 @@ static int process_exported_symbols(struct state 
*unused, struct die *cache,
}
 }
 
+static void process_symbol_ptr(struct symbol *sym, void *arg)
+{
+   struct state state;
+   Dwarf *dwarf = arg;
+
+   if (sym->state != SYMBOL_UNPROCESSED || !sym->ptr_die_addr)
+   return;
+
+   debug("%s", sym->name);
+   state_init(&state);
+   state.sym = sym;
+
+   if (!dwarf_die_addr_die(dwarf, (void *)sym->ptr_die_addr, &state.die))
+   error("dwarf_die_addr_die failed for symbol ptr: '%s'",
+ sym->name);
+
+   if (dwarf_tag(&state.die) == DW_TAG_subroutine_type)
+   process_subprogram(&state, &state.die);
+   else
+   process_variable(&state, &state.die);
+
+   cache_clear_expanded(&state.expansion_cache);
+}
+
 void process_cu(Dwarf_Die *cudie)
 {
check(process_die_container(NULL, NULL, cudie, process_exported_symbols,
match_all));
+
+   symbol_for_each(process_symbol_ptr, dwarf_cu_getdwarf(cudie->cu));
 }
diff --git a/scripts/gendwarfksyms/examples/symbolptr.c 
b/scripts/gendwarfksyms/examples/symbolptr.c
new file mode 100644
index 000000000000..b7b97cd39769
--- /dev/null
+++ b/scripts/gendwarfksyms/examples/symbolptr.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024 Google LLC
+ *
+ * Example for symbol pointers. When compiled with Clang, gendwarfksyms
+ * uses a symbol pointer for `f`.
+ *
+ * $ clang -g -c examples/symbolptr.c -o examples/symbolptr.o
+ * $ echo -e "f\ng\np" | ./gendwarfksyms -d examples/symbolptr.o
+ */
+
+/* Kernel macros for userspace testing. */
+#ifndef __used
+#define __used __attribute__((__used__))
+#endif
+#ifndef __section
+#define __section(section) __attribute__((__section__(section)))
+#endif
+
+#define __GENDWARFKSYMS_EXPORT(sym)\
+   static typeof(sym) *__gendwarfksyms_ptr_##sym __used\
+   __section(".discard.gendwarfksyms") = &sym;
+
+extern void f(unsigned int arg);
+v

[PATCH v4 18/19] kbuild: Add gendwarfksyms as an alternative to genksyms

When MODVERSIONS is enabled, allow selecting gendwarfksyms as the
implementation, but default to genksyms.

Signed-off-by: Sami Tolvanen 
Acked-by: Neal Gompa 
---
 kernel/module/Kconfig  | 25 -
 scripts/Makefile   |  2 +-
 scripts/Makefile.build | 39 +++
 3 files changed, 56 insertions(+), 10 deletions(-)

diff --git a/kernel/module/Kconfig b/kernel/module/Kconfig
index f9e5f82fa88b..e6b2427e5c19 100644
--- a/kernel/module/Kconfig
+++ b/kernel/module/Kconfig
@@ -169,13 +169,36 @@ config MODVERSIONS
  make them incompatible with the kernel you are running.  If
  unsure, say N.
 
+choice
+   prompt "Module versioning implementation"
+   depends on MODVERSIONS
+   default GENKSYMS
+   help
+ Select the tool used to calculate symbol versions for modules.
+
+ If unsure, select GENKSYMS.
+
+config GENKSYMS
+   bool "genksyms (from source code)"
+   help
+ Calculate symbol versions from pre-processed source code using
+ genksyms.
+
+ If unsure, say Y.
+
 config GENDWARFKSYMS
-   bool
+   bool "gendwarfksyms (from debugging information)"
depends on DEBUG_INFO
# Requires full debugging information, split DWARF not supported.
depends on !DEBUG_INFO_REDUCED && !DEBUG_INFO_SPLIT
# Requires ELF object files.
depends on !LTO
+   help
+ Calculate symbol versions from DWARF debugging information using
+ gendwarfksyms. Requires DEBUG_INFO to be enabled.
+
+ If unsure, say N.
+endchoice
 
 config ASM_MODVERSIONS
bool
diff --git a/scripts/Makefile b/scripts/Makefile
index d7fec46d38c0..8533f4498885 100644
--- a/scripts/Makefile
+++ b/scripts/Makefile
@@ -53,7 +53,7 @@ hostprogs += unifdef
 targets += module.lds
 
 subdir-$(CONFIG_GCC_PLUGINS) += gcc-plugins
-subdir-$(CONFIG_MODVERSIONS) += genksyms
+subdir-$(CONFIG_GENKSYMS) += genksyms
 subdir-$(CONFIG_GENDWARFKSYMS) += gendwarfksyms
 subdir-$(CONFIG_SECURITY_SELINUX) += selinux
 subdir-$(CONFIG_SECURITY_IPE) += ipe
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 8f423a1faf50..ae13afb71123 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -107,18 +107,28 @@ cmd_cpp_i_c   = $(CPP) $(c_flags) -o $@ $<
 $(obj)/%.i: $(obj)/%.c FORCE
$(call if_changed_dep,cpp_i_c)
 
+gendwarfksyms := scripts/gendwarfksyms/gendwarfksyms
+getexportsymbols = $(NM) $(1) | sed -n 's/.* __export_symbol_\(.*\)/$(2)/p'
+
 genksyms = scripts/genksyms/genksyms   \
$(if $(1), -T $(2)) \
$(if $(KBUILD_PRESERVE), -p)\
-r $(or $(wildcard $(2:.symtypes=.symref)), /dev/null)
 
 # These mirror gensymtypes_S and co below, keep them in synch.
+ifdef CONFIG_GENDWARFKSYMS
+symtypes_dep_c = $(obj)/%.o
+cmd_gensymtypes_c = $(if $(skip_gendwarfksyms),,   \
+   $(call getexportsymbols,$(2:.symtypes=.o),\1) | \
+   $(gendwarfksyms) $(2:.symtypes=.o) $(if $(1), --symtypes $(2)))
+else
 cmd_gensymtypes_c = $(CPP) -D__GENKSYMS__ $(c_flags) $< | $(genksyms)
+endif # CONFIG_GENDWARFKSYMS
 
 quiet_cmd_cc_symtypes_c = SYM $(quiet_modtag) $@
   cmd_cc_symtypes_c = $(call cmd_gensymtypes_c,true,$@) >/dev/null
 
-$(obj)/%.symtypes : $(obj)/%.c FORCE
+$(obj)/%.symtypes : $(obj)/%.c $(symtypes_dep_c) FORCE
$(call cmd,cc_symtypes_c)
 
 # LLVM assembly
@@ -314,19 +324,32 @@ $(obj)/%.ll: $(obj)/%.rs FORCE
 # This is convoluted. The .S file must first be preprocessed to run guards and
 # expand names, then the resulting exports must be constructed into plain
 # EXPORT_SYMBOL(symbol); to build our dummy C file, and that gets preprocessed
-# to make the genksyms input.
+# to make the genksyms input or compiled into an object for gendwarfksyms.
 #
 # These mirror gensymtypes_c and co above, keep them in synch.
-cmd_gensymtypes_S = \
-   { echo "\#include <linux/kernel.h>" ;\
- echo "\#include <asm/asm-prototypes.h>" ;  \
- $(NM) $@ | sed -n 's/.* __export_symbol_\(.*\)/EXPORT_SYMBOL(\1);/p' ; } | \
-$(CPP) -D__GENKSYMS__ $(c_flags) -xc - | $(genksyms)
+getasmexports = \
+   { echo "\#include <linux/kernel.h>" ;   \
+ echo "\#include <linux/string.h>" ;   \
+ echo "\#include <asm/asm-prototypes.h>" ; \
+ $(call getexportsymbols,$(2:.symtypes=.o),EXPORT_SYMBOL(\1);) ; }
+
+ifdef CONFIG_GENDWARFKSYMS
+cmd_gensymtypes_S =\
+   $(getasmexports) |  \
+   $(CC) $(c_flags) -c -o $(2:.symtypes=.gendwarfksyms.o) -xc -;   \
+   $(call getexportsymbols,$(2:.symtypes=.o),\1) | \
+   $(gendwarfksyms) $(2:.symtypes=.gendwarfksyms.o)

[PATCH v4 17/19] export: Add __gendwarfksyms_ptr_ references to exported symbols

With gendwarfksyms, we need each TU where the EXPORT_SYMBOL() macro
is used to also contain DWARF type information for the symbols it
exports.  However, as a TU can also export external symbols and
compilers may choose not to emit debugging information for symbols not
defined in the current TU, the missing types will result in missing
symbol versions. Stand-alone assembly code also doesn't contain type
information for exported symbols, so we need to compile a temporary
object file with asm-prototypes.h instead, and similarly need to
ensure the DWARF in the temporary object file contains the necessary
types.

To always emit type information for external exports, add explicit
__gendwarfksyms_ptr_ references to them in EXPORT_SYMBOL().
gendwarfksyms will use the type information for __gendwarfksyms_ptr_*
if needed. Discard the pointers from the final binary to avoid further
bloat.

Signed-off-by: Sami Tolvanen 
Acked-by: Neal Gompa 
---
 include/linux/export.h | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/include/linux/export.h b/include/linux/export.h
index 0bbd02fd351d..cf71d3202e5b 100644
--- a/include/linux/export.h
+++ b/include/linux/export.h
@@ -52,9 +52,24 @@
 
 #else
 
+#ifdef CONFIG_GENDWARFKSYMS
+/*
+ * With CONFIG_GENDWARFKSYMS, ensure the compiler emits debugging
+ * information for all exported symbols, including those defined in
+ * different TUs, by adding a __gendwarfksyms_ptr_ pointer
+ * that's discarded during the final link.
+ */
+#define __GENDWARFKSYMS_EXPORT(sym)\
+   static typeof(sym) *__gendwarfksyms_ptr_##sym __used\
+   __section(".discard.gendwarfksyms") = &sym;
+#else
+#define __GENDWARFKSYMS_EXPORT(sym)
+#endif
+
 #define __EXPORT_SYMBOL(sym, license, ns)  \
extern typeof(sym) sym; \
__ADDRESSABLE(sym)  \
+   __GENDWARFKSYMS_EXPORT(sym) \
asm(__stringify(___EXPORT_SYMBOL(sym, license, ns)))
 
 #endif
-- 
2.47.0.rc0.187.ge670bccf7e-goog

Re: [RFC PATCH v3 3/4] hazptr: Implement Hazard Pointers


On 2024-10-08 15:50, Mathieu Desnoyers wrote:
[...]


+/* Retire the protected hazard pointer from @slot. */
+static inline
+void hazptr_retire(struct hazptr_slot *slot, void *addr)
+{
+   WARN_ON_ONCE(slot->addr != addr);
+   smp_store_release(&slot->addr, NULL);
+}


Actually, comparing this with the literature and past presentations
from Maged Michael, "retire" is not the appropriate name here.

With Hazard Pointers, AFAIU, the "retire" operation is similar to a
call_rcu() memory reclaim. It marks the object for eventual reclamation
when it is safe to do so.

The API here really just releases the recently protected hazard
pointer. So I will rename this to "hazptr_release()".

At this stage, this hazptr API does not implement any retire operation,
and only offers the hazptr_scan() for reader/updater synchronization,
leaving the actual reclaim to the caller.

Thanks,

Mathieu

--
Mathieu Desnoyers
EfficiOS Inc.
https://www.efficios.com

Re: [PATCH v10 7/7] remoteproc: stm32: Add support of an OP-TEE TA to load the firmware

2024-10-08 Thread kernel test robot

Hi Arnaud,

kernel test robot noticed the following build errors:

[auto build test ERROR on 9852d85ec9d492ebef56dc5f229416c925758edc]

url:
https://github.com/intel-lab-lkp/linux/commits/Arnaud-Pouliquen/remoteproc-core-Introduce-rproc_pa_to_va-helper/20241007-212358
base:   9852d85ec9d492ebef56dc5f229416c925758edc
patch link:
https://lore.kernel.org/r/20241007131620.2090104-8-arnaud.pouliquen%40foss.st.com
patch subject: [PATCH v10 7/7] remoteproc: stm32: Add support of an OP-TEE TA 
to load the firmware
config: arm-randconfig-r054-20241008 
(https://download.01.org/0day-ci/archive/20241009/202410090225.d3bzaaqk-...@intel.com/config)
compiler: clang version 20.0.0git (https://github.com/llvm/llvm-project 
fef3566a25ff0e34fb87339ba5e13eca17cec00f)
reproduce (this is a W=1 build): 
(https://download.01.org/0day-ci/archive/20241009/202410090225.d3bzaaqk-...@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot 
| Closes: 
https://lore.kernel.org/oe-kbuild-all/202410090225.d3bzaaqk-...@intel.com/

All errors (new ones prefixed by >>):

>> ld.lld: error: undefined symbol: tee_client_invoke_func
   >>> referenced by remoteproc_tee.c
   >>>   drivers/remoteproc/remoteproc_tee.o:(tee_rproc_release_fw) 
in archive vmlinux.a
   >>> referenced by remoteproc_tee.c
   >>>   drivers/remoteproc/remoteproc_tee.o:(tee_rproc_load_fw) in 
archive vmlinux.a
   >>> referenced by remoteproc_tee.c
   >>>   drivers/remoteproc/remoteproc_tee.o:(tee_rproc_parse_fw) 
in archive vmlinux.a
   >>> referenced 3 more times
--
>> ld.lld: error: undefined symbol: tee_shm_register_kernel_buf
   >>> referenced by remoteproc_tee.c
   >>>   drivers/remoteproc/remoteproc_tee.o:(tee_rproc_load_fw) in 
archive vmlinux.a
--
>> ld.lld: error: undefined symbol: tee_shm_free
   >>> referenced by remoteproc_tee.c
   >>>   drivers/remoteproc/remoteproc_tee.o:(tee_rproc_load_fw) in 
archive vmlinux.a
--
>> ld.lld: error: undefined symbol: tee_client_open_session
   >>> referenced by remoteproc_tee.c
   >>>   drivers/remoteproc/remoteproc_tee.o:(tee_rproc_register) 
in archive vmlinux.a
--
>> ld.lld: error: undefined symbol: tee_client_close_session
   >>> referenced by remoteproc_tee.c
   >>>   drivers/remoteproc/remoteproc_tee.o:(tee_rproc_unregister) 
in archive vmlinux.a
   >>> referenced by remoteproc_tee.c
   >>>   drivers/remoteproc/remoteproc_tee.o:(tee_rproc_remove) in 
archive vmlinux.a
--
>> ld.lld: error: undefined symbol: tee_client_open_context
   >>> referenced by remoteproc_tee.c
   >>>   drivers/remoteproc/remoteproc_tee.o:(tee_rproc_probe) in 
archive vmlinux.a
--
>> ld.lld: error: undefined symbol: tee_client_close_context
   >>> referenced by remoteproc_tee.c
   >>>   drivers/remoteproc/remoteproc_tee.o:(tee_rproc_probe) in 
archive vmlinux.a
   >>> referenced by remoteproc_tee.c
   >>>   drivers/remoteproc/remoteproc_tee.o:(tee_rproc_remove) in 
archive vmlinux.a
--
>> ld.lld: error: undefined symbol: tee_bus_type
   >>> referenced by remoteproc_tee.c
   >>>   drivers/remoteproc/remoteproc_tee.o:(tee_rproc_fw_driver) 
in archive vmlinux.a

Kconfig warnings: (for reference only)
   WARNING: unmet direct dependencies detected for REMOTEPROC_TEE
   Depends on [n]: REMOTEPROC [=y] && OPTEE [=n]
   Selected by [y]:
   - STM32_RPROC [=y] && (ARCH_STM32 [=n] || COMPILE_TEST [=y]) && REMOTEPROC 
[=y]

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

Re: [RFC PATCH 30/39] KVM: guest_memfd: Handle folio preparation for guest_memfd mmap

2024-10-08 Thread Sean Christopherson

On Thu, Oct 03, 2024, Ackerley Tng wrote:
> Ackerley Tng  writes:
> 
> > Elliot Berman  writes:
> >> From x86 CoCo perspective, I think it also makes sense to not zero
> >> the folio when changing faultability from private to shared:
> >>  - If guest is sharing some data with host, you've wiped the data and
> >>guest has to copy again.
> >>  - Or, if SEV/TDX enforces that page is zero'd between transitions,
> >>Linux has duplicated the work that trusted entity has already done.
> >>
> >> Fuad and I can help add some details for the conversion. Hopefully we
> >> can figure out some of the plan at plumbers this week.
> >
> > Zeroing the page prevents leaking host data (see function docstring for
> > kvm_gmem_prepare_folio() introduced in [1]), so we definitely don't want
> > to introduce a kernel data leak bug here.
> 
> Actually it seems like filemap_grab_folio() already gets a zeroed page.
> 
> filemap_grab_folio() eventually calls __alloc_pages_noprof()
> -> get_page_from_freelist()
>-> prep_new_page()
>   -> post_alloc_hook()
> 
> and post_alloc_hook() calls kernel_init_pages(), which zeroes the page,
> depending on kernel config.
> 
> Paolo, was calling clear_highpage() in kvm_gmem_prepare_folio() zeroing an
> already empty page returned from filemap_grab_folio()?

Yes and no.  CONFIG_INIT_ON_ALLOC_DEFAULT_ON and init_on_alloc are very much
hardening features, not functional behavior that other code _needs_ to be aware
of.  E.g. enabling init-on-alloc comes with a measurable performance cost.

Ignoring hardening, the guest_memfd mapping specifically sets the gfp_mask to
GFP_HIGHUSER, i.e. doesn't set __GFP_ZERO.

That said, I wouldn't be opposed to skipping the clear_highpage() call when
want_init_on_alloc() is true.

Also, the intended behavior (or at least, what I intended) of
kvm_gmem_prepare_folio()
was it would do clear_highpage() if and only if a trusted entity does NOT zero
the page.  Factoring that in is a bit harder, as it probably requires another
arch hook (or providing an out-param from kvm_arch_gmem_prepare()).  I.e. the
want_init_on_alloc() case isn't the only time KVM could shave cycles by not
redundantly zeroing memory.

Re: [PATCH] selftest: hid: add the missing tests directory


On 10/8/24 03:31, Yun Lu wrote:

Commit 160c826b4dd0 ("selftest: hid: add missing run-hid-tools-tests.sh")
has added the run-hid-tools-tests.sh script for it to be installed, but
I forgot to add the tests directory along with it.

In fact, the run-hid-tools-tests.sh script uses the scripts in the tests
directory to run tests, so the tests directory also needs to be
installed.

Include the error you are seeing in here.



Fixes: ffb85d5c9e80 ("selftests: hid: import hid-tools hid-core tests")
Cc: sta...@vger.kernel.org
Signed-off-by: Yun Lu 
---
  tools/testing/selftests/hid/Makefile | 1 +
  1 file changed, 1 insertion(+)

diff --git a/tools/testing/selftests/hid/Makefile 
b/tools/testing/selftests/hid/Makefile
index 38ae31bb07b5..662209f5fabc 100644
--- a/tools/testing/selftests/hid/Makefile
+++ b/tools/testing/selftests/hid/Makefile
@@ -18,6 +18,7 @@ TEST_PROGS += hid-usb_crash.sh
  TEST_PROGS += hid-wacom.sh
  
  TEST_FILES := run-hid-tools-tests.sh

+TEST_FILES += tests


What about the files if any under the tests directory?
The install rule would handle the case, however, did
you verify that those are copied as well?

  
  CXX ?= $(CROSS_COMPILE)g++
  


thanks,
-- Shuah

Re: [RFC PATCH 30/39] KVM: guest_memfd: Handle folio preparation for guest_memfd mmap

2024-10-08 Thread Ackerley Tng

Patrick Roy  writes:

> Hi Ackerley,
>
> On Thu, 2024-10-03 at 22:32 +0100, Ackerley Tng wrote:
>> Elliot Berman  writes:
>>
>>> On Tue, Sep 10, 2024 at 11:44:01PM +0000, Ackerley Tng wrote:
>>>> Since guest_memfd now supports mmap(), folios have to be prepared
>>>> before they are faulted into userspace.
>>>>
>>>> When memory attributes are switched between shared and private, the
>>>> up-to-date flags will be cleared.
>>>>
>>>> Use the folio's up-to-date flag to indicate being ready for the guest
>>>> usage and can be used to mark whether the folio is ready for shared OR
>>>> private use.
>>>
>>> Clearing the up-to-date flag also means that the page gets zero'd out
>>> whenever it transitions between shared and private (either direction).
>>> pKVM (Android) hypervisor policy can allow in-place conversion between
>>> shared/private.
>>>
>>> I believe the important thing is that sev_gmem_prepare() needs to be
>>> called prior to giving page to guest. In my series, I had made a
>>> ->prepare_inaccessible() callback where KVM would only do this part.
>>> When transitioning to inaccessible, only that callback would be made,
>>> besides the bookkeeping. The folio zeroing happens once when allocating
>>> the folio if the folio is initially accessible (faultable).
>>>
>>> From x86 CoCo perspective, I think it also makes sense to not zero
>>> the folio when changing faultability from private to shared:
>>>  - If guest is sharing some data with host, you've wiped the data and
>>>guest has to copy again.
>>>  - Or, if SEV/TDX enforces that page is zero'd between transitions,
>>>Linux has duplicated the work that trusted entity has already done.
>>>
>>> Fuad and I can help add some details for the conversion. Hopefully we
>>> can figure out some of the plan at plumbers this week.
>>
>> Zeroing the page prevents leaking host data (see function docstring for
>> kvm_gmem_prepare_folio() introduced in [1]), so we definitely don't want
>> to introduce a kernel data leak bug here.
>>
>> In-place conversion does require preservation of data, so for
>> conversions, shall we zero depending on VM type?
>>
>> + Gunyah: don't zero since ->prepare_inaccessible() is a no-op
>> + pKVM: don't zero
>> + TDX: don't zero
>> + SEV: AMD Architecture Programmers Manual 7.10.6 says there is no
>>   automatic encryption and implies no zeroing, hence perform zeroing
>> + KVM_X86_SW_PROTECTED_VM: Doesn't have a formal definition so I guess
>>   we could require zeroing on transition?
>
> Maybe for KVM_X86_SW_PROTECTED_VM we could make zero-ing configurable
> via some CREATE_GUEST_MEMFD flag, instead of forcing one specific
> behavior.

Sounds good to me, I can set up a flag in the next revision.

> For the "non-CoCo with direct map entries removed" VMs that we at AWS
> are going for, we'd like a VM type with host-controlled in-place
> conversions which doesn't zero on transitions, so if
> KVM_X86_SW_PROTECTED_VM ends up zeroing, we'd need to add another new VM
> type for that.
>
> Somewhat related sidenote: For VMs that allow inplace conversions and do
> not zero, we do not need to zap the stage-2 mappings on memory attribute
> changes, right?
>

Here are some reasons for zapping I can think of:

1. When private pages are split/merged, zapping the stage-2 mappings on
   memory attribute changes allows the private pages to be re-faulted by
   KVM at smaller/larger granularity.

2. The rationale described here
   https://elixir.bootlin.com/linux/v6.11.2/source/arch/x86/kvm/mmu/mmu.c#L7482
   ("Zapping SPTEs in this case ensures KVM will reassess whether or not
   a hugepage can be used for affected ranges.") probably refers to the
   existing implementation, when a different set of physical pages is
   used to back shared and private memory. When the same set of physical
   pages is used for both shared and private memory, then IIUC this
   rationale does not apply.

3. There's another rationale for zapping
   https://elixir.bootlin.com/linux/v6.11.2/source/virt/kvm/kvm_main.c#L2494
   to do with read vs write mappings here. I don't fully understand
   this, does this rationale still apply?

4. Is zapping required if the pages get removed/added to kernel direct
   map?

>> This way, the uptodate flag means that it has been prepared (as in
>> sev_gmem_prepare()), and zeroed if required by VM type.
>>
>> Regarding flushing the dcache/tlb in your other question [2], if we
>> don't use folio_zero_user(), can we rely on unmapping within core-mm
>> to flush after shared use, and unmapping within KVM to flush after
>> private use?
>>
>> Or should flush_dcache_folio() be explicitly called on kvm_gmem_fault()?
>>
>> clear_highpage(), used in the non-hugetlb (original) path, doesn't flush
>> the dcache. Was that intended?
>>
>>> Thanks,
>>> Elliot
>>>

 
>>
>> [1] https://lore.kernel.org/all/20240726185157.72821-8-pbonz...@redhat.com/
>> [2] 
>> https://lore.kernel.org/all/diqz34ldszp3@ackerleytng-ctop.c.googlers.com/
>
> Best,
> P

Re: [PATCH v8 8/8] clk: Add KUnit tests for clks registered with struct clk_parent_data

2024-10-08 Thread Stephen Boyd

Quoting Guenter Roeck (2024-10-03 21:52:09)
> On 10/3/24 17:42, Stephen Boyd wrote:
> > 
> > Can you please describe how you run the kunit test? And provide the qemu
> > command you run to boot arm64 with acpi?
> > 
> 
> Example command line:
> 
> qemu-system-aarch64 -M virt -m 512 \
>   -kernel arch/arm64/boot/Image -no-reboot -nographic \
>   -snapshot \
>   -bios /opt/buildbot/rootfs/arm64/../firmware/QEMU_EFI-aarch64.fd \
>   -device virtio-blk-device,drive=d0 \
>   -drive file=rootfs.ext2,if=none,id=d0,format=raw \
>   -cpu cortex-a57 -serial stdio -monitor none -no-reboot \
>   --append "kunit.stats_enabled=2 kunit.filter=speed>slow root=/dev/vda 
> rootwait earlycon=pl011,0x9000000 console=ttyAMA0"
> 
> That works fine for me. Configuration is arm64 defconfig plus various
> debug and kunit options. I built the efi image myself from sources.
> The root file system is from buildroot with modified init script.
> kunit tests are all built into the kernel and run during boot.

Thanks. I figured out that I was missing enabling CONFIG_ACPI. Here's my
commandline

./tools/testing/kunit/kunit.py run --arch=arm64 \
--kunitconfig=drivers/of \
--qemu_args="-bios /usr/share/qemu-efi-aarch64/QEMU_EFI.fd -smp 2" \
--kconfig_add="CONFIG_ACPI=y" \
--kernel_args="earlycon=pl011,0x9000000"

Now I can boot and reproduce the failure, but there's another problem.
ACPI disables itself when it fails to find tables.

 ACPI: Unable to load the System Description Tables

This calls disable_acpi() which sets acpi_disabled to 1. This happens
before the unit test runs, meaning we can't reliably use 'acpi_disabled'
as a method to skip.

The best I can come up with then is to test for a NULL of_root when
CONFIG_ARM64 and CONFIG_ACPI are enabled, because the tests
intentionally don't work when both those configs are enabled and the
'of_root' isn't populated. In all other cases the 'of_root' missing is a
bug. I'll probably make this into some sort of kunit helper function in
of_private.h and send it to DT maintainers.

---8<
diff --git a/drivers/of/of_kunit_helpers.c b/drivers/of/of_kunit_helpers.c
index 287d6c91bb37..a1330e183230 100644
--- a/drivers/of/of_kunit_helpers.c
+++ b/drivers/of/of_kunit_helpers.c
@@ -36,6 +36,9 @@ int of_overlay_fdt_apply_kunit(struct kunit *test, void 
*overlay_fdt,
int ret;
int *copy_id;
 
+   if (IS_ENABLED(CONFIG_ARM64) && IS_ENABLED(CONFIG_ACPI) && !of_root)
+   kunit_skip(test, "arm64+acpi rejects overlays");
+
copy_id = kunit_kmalloc(test, sizeof(*copy_id), GFP_KERNEL);
if (!copy_id)
return -ENOMEM;
diff --git a/drivers/of/of_test.c b/drivers/of/of_test.c
index c85a258bc6ae..6cca43bf8029 100644
--- a/drivers/of/of_test.c
+++ b/drivers/of/of_test.c
@@ -38,6 +38,8 @@ static int of_dtb_test_init(struct kunit *test)
 {
if (!IS_ENABLED(CONFIG_OF_EARLY_FLATTREE))
kunit_skip(test, "requires CONFIG_OF_EARLY_FLATTREE");
+   if (IS_ENABLED(CONFIG_ARM64) && IS_ENABLED(CONFIG_ACPI) && !of_root)
+   kunit_skip(test, "arm64+acpi doesn't populate a root node on 
ACPI systems");
 
return 0;
 }
diff --git a/drivers/of/overlay_test.c b/drivers/of/overlay_test.c
index 19a292cdeee3..3e7ac97a6796 100644
--- a/drivers/of/overlay_test.c
+++ b/drivers/of/overlay_test.c
@@ -64,6 +64,8 @@ static void of_overlay_apply_kunit_cleanup(struct kunit *test)
 
if (!IS_ENABLED(CONFIG_OF_EARLY_FLATTREE))
kunit_skip(test, "requires CONFIG_OF_EARLY_FLATTREE for root 
node");
+   if (IS_ENABLED(CONFIG_ARM64) && IS_ENABLED(CONFIG_ACPI) && !of_root)
+   kunit_skip(test, "arm64+acpi rejects overlays");
 
kunit_init_test(&fake, "fake test", NULL);
KUNIT_ASSERT_EQ(test, fake.status, KUNIT_SUCCESS);

Re: [PATCH v8 8/8] clk: Add KUnit tests for clks registered with struct clk_parent_data

2024-10-08 Thread Guenter Roeck


On 10/8/24 16:12, Stephen Boyd wrote:

Quoting Guenter Roeck (2024-10-03 21:52:09)

On 10/3/24 17:42, Stephen Boyd wrote:


Can you please describe how you run the kunit test? And provide the qemu
command you run to boot arm64 with acpi?



Example command line:

qemu-system-aarch64 -M virt -m 512 \
   -kernel arch/arm64/boot/Image -no-reboot -nographic \
   -snapshot \
   -bios /opt/buildbot/rootfs/arm64/../firmware/QEMU_EFI-aarch64.fd \
   -device virtio-blk-device,drive=d0 \
   -drive file=rootfs.ext2,if=none,id=d0,format=raw \
   -cpu cortex-a57 -serial stdio -monitor none -no-reboot \
   --append "kunit.stats_enabled=2 kunit.filter=speed>slow root=/dev/vda 
rootwait earlycon=pl011,0x9000000 console=ttyAMA0"

That works fine for me. Configuration is arm64 defconfig plus various
debug and kunit options. I built the efi image myself from sources.
The root file system is from buildroot with modified init script.
kunit tests are all built into the kernel and run during boot.


Thanks. I figured out that I was missing enabling CONFIG_ACPI. Here's my
commandline

./tools/testing/kunit/kunit.py run --arch=arm64 \
--kunitconfig=drivers/of \
--qemu_args="-bios /usr/share/qemu-efi-aarch64/QEMU_EFI.fd -smp 2" \
--kconfig_add="CONFIG_ACPI=y" \
--kernel_args="earlycon=pl011,0x9000000"

Now I can boot and reproduce the failure, but there's another problem.
ACPI disables itself when it fails to find tables.

  ACPI: Unable to load the System Description Tables

This calls disable_acpi() which sets acpi_disabled to 1. This happens
before the unit test runs, meaning we can't reliably use 'acpi_disabled'
as a method to skip.

The best I can come up with then is to test for a NULL of_root when
CONFIG_ARM64 and CONFIG_ACPI are enabled, because the tests
intentionally don't work when both those configs are enabled and the
'of_root' isn't populated. In all other cases the 'of_root' missing is a
bug. I'll probably make this into some sort of kunit helper function in
of_private.h and send it to DT maintainers.


Sounds good. Thanks a lot for tracking this down.

That makes me wonder though why only arm64 has that restriction. Both
riscv and loongarch have ACPI enabled in their defconfig files but call
unflatten_device_tree() unconditionally.

Oh well ...

Thanks,
Guenter

Re: [PATCH v3] selftests: sched_ext: Add sched_ext as proper selftest target

2024-10-08 Thread David Vernet

On Tue, Oct 08, 2024 at 05:35:18PM +0200, Björn Töpel wrote:
> From: Björn Töpel 
> 
> The sched_ext selftests are missing proper cross-compilation support, a
> proper target entry, and out-of-tree build support.
> 
> When building the kselftest suite, e.g.:
> 
>   make ARCH=riscv CROSS_COMPILE=riscv64-linux-gnu-  \
> TARGETS=sched_ext SKIP_TARGETS="" O=/output/foo \
> -C tools/testing/selftests install
> 
> or:
> 
>   make ARCH=arm64 LLVM=1 TARGETS=sched_ext SKIP_TARGETS="" \
> O=/output/foo -C tools/testing/selftests install
> 
> The expectation is that the sched_ext is included, cross-built, the
> correct toolchain is picked up, and placed into /output/foo.
> 
> In contrast to the BPF selftests, the sched_ext suite does not use
> bpftool at test run-time, so it is sufficient to build bpftool for the
> build host only.
> 
> Add ARCH, CROSS_COMPILE, OUTPUT, and TARGETS support to the sched_ext
> selftest. Also, remove some variables that were unused by the
> Makefile.
> 
> Signed-off-by: Björn Töpel 

Thanks for the improvements!

Acked-by: David Vernet 


signature.asc
Description: PGP signature

Re: [PATCH] selftests: make kselftest-clean remove libynl outputs

2024-10-08 Thread Mina Almasry

On Sat, Oct 5, 2024 at 2:56 PM Greg Thelen  wrote:
>
> Starting with 6.12 commit 85585b4bc8d8 ("selftests: add ncdevmem, netcat
> for devmem TCP") kselftest-all creates additional outputs that
> kselftest-clean does not cleanup:
>   $ make defconfig
>   $ make kselftest-all
>   $ make kselftest-clean
>   $ git clean -ndxf | grep tools/net
>   Would remove tools/net/ynl/lib/__pycache__/
>   Would remove tools/net/ynl/lib/ynl.a
>   Would remove tools/net/ynl/lib/ynl.d
>   Would remove tools/net/ynl/lib/ynl.o
>
> Make kselftest-clean remove the newly added net/ynl outputs.
>
> Fixes: 85585b4bc8d8 ("selftests: add ncdevmem, netcat for devmem TCP")

Probably moot since this is merged already (sorry for the delay), but
likely it's:

Fixes: 07c3cc51a085 ("tools: net: package libynl for use in selftests")

The referenced patch doesn't touch ynl.mk, but adds a dependency on it

> Signed-off-by: Greg Thelen 

Thank you Greg,

Reviewed-by: Mina Almasry 



-- 
Thanks,
Mina

Re: [PATCH 3/3] rcu: Report callbacks enqueued on offline CPU blind spot

2024-10-08 Thread Paul E. McKenney

On Wed, Oct 02, 2024 at 05:00:03PM +0200, Frederic Weisbecker wrote:
> Le Wed, Oct 02, 2024 at 04:57:38PM +0200, Frederic Weisbecker a écrit :
> > Callbacks enqueued after rcutree_report_cpu_dead() fall into RCU barrier
> > blind spot. Report any potential misuse.
> > 
> > Reported-by: Paul E. McKenney 
> > Signed-off-by: Frederic Weisbecker 
> > ---
> >  kernel/rcu/tree.c | 3 +++
> >  1 file changed, 3 insertions(+)
> > 
> > diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> > index a60616e69b66..36070b6bf4a1 100644
> > --- a/kernel/rcu/tree.c
> > +++ b/kernel/rcu/tree.c
> > @@ -3084,8 +3084,11 @@ __call_rcu_common(struct rcu_head *head, 
> > rcu_callback_t func, bool lazy_in)
> > head->func = func;
> > head->next = NULL;
> > kasan_record_aux_stack_noalloc(head);
> > +
> > local_irq_save(flags);
> > rdp = this_cpu_ptr(&rcu_data);
> > +   RCU_LOCKDEP_WARN(rcu_rdp_cpu_online(rdp), "Callback enqueued on offline
> > CPU!");
> 
> This should be !rcu_rdp_cpu_online(rdp)
> 
> Sigh...

I am pulling this in for testing with this change, thank you!

Thanx, Paul

> > +
> > lazy = lazy_in && !rcu_async_should_hurry();
> >  
> > /* Add the callback to our list. */
> > -- 
> > 2.46.0
> > 
> >

Re: [PATCH net-next v8 03/24] ovpn: add basic netlink support

On 08/10/2024 10:58, Jiri Pirko wrote:

Tue, Oct 08, 2024 at 10:01:40AM CEST, anto...@openvpn.net wrote:

Hi,

On 07/10/24 17:32, Jiri Pirko wrote:

Wed, Oct 02, 2024 at 11:02:17AM CEST, anto...@openvpn.net wrote:

[...]

Why do you expose new and del here in the ovpn-specific generic netlink iface?
Why can't you use the existing RTNL API, which is used for creation and
destruction of other types of devices?

Can you please point me to the message id?

from Sergey and subsequent replies.

RTNL vs NL topic starts right after the definition of 'ovpn_link_ops'

Recently Kuniyuki commented on this topic as well in:
<20240919055259.17622-1-kun...@amazon.com>
and that is why I added a default dellink implementation.

However, recently we decided to add a dellink implementation for better
integration with network namespaces and to allow the user to wipe a dangling
interface.

Hmm, one more argument to have a symmetric add/del implementation in RTNL.

It would be quite confusing to have both RTNL and genetlink ifaces to
add/del a device. From what you described above, it makes more sense to have
it just in RTNL.

All in all I tend to agree.

Well, technically, you can probably do both. But it is quite common
that you can add/delete these kinds of devices over RTNL. Lots of
examples. People are used to it, and it aligns with existing flows.

The only counterargument I see is the one brought by Sergey: "the ovpn
interface is not usable after creation, if no openvpn process is running".

However, allowing to create "persistent interfaces" will define a
use-case for having an ovpn device without any userspace process.

@Sergey what is your opinion here? I am not sure persistent interfaces
were discussed at the time you brought your point about RTNL vs NL.

Regards,

Thanks a lot for your contribution.

Regards,

ip link delete { DEVICE | dev DEVICE | group DEVGROUP } type TYPE [ ARGS ]

Lots of examples of creating existing device types can be found here, for instance:
https://developers.redhat.com/blog/2018/10/22/introduction-to-linux-interfaces-for-virtual-networking

+ attribute-set: ovpn
+ flags: [ admin-perm ]
+ doc: Delete existing interface of type ovpn
+ do:
+pre: ovpn-nl-pre-doit
+post: ovpn-nl-post-doit
+request:
+ attributes:
+- ifindex

[...]

--
Antonio Quartulli
OpenVPN Inc.

[PATCH V2 4/4] selftests/mm: skip virtual_address_range tests on riscv

RISC-V doesn't currently have the behavior of restricting the virtual
address space that the virtual_address_range tests check, which
causes the tests to fail. So let's disable the whole test suite for riscv64
for now: don't build it, and run_vmtests.sh will skip it if it is not present.

Reviewed-by: Charlie Jenkins 
Signed-off-by: Chunyan Zhang 
---
V1: https://lore.kernel.org/linux-mm/ZuOuedBpS7i3T%2Fo0@ghost/T/
---
 tools/testing/selftests/mm/Makefile   |  2 ++
 tools/testing/selftests/mm/run_vmtests.sh | 10 ++
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/mm/Makefile 
b/tools/testing/selftests/mm/Makefile
index 02e1204971b0..76a378c5c141 100644
--- a/tools/testing/selftests/mm/Makefile
+++ b/tools/testing/selftests/mm/Makefile
@@ -115,7 +115,9 @@ endif
 
 ifneq (,$(filter $(ARCH),arm64 mips64 parisc64 powerpc riscv64 s390x sparc64 
x86_64 s390))
 TEST_GEN_FILES += va_high_addr_switch
+ifneq ($(ARCH),riscv64)
 TEST_GEN_FILES += virtual_address_range
+endif
 TEST_GEN_FILES += write_to_hugetlbfs
 endif
 
diff --git a/tools/testing/selftests/mm/run_vmtests.sh 
b/tools/testing/selftests/mm/run_vmtests.sh
index c5797ad1d37b..4493bfd1911c 100755
--- a/tools/testing/selftests/mm/run_vmtests.sh
+++ b/tools/testing/selftests/mm/run_vmtests.sh
@@ -347,10 +347,12 @@ if [ $VADDR64 -ne 0 ]; then
# allows high virtual address allocation requests independent
# of platform's physical memory.
 
-   prev_policy=$(cat /proc/sys/vm/overcommit_memory)
-   echo 1 > /proc/sys/vm/overcommit_memory
-   CATEGORY="hugevm" run_test ./virtual_address_range
-   echo $prev_policy > /proc/sys/vm/overcommit_memory
+   if [ -x ./virtual_address_range ]; then
+   prev_policy=$(cat /proc/sys/vm/overcommit_memory)
+   echo 1 > /proc/sys/vm/overcommit_memory
+   CATEGORY="hugevm" run_test ./virtual_address_range
+   echo $prev_policy > /proc/sys/vm/overcommit_memory
+   fi
 
# va high address boundary switch test
ARCH_ARM64="arm64"
-- 
2.34.1

[PATCH V2 0/4] A few fixes for RISC-V

These patches are all simple fixes with no strong dependency on each other;
I hope that sending them as a patchset will make them more convenient to merge.

The patchset is based on v6.12-rc2.

Chunyan Zhang (4):
  riscv: Remove unused GENERATING_ASM_OFFSETS
  riscv: Remove duplicated GET_RM
  selftest/mm: Fix typo in virtual_address_range
  selftests/mm: skip virtual_address_range tests on riscv

 arch/riscv/kernel/asm-offsets.c|  2 --
 arch/riscv/kernel/traps_misaligned.c   |  2 --
 tools/testing/selftests/mm/Makefile|  2 ++
 tools/testing/selftests/mm/run_vmtests.sh  | 10 ++
 tools/testing/selftests/mm/virtual_address_range.c |  4 ++--
 5 files changed, 10 insertions(+), 10 deletions(-)

-- 
2.34.1

[PATCH V2 3/4] selftest/mm: Fix typo in virtual_address_range

The function returns a *hint* address, so its name should be hint_addr(); correct the typo.

Reviewed-by: Charlie Jenkins 
Signed-off-by: Chunyan Zhang 
---
V1: https://lore.kernel.org/linux-mm/ZuOuedBpS7i3T%2Fo0@ghost/T/
---
 tools/testing/selftests/mm/virtual_address_range.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/mm/virtual_address_range.c 
b/tools/testing/selftests/mm/virtual_address_range.c
index 4e4c1e311247..2a2b69e91950 100644
--- a/tools/testing/selftests/mm/virtual_address_range.c
+++ b/tools/testing/selftests/mm/virtual_address_range.c
@@ -64,7 +64,7 @@
 #define NR_CHUNKS_HIGH  NR_CHUNKS_384TB
 #endif
 
-static char *hind_addr(void)
+static char *hint_addr(void)
 {
int bits = HIGH_ADDR_SHIFT + rand() % (63 - HIGH_ADDR_SHIFT);
 
@@ -185,7 +185,7 @@ int main(int argc, char *argv[])
}
 
for (i = 0; i < NR_CHUNKS_HIGH; i++) {
-   hint = hind_addr();
+   hint = hint_addr();
hptr[i] = mmap(hint, MAP_CHUNK_SIZE, PROT_READ | PROT_WRITE,
   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 
-- 
2.34.1

[PATCH V2 1/4] riscv: Remove unused GENERATING_ASM_OFFSETS

The macro is not used in the current version of the kernel, so it looks like
it can be removed to avoid a build warning:

../arch/riscv/kernel/asm-offsets.c: At top level:
../arch/riscv/kernel/asm-offsets.c:7: warning: macro "GENERATING_ASM_OFFSETS" 
is not used [-Wunused-macros]
7 | #define GENERATING_ASM_OFFSETS

Fixes: 9639a44394b9 ("RISC-V: Provide a cleaner raw_smp_processor_id()")
Reviewed-by: Alexandre Ghiti 
Tested-by: Alexandre Ghiti 
Signed-off-by: Chunyan Zhang 
---
V1: 
https://lore.kernel.org/lkml/20240816101912.1049329-1-zhangchun...@iscas.ac.cn/T/
---
 arch/riscv/kernel/asm-offsets.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c
index e94180ba432f..c2f3129a8e5c 100644
--- a/arch/riscv/kernel/asm-offsets.c
+++ b/arch/riscv/kernel/asm-offsets.c
@@ -4,8 +4,6 @@
  * Copyright (C) 2017 SiFive
  */
 
-#define GENERATING_ASM_OFFSETS
-
 #include 
 #include 
 #include 
-- 
2.34.1

[PATCH V2 2/4] riscv: Remove duplicated GET_RM