[PATCH bpf-next v4 3/3] libbpf: Use mmap to parse vmlinux BTF from sysfs

2025-05-10 Thread Lorenz Bauer
Teach libbpf to use mmap when parsing vmlinux BTF from /sys. We don't
apply this to fall-back paths on the regular file system because there
is no way to ensure that modifications underlying the MAP_PRIVATE
mapping are not visible to the process.

Signed-off-by: Lorenz Bauer 
---
 tools/lib/bpf/btf.c | 85 +++--
 1 file changed, 69 insertions(+), 16 deletions(-)

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 
f18d7e6a453cd9e5c384487659df04f7efafdf5a..42815a29c0a52a1a7eed2c6b22b9b1754ae01c9a
 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -12,6 +12,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -120,6 +121,9 @@ struct btf {
/* whether base_btf should be freed in btf_free for this instance */
bool owns_base;
 
+   /* whether raw_data is a (read-only) mmap */
+   bool raw_data_is_mmap;
+
/* BTF object FD, if loaded into kernel */
int fd;
 
@@ -951,6 +955,17 @@ static bool btf_is_modifiable(const struct btf *btf)
return (void *)btf->hdr != btf->raw_data;
 }
 
+static void btf_free_raw_data(struct btf *btf)
+{
+   if (btf->raw_data_is_mmap) {
+   munmap(btf->raw_data, btf->raw_size);
+   btf->raw_data_is_mmap = false;
+   } else {
+   free(btf->raw_data);
+   }
+   btf->raw_data = NULL;
+}
+
 void btf__free(struct btf *btf)
 {
if (IS_ERR_OR_NULL(btf))
@@ -970,7 +985,7 @@ void btf__free(struct btf *btf)
free(btf->types_data);
strset__free(btf->strs_set);
}
-   free(btf->raw_data);
+   btf_free_raw_data(btf);
free(btf->raw_data_swapped);
free(btf->type_offs);
if (btf->owns_base)
@@ -1030,7 +1045,7 @@ struct btf *btf__new_empty_split(struct btf *base_btf)
return libbpf_ptr(btf_new_empty(base_btf));
 }
 
-static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf)
+static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf, 
bool is_mmap)
 {
struct btf *btf;
int err;
@@ -1050,12 +1065,18 @@ static struct btf *btf_new(const void *data, __u32 
size, struct btf *base_btf)
btf->start_str_off = base_btf->hdr->str_len;
}
 
-   btf->raw_data = malloc(size);
-   if (!btf->raw_data) {
-   err = -ENOMEM;
-   goto done;
+   if (is_mmap) {
+   btf->raw_data = (void *)data;
+   btf->raw_data_is_mmap = true;
+   } else {
+   btf->raw_data = malloc(size);
+   if (!btf->raw_data) {
+   err = -ENOMEM;
+   goto done;
+   }
+   memcpy(btf->raw_data, data, size);
}
-   memcpy(btf->raw_data, data, size);
+
btf->raw_size = size;
 
btf->hdr = btf->raw_data;
@@ -1083,12 +1104,12 @@ static struct btf *btf_new(const void *data, __u32 
size, struct btf *base_btf)
 
 struct btf *btf__new(const void *data, __u32 size)
 {
-   return libbpf_ptr(btf_new(data, size, NULL));
+   return libbpf_ptr(btf_new(data, size, NULL, false));
 }
 
 struct btf *btf__new_split(const void *data, __u32 size, struct btf *base_btf)
 {
-   return libbpf_ptr(btf_new(data, size, base_btf));
+   return libbpf_ptr(btf_new(data, size, base_btf, false));
 }
 
 struct btf_elf_secs {
@@ -1209,7 +1230,7 @@ static struct btf *btf_parse_elf(const char *path, struct 
btf *base_btf,
 
if (secs.btf_base_data) {
dist_base_btf = btf_new(secs.btf_base_data->d_buf, 
secs.btf_base_data->d_size,
-   NULL);
+   NULL, false);
if (IS_ERR(dist_base_btf)) {
err = PTR_ERR(dist_base_btf);
dist_base_btf = NULL;
@@ -1218,7 +1239,7 @@ static struct btf *btf_parse_elf(const char *path, struct 
btf *base_btf,
}
 
btf = btf_new(secs.btf_data->d_buf, secs.btf_data->d_size,
- dist_base_btf ?: base_btf);
+ dist_base_btf ?: base_btf, false);
if (IS_ERR(btf)) {
err = PTR_ERR(btf);
goto done;
@@ -1335,7 +1356,7 @@ static struct btf *btf_parse_raw(const char *path, struct 
btf *base_btf)
}
 
/* finally parse BTF data */
-   btf = btf_new(data, sz, base_btf);
+   btf = btf_new(data, sz, base_btf, false);
 
 err_out:
free(data);
@@ -1354,6 +1375,36 @@ struct btf *btf__parse_raw_split(const char *path, 
struct btf *base_btf)
return libbpf_ptr(btf_parse_raw(path, base_btf));
 }
 
+static struct btf *btf_parse_raw_mmap(const char *path, struct btf *base_btf)
+{
+   struct stat st;
+   void *data;
+   struct btf *btf;
+   int fd, err;
+
+   fd = open(path, O_RDONLY);
+   if (fd < 0)
+   return libbpf_err_ptr(-errno);
+
+

[PATCH bpf-next v4 0/3] Allow mmap of /sys/kernel/btf/vmlinux

2025-05-10 Thread Lorenz Bauer
I'd like to cut down the memory usage of parsing vmlinux BTF in ebpf-go.
With some upcoming changes the library is sitting at 5MiB for a parse.
Most of that memory is simply copying the BTF blob into user space.
By allowing vmlinux BTF to be mmapped read-only into user space I can
cut memory usage by about 75%.

Signed-off-by: Lorenz Bauer 
---
Changes in v4:
- Go back to remap_pfn_range for aarch64 compat
- Dropped btf_new_no_copy (Andrii)
- Fixed nits in selftests (Andrii)
- Clearer error handling in the mmap handler (Andrii)
- Fixed build on s390
- Link to v3: 
https://lore.kernel.org/r/20250505-vmlinux-mmap-v3-0-5d53afa06...@isovalent.com

Changes in v3:
- Remove slightly confusing calculation of trailing (Alexei)
- Use vm_insert_page (Alexei)
- Simplified libbpf code
- Link to v2: 
https://lore.kernel.org/r/20250502-vmlinux-mmap-v2-0-95c271434...@isovalent.com

Changes in v2:
- Use btf__new in selftest
- Avoid vm_iomap_memory in btf_vmlinux_mmap
- Add VM_DONTDUMP
- Add support to libbpf
- Link to v1: 
https://lore.kernel.org/r/20250501-vmlinux-mmap-v1-0-aa2724572...@isovalent.com

---
Lorenz Bauer (3):
  btf: allow mmap of vmlinux btf
  selftests: bpf: add a test for mmapable vmlinux BTF
  libbpf: Use mmap to parse vmlinux BTF from sysfs

 include/asm-generic/vmlinux.lds.h  |  3 +-
 kernel/bpf/sysfs_btf.c | 32 
 tools/lib/bpf/btf.c| 85 ++
 tools/testing/selftests/bpf/prog_tests/btf_sysfs.c | 81 +
 4 files changed, 184 insertions(+), 17 deletions(-)
---
base-commit: 7220eabff8cb4af3b93cd021aa853b9f5df2923f
change-id: 20250501-vmlinux-mmap-2ec5563c3ef1

Best regards,
-- 
Lorenz Bauer 




[PATCH bpf-next v4 1/3] btf: allow mmap of vmlinux btf

2025-05-10 Thread Lorenz Bauer
User space needs access to kernel BTF for many modern features of BPF.
Right now each process needs to read the BTF blob either in pieces or
as a whole. Allow mmapping the sysfs file so that processes can directly
access the memory allocated for it in the kernel.

remap_pfn_range is used instead of vm_insert_page due to aarch64
compatibility issues.

Signed-off-by: Lorenz Bauer 
---
 include/asm-generic/vmlinux.lds.h |  3 ++-
 kernel/bpf/sysfs_btf.c| 32 
 2 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/include/asm-generic/vmlinux.lds.h 
b/include/asm-generic/vmlinux.lds.h
index 
58a635a6d5bdf0c53c267c2a3d21a5ed8678ce73..1750390735fac7637cc4d2fa05f96cb2a36aa448
 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -667,10 +667,11 @@ defined(CONFIG_AUTOFDO_CLANG) || 
defined(CONFIG_PROPELLER_CLANG)
  */
 #ifdef CONFIG_DEBUG_INFO_BTF
 #define BTF\
+   . = ALIGN(PAGE_SIZE);   \
.BTF : AT(ADDR(.BTF) - LOAD_OFFSET) {   \
BOUNDED_SECTION_BY(.BTF, _BTF)  \
}   \
-   . = ALIGN(4);   \
+   . = ALIGN(PAGE_SIZE);   \
.BTF_ids : AT(ADDR(.BTF_ids) - LOAD_OFFSET) {   \
*(.BTF_ids) \
}
diff --git a/kernel/bpf/sysfs_btf.c b/kernel/bpf/sysfs_btf.c
index 
81d6cf90584a7157929c50f62a5c6862e7a3d081..941d0d2427e3a2d27e8f1cff7b6424d0d41817c1
 100644
--- a/kernel/bpf/sysfs_btf.c
+++ b/kernel/bpf/sysfs_btf.c
@@ -7,14 +7,46 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
 
 /* See scripts/link-vmlinux.sh, gen_btf() func for details */
 extern char __start_BTF[];
 extern char __stop_BTF[];
 
+static int btf_sysfs_vmlinux_mmap(struct file *filp, struct kobject *kobj,
+ const struct bin_attribute *attr,
+ struct vm_area_struct *vma)
+{
+   unsigned long pages = PAGE_ALIGN(attr->size) >> PAGE_SHIFT;
+   size_t vm_size = vma->vm_end - vma->vm_start;
+   phys_addr_t addr = virt_to_phys(__start_BTF);
+   unsigned long pfn = addr >> PAGE_SHIFT;
+
+   if (attr->private != __start_BTF || !PAGE_ALIGNED(addr))
+   return -EINVAL;
+
+   if (vma->vm_pgoff)
+   return -EINVAL;
+
+   if (vma->vm_flags & (VM_WRITE | VM_EXEC | VM_MAYSHARE))
+   return -EACCES;
+
+   if (pfn + pages < pfn)
+   return -EINVAL;
+
+   if ((vm_size >> PAGE_SHIFT) > pages)
+   return -EINVAL;
+
+   vm_flags_mod(vma, VM_DONTDUMP, VM_MAYEXEC | VM_MAYWRITE);
+   return remap_pfn_range(vma, vma->vm_start, pfn, vm_size, 
vma->vm_page_prot);
+}
+
 static struct bin_attribute bin_attr_btf_vmlinux __ro_after_init = {
.attr = { .name = "vmlinux", .mode = 0444, },
.read_new = sysfs_bin_attr_simple_read,
+   .mmap = btf_sysfs_vmlinux_mmap,
 };
 
 struct kobject *btf_kobj;

-- 
2.49.0




[PATCH bpf-next v4 2/3] selftests: bpf: add a test for mmapable vmlinux BTF

2025-05-10 Thread Lorenz Bauer
Add a basic test for the ability to mmap /sys/kernel/btf/vmlinux.
Ensure that the data is valid BTF and that it is padded with zero.

Signed-off-by: Lorenz Bauer 
---
 tools/testing/selftests/bpf/prog_tests/btf_sysfs.c | 81 ++
 1 file changed, 81 insertions(+)

diff --git a/tools/testing/selftests/bpf/prog_tests/btf_sysfs.c 
b/tools/testing/selftests/bpf/prog_tests/btf_sysfs.c
new file mode 100644
index 
..3923e64c4c1d0f1dfeef2a39c7bbab7c9a19f0ca
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/btf_sysfs.c
@@ -0,0 +1,81 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+/* Copyright (c) 2025 Isovalent */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+static void test_btf_mmap_sysfs(const char *path, struct btf *base)
+{
+   struct stat st;
+   __u64 btf_size, end;
+   void *raw_data = NULL;
+   int fd = -1;
+   long page_size;
+   struct btf *btf = NULL;
+
+   page_size = sysconf(_SC_PAGESIZE);
+   if (!ASSERT_GE(page_size, 0, "get_page_size"))
+   goto cleanup;
+
+   if (!ASSERT_OK(stat(path, &st), "stat_btf"))
+   goto cleanup;
+
+   btf_size = st.st_size;
+   end = (btf_size + page_size - 1) / page_size * page_size;
+
+   fd = open(path, O_RDONLY);
+   if (!ASSERT_GE(fd, 0, "open_btf"))
+   goto cleanup;
+
+   raw_data = mmap(NULL, btf_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, 
fd, 0);
+   if (!ASSERT_EQ(raw_data, MAP_FAILED, "mmap_btf_writable"))
+   goto cleanup;
+
+   raw_data = mmap(NULL, btf_size, PROT_READ, MAP_SHARED, fd, 0);
+   if (!ASSERT_EQ(raw_data, MAP_FAILED, "mmap_btf_shared"))
+   goto cleanup;
+
+   raw_data = mmap(NULL, end + 1, PROT_READ, MAP_PRIVATE, fd, 0);
+   if (!ASSERT_EQ(raw_data, MAP_FAILED, "mmap_btf_invalid_size"))
+   goto cleanup;
+
+   raw_data = mmap(NULL, end, PROT_READ, MAP_PRIVATE, fd, 0);
+   if (!ASSERT_OK_PTR(raw_data, "mmap_btf"))
+   goto cleanup;
+
+   if (!ASSERT_EQ(mprotect(raw_data, btf_size, PROT_READ | PROT_WRITE), -1,
+   "mprotect_writable"))
+   goto cleanup;
+
+   if (!ASSERT_EQ(mprotect(raw_data, btf_size, PROT_READ | PROT_EXEC), -1,
+   "mprotect_executable"))
+   goto cleanup;
+
+   /* Check padding is zeroed */
+   for (int i = btf_size; i < end; i++) {
+   if (((__u8 *)raw_data)[i] != 0) {
+   PRINT_FAIL("tail of BTF is not zero at page offset 
%d\n", i);
+   goto cleanup;
+   }
+   }
+
+   btf = btf__new_split(raw_data, btf_size, base);
+   if (!ASSERT_OK_PTR(btf, "parse_btf"))
+   goto cleanup;
+
+cleanup:
+   btf__free(btf);
+   if (raw_data && raw_data != MAP_FAILED)
+   munmap(raw_data, btf_size);
+   if (fd >= 0)
+   close(fd);
+}
+
+void test_btf_sysfs(void)
+{
+   test_btf_mmap_sysfs("/sys/kernel/btf/vmlinux", NULL);
+}

-- 
2.49.0




Re: [PATCH v2 06/19] iommufd/viommu: Add IOMMU_VIOMMU_SET/UNSET_VDEV_ID ioctl

2025-05-10 Thread Alexey Kardashevskiy




On 10/5/25 08:07, Jason Gunthorpe wrote:

On Fri, May 09, 2025 at 12:57:18PM +1000, Alexey Kardashevskiy wrote:



On 7/5/25 22:24, Jason Gunthorpe wrote:

On Wed, May 07, 2025 at 09:18:29PM +1000, Alexey Kardashevskiy wrote:


We should not destroy the vdevice for something like that. In a CC
case that would unplug it from the VM which is not right.


vdevice is not directly seen by the guest, is it? The guest will
see, for example, an "AMD IOMMU" and assume there is a device table
for all 64K devices, and so on; it is QEMU which will be
reallocating the vdevice in the host's IOMMUFD. Did I miss something
here? Thanks,


I imagined the vdevice would trigger the CC call to create the vPCI
function in the guest attached to the secure iommu.


What is this vPCI going to look like inside the guest? There will still
be an AMD IOMMU PCI function and a passed-through, to-be-trusted PCI
function.


However the VMM tells it to look using the arguments to the create
vdevice ioctl?

I don't understand the question?


"We should not destroy the vdevice" confused me. It is not a device which a VM would see
(or is it?); it is a QEMU/IOMMUFD object which we can create/destroy at any time (well, maybe not,
but the VM does not have a say in this). And then this vPCI thing - what is it, a synonym for
"vdevice" or a different thing?


--
Alexey




Re: [PATCH 4/9] CodingStyle: mention "typedef struct S {} S;" if typedef is used

2025-05-10 Thread Mauro Carvalho Chehab
Em Fri,  9 May 2025 23:34:25 +0300
Alexey Dobriyan  escreveu:

> Signed-off-by: Alexey Dobriyan 
> ---
>  Documentation/process/coding-style.rst | 14 ++
>  1 file changed, 14 insertions(+)
> 
> diff --git a/Documentation/process/coding-style.rst 
> b/Documentation/process/coding-style.rst
> index ac9c1dbe00b7..5c5902a0f897 100644
> --- a/Documentation/process/coding-style.rst
> +++ b/Documentation/process/coding-style.rst
> @@ -443,6 +443,20 @@ EVER use a typedef unless you can clearly match one of 
> those rules.
>  In general, a pointer, or a struct that has elements that can reasonably
>  be directly accessed should **never** be a typedef.
>  
> +If you must use ``typedef`` consider using identical names for both the type
> +and its alias so that the type can be forward declared if necessary:

Better not, as symbols with duplicated names will generate a Sphinx warning 
(*). 

(*) It shouldn't, but there is a pending issue on Sphinx since version 3.1
still not addressed:

https://github.com/sphinx-doc/sphinx/pull/8313

Regards,

Thanks,
Mauro



Re: [PATCH 3/3] KVM: arm64: selftests: arch_timer_edge_cases - workaround for AC03_CPU_14

2025-05-10 Thread Marc Zyngier
On Fri, 09 May 2025 15:33:12 +0100,
Sebastian Ott  wrote:
> 
> arch_timer_edge_cases currently fails on ampere-one machines with
> the following assertion failure:
> 
>  Test Assertion Failure 
>   arm64/arch_timer_edge_cases.c:169: timer_condition == istatus
>   pid=11236 tid=11236 errno=4 - Interrupted system call
>  1  0x00404ce7: test_run at arch_timer_edge_cases.c:938
>  2  0x00401ebb: main at arch_timer_edge_cases.c:1053
>  3  0x9fa8625b: ?? ??:0
>  4  0x9fa8633b: ?? ??:0
>  5  0x00401fef: _start at ??:?
>   0x1 != 0x0 (timer_condition != istatus)
> 
> Meaning that the timer condition was met and an interrupt
> was presented but the timer status bit in the control register
> was not set.
> 
> This happens due to AC03_CPU_14 "Timer CVAL programming of a delta
> greater than 2^63 will result in incorrect behavior."
> 
> Work around this issue by reducing the value that is used to reset
> the counter and thus reduce the delta.
> 
> Link: 
> https://lore.kernel.org/kvmarm/ac1de1d2-ef2b-d439-dc48-8615e121b...@redhat.com
> Link: 
> https://amperecomputing.com/assets/AmpereOne_Developer_ER_v0_80_20240823_28945022f4.pdf
> Signed-off-by: Sebastian Ott 
> ---
>  tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c 
> b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c
> index a813b4c6c817..2f0397df0aa6 100644
> --- a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c
> +++ b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c
> @@ -31,7 +31,7 @@ static const int32_t TVAL_MIN = INT32_MIN;
>  static const uint32_t TIMEOUT_NO_IRQ_US = 5;
>  
>  /* A nice counter value to use as the starting one for most tests. */
> -static const uint64_t DEF_CNT = (CVAL_MAX / 2);
> +static const uint64_t DEF_CNT = (CVAL_MAX / 4);

This is rather arbitrary, and only sidesteps the issue: the core
problem is that CVAL_MAX is defined as ~0, and that we have no idea
what the *effective* counter width is.

So while this happens to sidestep the particular Ampere erratum (and
avoid failures on X1E), this is only papering over the problem, which
is why I always had some reservations on this particular test -- it is
remarkably broken.

If anything, we should compute the expected width of the counter based
on the frequency and the architectural guarantees ("Roll-over time of
not less than 40 years."), just like the kernel driver does (see
arch_counter_get_width()).

I'm also not keen on hiding a HW bug by altering the test. What of
other guests that would fall into the same issue? If we think the
problem exposed by this test is serious enough, then we need to fully
trap and emulate the timers, X1E style. Performance would definitely
suffer, but that would be the correct thing to do.

So my proposal is to fix the test to be compliant with the intent of
the architecture instead of making bets and using semi-random values.
If that's good enough to make that test pass on A1, great.

Thanks,

M.

-- 
Jazz isn't dead. It just smells funny.



[PATCH v3 1/1] selftests/mm/uffd: Refactor non-composite global vars into struct

2025-05-10 Thread Ujwal Kundur
Refactor macros and non-composite global variable definitions into a
struct that is defined at the start of a test and is passed around
instead of relying on global vars.

Signed-off-by: Ujwal Kundur 
---
 Changes since v2:
 - redo patch on mm-new branch
 Changes since v1:
 - indentation fixes
 - squash into single patch to assist bisections

 tools/testing/selftests/mm/uffd-common.c | 275 -
 tools/testing/selftests/mm/uffd-common.h |  78 +--
 tools/testing/selftests/mm/uffd-stress.c | 226 
 tools/testing/selftests/mm/uffd-unit-tests.c | 557 ++-
 tools/testing/selftests/mm/uffd-wp-mremap.c  |  23 +-
 5 files changed, 615 insertions(+), 544 deletions(-)

diff --git a/tools/testing/selftests/mm/uffd-common.c 
b/tools/testing/selftests/mm/uffd-common.c
index a37088a23ffe..1b13107ef3c3 100644
--- a/tools/testing/selftests/mm/uffd-common.c
+++ b/tools/testing/selftests/mm/uffd-common.c
@@ -7,18 +7,31 @@
 
 #include "uffd-common.h"
 
-#define BASE_PMD_ADDR ((void *)(1UL << 30))
-
-volatile bool test_uffdio_copy_eexist = true;
-unsigned long nr_parallel, nr_pages, nr_pages_per_cpu, page_size;
-char *area_src, *area_src_alias, *area_dst, *area_dst_alias, *area_remap;
-int uffd = -1, uffd_flags, finished, *pipefd, test_type;
-bool map_shared;
-bool test_uffdio_wp = true;
-unsigned long long *count_verify;
 uffd_test_ops_t *uffd_test_ops;
 uffd_test_case_ops_t *uffd_test_case_ops;
-atomic_bool ready_for_fork;
+
+#define BASE_PMD_ADDR ((void *)(1UL << 30))
+
+/* pthread_mutex_t starts at page offset 0 */
+pthread_mutex_t *area_mutex(char *area, unsigned long nr, 
uffd_global_test_opts_t *gopts)
+{
+   return (pthread_mutex_t *) (area + nr * gopts->page_size);
+}
+
+/*
+ * count is placed in the page after pthread_mutex_t naturally aligned
+ * to avoid non alignment faults on non-x86 archs.
+ */
+volatile unsigned long long *area_count(
+   char *area, unsigned long nr,
+   uffd_global_test_opts_t *gopts)
+{
+   return (volatile unsigned long long *)
+   ((unsigned long) (area + nr * gopts->page_size +
+ sizeof(pthread_mutex_t) +
+ sizeof(unsigned long long) - 1) &
+~(unsigned long)(sizeof(unsigned long long) - 1));
+}
 
 static int uffd_mem_fd_create(off_t mem_size, bool hugetlb)
 {
@@ -40,15 +53,15 @@ static int uffd_mem_fd_create(off_t mem_size, bool hugetlb)
return mem_fd;
 }
 
-static void anon_release_pages(char *rel_area)
+static void anon_release_pages(uffd_global_test_opts_t *gopts, char *rel_area)
 {
-   if (madvise(rel_area, nr_pages * page_size, MADV_DONTNEED))
+   if (madvise(rel_area, gopts->nr_pages * gopts->page_size, 
MADV_DONTNEED))
err("madvise(MADV_DONTNEED) failed");
 }
 
-static int anon_allocate_area(void **alloc_area, bool is_src)
+static int anon_allocate_area(uffd_global_test_opts_t *gopts, void 
**alloc_area, bool is_src)
 {
-   *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
+   *alloc_area = mmap(NULL, gopts->nr_pages * gopts->page_size, PROT_READ 
| PROT_WRITE,
   MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
if (*alloc_area == MAP_FAILED) {
*alloc_area = NULL;
@@ -57,31 +70,33 @@ static int anon_allocate_area(void **alloc_area, bool 
is_src)
return 0;
 }
 
-static void noop_alias_mapping(__u64 *start, size_t len, unsigned long offset)
+static void noop_alias_mapping(
+   uffd_global_test_opts_t *gopts, __u64 *start, size_t len,
+   unsigned long offset)
 {
 }
 
-static void hugetlb_release_pages(char *rel_area)
+static void hugetlb_release_pages(uffd_global_test_opts_t *gopts, char 
*rel_area)
 {
-   if (!map_shared) {
-   if (madvise(rel_area, nr_pages * page_size, MADV_DONTNEED))
+   if (!gopts->map_shared) {
+   if (madvise(rel_area, gopts->nr_pages * gopts->page_size, 
MADV_DONTNEED))
err("madvise(MADV_DONTNEED) failed");
} else {
-   if (madvise(rel_area, nr_pages * page_size, MADV_REMOVE))
+   if (madvise(rel_area, gopts->nr_pages * gopts->page_size, 
MADV_REMOVE))
err("madvise(MADV_REMOVE) failed");
}
 }
 
-static int hugetlb_allocate_area(void **alloc_area, bool is_src)
+static int hugetlb_allocate_area(uffd_global_test_opts_t *gopts, void 
**alloc_area, bool is_src)
 {
-   off_t size = nr_pages * page_size;
+   off_t size = gopts->nr_pages * gopts->page_size;
off_t offset = is_src ? 0 : size;
void *area_alias = NULL;
char **alloc_area_alias;
int mem_fd = uffd_mem_fd_create(size * 2, true);
 
*alloc_area = mmap(NULL, size, PROT_READ | PROT_WRITE,
-  (map_shared ? MAP_SHARED : MAP_PRIVATE) |
+  (gopts->map_shared ? MAP_SHARED : MAP_PRIVATE) |
   (is

Re: [PATCH 1/9] CodingStyle: make Documentation/CodingStyle into symlink

2025-05-10 Thread Jonathan Corbet
Alexey Dobriyan  writes:

> Every time I open Documentation/CodingStyle it says the party moved
> somewhere else. :-(
>
> Of course, I forget where it moved to by the next time.
>
> Signed-off-by: Alexey Dobriyan 
> ---

No 0/9 cover letter?

Just FYI, I won't apply coding-style patches without a strong sense that
there is a consensus behind them...I suspect that could prove to be a
high bar here.

Thanks,

jon



[PATCH] params: Add support for static keys

2025-05-10 Thread Kent Overstreet
Static keys can now be a module parameter, e.g.

module_param_named(foo, foo.key, static_key_t, 0644)

bcachefs is now using this.

Cc: Luis Chamberlain 
Cc: Petr Pavlu 
Cc: Sami Tolvanen 
Cc: Daniel Gomez 
Cc: linux-modu...@vger.kernel.org
Signed-off-by: Kent Overstreet 
---
 include/linux/jump_label.h  |  2 ++
 include/linux/moduleparam.h |  6 ++
 kernel/params.c | 35 +++
 3 files changed, 43 insertions(+)

diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index fdb79dd1ebd8..0fc9b71db56f 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -107,6 +107,8 @@ struct static_key {
 #endif /* CONFIG_JUMP_LABEL */
 };
 
+typedef struct static_key static_key_t;
+
 #endif /* __ASSEMBLY__ */
 
 #ifdef CONFIG_JUMP_LABEL
diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h
index bfb85fd13e1f..2494e7e67453 100644
--- a/include/linux/moduleparam.h
+++ b/include/linux/moduleparam.h
@@ -488,6 +488,12 @@ extern int param_set_bint(const char *val, const struct 
kernel_param *kp);
 #define param_get_bint param_get_int
 #define param_check_bint param_check_int
 
+/* A static key, which can only be set like a bool */
+extern const struct kernel_param_ops param_ops_static_key_t;
+extern int param_set_static_key_t(const char *val, const struct kernel_param 
*kp);
+extern int param_get_static_key_t(char *buffer, const struct kernel_param *kp);
+#define param_check_static_key_t(name, p) __param_check(name, p, struct 
static_key)
+
 /**
  * module_param_array - a parameter which is an array of some type
  * @name: the name of the array variable
diff --git a/kernel/params.c b/kernel/params.c
index 2509f216c9f3..991f49e138e7 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -14,6 +14,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #ifdef CONFIG_SYSFS
@@ -412,6 +413,40 @@ const struct kernel_param_ops param_ops_bint = {
 };
 EXPORT_SYMBOL(param_ops_bint);
 
+int param_set_static_key_t(const char *val, const struct kernel_param *kp)
+{
+   /* Match bool exactly, by re-using it. */
+   struct kernel_param boolkp = *kp;
+   bool v;
+   int ret;
+
+   boolkp.arg = &v;
+
+   ret = param_set_bool(val, &boolkp);
+   if (ret)
+   return ret;
+   if (v)
+   static_key_enable(kp->arg);
+   else
+   static_key_disable(kp->arg);
+   return 0;
+}
+EXPORT_SYMBOL(param_set_static_key_t);
+
+int param_get_static_key_t(char *buffer, const struct kernel_param *kp)
+{
+   struct static_key *key = kp->arg; /* report 'Y' if enabled, else 'N' */
+   return sprintf(buffer, "%c\n", static_key_enabled(key) ? 'Y' : 'N');
+}
+EXPORT_SYMBOL(param_get_static_key_t);
+
+const struct kernel_param_ops param_ops_static_key_t = {
+   .flags = KERNEL_PARAM_OPS_FL_NOARG,
+   .set = param_set_static_key_t,
+   .get = param_get_static_key_t,
+};
+EXPORT_SYMBOL(param_ops_static_key_t);
+
 /* We break the rule and mangle the string. */
 static int param_array(struct module *mod,
   const char *name,
-- 
2.49.0




Re: [PATCH 8/9] CodingStyle: tell people how to split long "for" loops

2025-05-10 Thread David Laight
On Fri,  9 May 2025 23:34:29 +0300
Alexey Dobriyan  wrote:

> Signed-off-by: Alexey Dobriyan 
> ---
>  Documentation/process/coding-style.rst | 16 +++-
>  1 file changed, 15 insertions(+), 1 deletion(-)
> 
> diff --git a/Documentation/process/coding-style.rst 
> b/Documentation/process/coding-style.rst
> index e17de69845ff..494ab3201112 100644
> --- a/Documentation/process/coding-style.rst
> +++ b/Documentation/process/coding-style.rst
> @@ -183,7 +183,21 @@ Descendants are always substantially shorter than the 
> parent and
>  are placed substantially to the right.  A very commonly used style
>  is to align descendants to a function open parenthesis.
>  
> -These same rules are applied to function headers with a long argument list.
> +These same rules are applied to function prototypes with a long argument 
> list.
> +
> +Very long ``for`` loops are split at the ``;`` characters making it easier
> +to see which code goes to which clause:
> +
> +.. code-block:: c
> +
> + for (int i = 0;
> +  i < N;
> +  i += 1)
> + {
> + }
> +
> +Opening curly is placed on a separate line then to make it easier to tell
> +loop body from iteration clause.

Is that actually the style - I don't remember seeing it.

The location of the { isn't a significant problem with for (;;), it can be
much worse elsewhere.
In reality the 'align with the (' is what causes the problems, either
double indenting (two tabs) or half indent (4 spaces - to annoy anyone who
sets an editor to 4 space tabs) is more readable.

For for (;;) loops I'll normally try moving the initialisation outside the
loop and even put an inverted condition inside the loop to avoid long lines.

If a #define all bets are off :-)

David



>  
>  However, never break user-visible strings such as printk messages because
>  that breaks the ability to grep for them.