On Mon, Sep 15, 2025 at 09:36:38AM -0700, Kalesh Singh wrote:
> Needed observability on in field devices can be collected with minimal
> overhead and can be toggled on and off. Event driven telemetry can be
> done with tracepoint BPF programs.
>
> The process comm is provided for aggregation across devices and tgid is
> to enable per-process aggregation per device.
>
> This allows for observing the distribution of such problems in the
> field, to deduce if there are legitimate bugs or if a bump to the limit is
> warranted.

It's not really a bug though is it? It's somebody running out of resources.

I'm not sure how useful this is really. But I'm open to being convinced!

I also wonder if this is better as a statistic? You'd figure out it was a
problem that way too right?

>
> Cc: Andrew Morton <[email protected]>
> Cc: David Hildenbrand <[email protected]>
> Cc: "Liam R. Howlett" <[email protected]>
> Cc: Lorenzo Stoakes <[email protected]>
> Cc: Mike Rapoport <[email protected]>
> Cc: Minchan Kim <[email protected]>
> Cc: Pedro Falcato <[email protected]>
> Signed-off-by: Kalesh Singh <[email protected]>

This breaks the VMA tests, please make sure to always check them:

cc -I../shared -I. -I../../include -I../../arch/x86/include -I../../../lib -g 
-Og -Wall -D_LGPL_SOURCE -fsanitize=address -fsanitize=undefined    -c -o vma.o 
vma.c
In file included from vma.c:33:
../../../mm/vma.c:10:10: fatal error: trace/events/vma.h: No such file or 
directory
   10 | #include <trace/events/vma.h>
      |          ^~~~~~~~~~~~~~~~~~~~
compilation terminated.
make: *** [<builtin>: vma.o] Error 1

See below though, you've included this in the wrong place (I don't blame you,
perhaps we've not made this _very_ clear :)

> ---
>
> Chnages in v2:
>   - Add needed observability for operations failing due to the vma count 
> limit,
>       per Minchan
>     (Since there isn't a common point for debug logging due checks being
>     external to the capacity based vma_count_remaining() helper. I used a
>     trace event for low overhead and to facilitate event driven telemetry
>     for in field devices)
>
>  include/trace/events/vma.h | 32 ++++++++++++++++++++++++++++++++

Do we really need a new file? We already have VMA-related tracepoints no?

Also if you add a new file you _have_ to update MAINTAINERS.

>  mm/mmap.c                  |  5 ++++-
>  mm/mremap.c                | 10 ++++++++--
>  mm/vma.c                   | 11 +++++++++--
>  4 files changed, 53 insertions(+), 5 deletions(-)
>  create mode 100644 include/trace/events/vma.h
>
> diff --git a/include/trace/events/vma.h b/include/trace/events/vma.h
> new file mode 100644
> index 000000000000..2fed63b0d0a6
> --- /dev/null
> +++ b/include/trace/events/vma.h
> @@ -0,0 +1,32 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#undef TRACE_SYSTEM
> +#define TRACE_SYSTEM vma
> +
> +#if !defined(_TRACE_VMA_H) || defined(TRACE_HEADER_MULTI_READ)
> +#define _TRACE_VMA_H
> +
> +#include <linux/tracepoint.h>
> +
> +TRACE_EVENT(max_vma_count_exceeded,
> +
> +     TP_PROTO(struct task_struct *task),
> +
> +     TP_ARGS(task),
> +
> +     TP_STRUCT__entry(
> +             __string(comm,  task->comm)
> +             __field(pid_t,  tgid)
> +     ),
> +
> +     TP_fast_assign(
> +             __assign_str(comm);
> +             __entry->tgid = task->tgid;
> +     ),
> +
> +     TP_printk("comm=%s tgid=%d", __get_str(comm), __entry->tgid)
> +);
> +
> +#endif /*  _TRACE_VMA_H */
> +
> +/* This part must be outside protection */
> +#include <trace/define_trace.h>
> diff --git a/mm/mmap.c b/mm/mmap.c
> index 30ddd550197e..0bb311bf48f3 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -56,6 +56,7 @@
>
>  #define CREATE_TRACE_POINTS
>  #include <trace/events/mmap.h>
> +#include <trace/events/vma.h>
>
>  #include "internal.h"
>
> @@ -374,8 +375,10 @@ unsigned long do_mmap(struct file *file, unsigned long 
> addr,
>               return -EOVERFLOW;
>
>       /* Too many mappings? */
> -     if (!vma_count_remaining(mm))
> +     if (!vma_count_remaining(mm)) {
> +             trace_max_vma_count_exceeded(current);
>               return -ENOMEM;
> +     }
>
>       /*
>        * addr is returned from get_unmapped_area,
> diff --git a/mm/mremap.c b/mm/mremap.c
> index 14d35d87e89b..f42ac05f0069 100644
> --- a/mm/mremap.c
> +++ b/mm/mremap.c
> @@ -30,6 +30,8 @@
>  #include <asm/tlb.h>
>  #include <asm/pgalloc.h>
>
> +#include <trace/events/vma.h>
> +
>  #include "internal.h"
>
>  /* Classify the kind of remap operation being performed. */
> @@ -1040,8 +1042,10 @@ static unsigned long prep_move_vma(struct 
> vma_remap_struct *vrm)
>        * We'd prefer to avoid failure later on in do_munmap:
>        * which may split one vma into three before unmapping.
>        */
> -     if (vma_count_remaining(current->mm) < 4)
> +     if (vma_count_remaining(current->mm) < 4) {
> +             trace_max_vma_count_exceeded(current);

But we didn't exceed it, we're guessing we might?

>               return -ENOMEM;
> +     }
>
>       if (vma->vm_ops && vma->vm_ops->may_split) {
>               if (vma->vm_start != old_addr)
> @@ -1817,8 +1821,10 @@ static unsigned long check_mremap_params(struct 
> vma_remap_struct *vrm)
>        * the threshold. In other words, is the current map count + 6 at or
>        * below the threshold? Otherwise return -ENOMEM here to be more safe.
>        */
> -     if (vma_count_remaining(current->mm) < 6)
> +     if (vma_count_remaining(current->mm) < 6) {
> +             trace_max_vma_count_exceeded(current);

Similar point here.

>               return -ENOMEM;
> +     }
>
>       return 0;
>  }
> diff --git a/mm/vma.c b/mm/vma.c
> index 0e4fcaebe209..692c33c3e84d 100644
> --- a/mm/vma.c
> +++ b/mm/vma.c
> @@ -7,6 +7,8 @@
>  #include "vma_internal.h"
>  #include "vma.h"
>
> +#include <trace/events/vma.h>

Nope you don't do this :)

vma.c is designed to be userland testable and _intentionally_ puts all its
includes in vma_internal.h.

So you'd need to add this include over there, and then update the vma tests
vma_internal.h to stub out the trace function.

> +
>  struct mmap_state {
>       struct mm_struct *mm;
>       struct vma_iterator *vmi;
> @@ -621,8 +623,10 @@ __split_vma(struct vma_iterator *vmi, struct 
> vm_area_struct *vma,
>  static int split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
>                    unsigned long addr, int new_below)
>  {
> -     if (!vma_count_remaining(vma->vm_mm))
> +     if (!vma_count_remaining(vma->vm_mm)) {
> +             trace_max_vma_count_exceeded(current);
>               return -ENOMEM;
> +     }
>
>       return __split_vma(vmi, vma, addr, new_below);
>  }
> @@ -1375,6 +1379,7 @@ static int vms_gather_munmap_vmas(struct 
> vma_munmap_struct *vms,
>                */
>               if (vms->end < vms->vma->vm_end &&
>                   !vma_count_remaining(vms->vma->vm_mm)) {
> +                     trace_max_vma_count_exceeded(current);
>                       error = -ENOMEM;
>                       goto vma_count_exceeded;
>               }
> @@ -2801,8 +2806,10 @@ int do_brk_flags(struct vma_iterator *vmi, struct 
> vm_area_struct *vma,
>       if (!may_expand_vm(mm, vm_flags, len >> PAGE_SHIFT))
>               return -ENOMEM;
>
> -     if (!vma_count_remaining(mm))
> +     if (!vma_count_remaining(mm)) {
> +             trace_max_vma_count_exceeded(current);
>               return -ENOMEM;
> +     }
>
>       if (security_vm_enough_memory_mm(mm, len >> PAGE_SHIFT))
>               return -ENOMEM;
> --
> 2.51.0.384.g4c02a37b29-goog
>

Cheers, Lorenzo

Reply via email to