Re: Use counters_read(9) from ddb(4)

2023-09-18 Thread Alexander Bluhm
On Fri, Sep 15, 2023 at 04:18:13PM +0200, Martin Pieuchot wrote:
> On 11/09/23(Mon) 21:05, Martin Pieuchot wrote:
> > On 06/09/23(Wed) 23:13, Alexander Bluhm wrote:
> > > On Wed, Sep 06, 2023 at 12:23:33PM -0500, Scott Cheloha wrote:
> > > > On Wed, Sep 06, 2023 at 01:04:19PM +0100, Martin Pieuchot wrote:
> > > > > Debugging OOM is hard.  UVM uses per-CPU counters and sadly
> > > > > counters_read(9) needs to allocate memory.  This is not acceptable in
> > > > > ddb(4).  As a result I cannot see the content of UVM counters in OOM
> > > > > situations.
> > > > > 
> > > > > Diff below introduces a *_static() variant of counters_read(9) that
> > > > > takes a secondary buffer to avoid calling malloc(9).  Is it fine?  Do
> > > > > you have a better idea?  Should we make it the default or using the
> > > > > stack might be a problem?
> > > > 
> > > > Instead of adding a second interface I think we could get away with
> > > > just extending counters_read(9) to take a scratch buffer as an optional
> > > > fourth parameter:
> > > > 
> > > > void
> > > > counters_read(struct cpumem *cm, uint64_t *output, unsigned int n,
> > > > uint64_t *scratch);
> > > > 
> > > > "scratch"?  "temp"?  "tmp"?
> > > 
> > > scratch is fine for me
> > 
> > Fine with me.
> 
> Here's a full diff, works for me(tm), ok?

OK bluhm@

> Index: sys/kern/kern_sysctl.c
> ===
> RCS file: /cvs/src/sys/kern/kern_sysctl.c,v
> retrieving revision 1.418
> diff -u -p -r1.418 kern_sysctl.c
> --- sys/kern/kern_sysctl.c16 Jul 2023 03:01:31 -  1.418
> +++ sys/kern/kern_sysctl.c15 Sep 2023 13:29:53 -
> @@ -519,7 +519,7 @@ kern_sysctl(int *name, u_int namelen, vo
>   unsigned int i;
>  
>   memset(&mbs, 0, sizeof(mbs));
> - counters_read(mbstat, counters, MBSTAT_COUNT);
> + counters_read(mbstat, counters, MBSTAT_COUNT, NULL);
>   for (i = 0; i < MBSTAT_TYPES; i++)
>   mbs.m_mtypes[i] = counters[i];
>  
> Index: sys/kern/subr_evcount.c
> ===
> RCS file: /cvs/src/sys/kern/subr_evcount.c,v
> retrieving revision 1.15
> diff -u -p -r1.15 subr_evcount.c
> --- sys/kern/subr_evcount.c   5 Dec 2022 08:58:49 -   1.15
> +++ sys/kern/subr_evcount.c   15 Sep 2023 14:01:55 -
> @@ -101,7 +101,7 @@ evcount_sysctl(int *name, u_int namelen,
>  {
>   int error = 0, s, nintr, i;
>   struct evcount *ec;
> - u_int64_t count;
> + uint64_t count, scratch;
>  
>   if (newp != NULL)
>   return (EPERM);
> @@ -129,7 +129,7 @@ evcount_sysctl(int *name, u_int namelen,
>   if (ec == NULL)
>   return (ENOENT);
>   if (ec->ec_percpu != NULL) {
> - counters_read(ec->ec_percpu, &count, 1);
> + counters_read(ec->ec_percpu, &count, 1, &scratch);
>   } else {
>   s = splhigh();
>   count = ec->ec_count;
> Index: sys/kern/subr_percpu.c
> ===
> RCS file: /cvs/src/sys/kern/subr_percpu.c,v
> retrieving revision 1.10
> diff -u -p -r1.10 subr_percpu.c
> --- sys/kern/subr_percpu.c3 Oct 2022 14:10:53 -   1.10
> +++ sys/kern/subr_percpu.c15 Sep 2023 14:16:41 -
> @@ -159,17 +159,19 @@ counters_free(struct cpumem *cm, unsigne
>  }
>  
>  void
> -counters_read(struct cpumem *cm, uint64_t *output, unsigned int n)
> +counters_read(struct cpumem *cm, uint64_t *output, unsigned int n,
> +uint64_t *scratch)
>  {
>   struct cpumem_iter cmi;
> - uint64_t *gen, *counters, *temp;
> + uint64_t *gen, *counters, *temp = scratch;
>   uint64_t enter, leave;
>   unsigned int i;
>  
>   for (i = 0; i < n; i++)
>   output[i] = 0;
>  
> - temp = mallocarray(n, sizeof(uint64_t), M_TEMP, M_WAITOK);
> + if (scratch == NULL)
> + temp = mallocarray(n, sizeof(uint64_t), M_TEMP, M_WAITOK);
>  
>   gen = cpumem_first(&cmi, cm);
>   do {
> @@ -202,7 +204,8 @@ counters_read(struct cpumem *cm, uint64_
>   gen = cpumem_next(&cmi, cm);
>   } while (gen != NULL);
>  
> - free(temp, M_TEMP, n * sizeof(uint64_t));
> + if (scratch == NULL)
> + free(temp, M_TEMP, n * sizeof(uint64_t));
>  }
>  
>  void
> @@ -305,7 +308,8 @@ counters_free(struct cpumem *cm, unsigne
>  }
>  
>  void
> -counters_read(struct cpumem *cm, uint64_t *output, unsigned int n)
> +counters_read(struct cpumem *cm, uint64_t *output, unsigned int n,
> +uint64_t *scratch)
>  {
>   uint64_t *counters;
>   unsigned int i;
> Index: sys/net/pfkeyv2_convert.c
> ===
> RCS file: /cvs/src/sys/net/pfkeyv2_convert.c,v
> retrieving revision 1.80
> diff -u -p -r1.80 pfkeyv2_convert.c
> --- sys/net/pfkeyv2_convert.c 7 Aug 20

Re: Use counters_read(9) from ddb(4)

2023-09-15 Thread Vitaliy Makkoveev
On Fri, Sep 15, 2023 at 04:18:13PM +0200, Martin Pieuchot wrote:
> On 11/09/23(Mon) 21:05, Martin Pieuchot wrote:
> > On 06/09/23(Wed) 23:13, Alexander Bluhm wrote:
> > > On Wed, Sep 06, 2023 at 12:23:33PM -0500, Scott Cheloha wrote:
> > > > On Wed, Sep 06, 2023 at 01:04:19PM +0100, Martin Pieuchot wrote:
> > > > > Debugging OOM is hard.  UVM uses per-CPU counters and sadly
> > > > > counters_read(9) needs to allocate memory.  This is not acceptable in
> > > > > ddb(4).  As a result I cannot see the content of UVM counters in OOM
> > > > > situations.
> > > > > 
> > > > > Diff below introduces a *_static() variant of counters_read(9) that
> > > > > takes a secondary buffer to avoid calling malloc(9).  Is it fine?  Do
> > > > > you have a better idea?  Should we make it the default or using the
> > > > > stack might be a problem?
> > > > 
> > > > Instead of adding a second interface I think we could get away with
> > > > just extending counters_read(9) to take a scratch buffer as an optional
> > > > fourth parameter:
> > > > 
> > > > void
> > > > counters_read(struct cpumem *cm, uint64_t *output, unsigned int n,
> > > > uint64_t *scratch);
> > > > 
> > > > "scratch"?  "temp"?  "tmp"?
> > > 
> > > scratch is fine for me
> > 
> > Fine with me.
> 
> Here's a full diff, works for me(tm), ok?
> 

ok mvs

> Index: sys/kern/kern_sysctl.c
> ===
> RCS file: /cvs/src/sys/kern/kern_sysctl.c,v
> retrieving revision 1.418
> diff -u -p -r1.418 kern_sysctl.c
> --- sys/kern/kern_sysctl.c16 Jul 2023 03:01:31 -  1.418
> +++ sys/kern/kern_sysctl.c15 Sep 2023 13:29:53 -
> @@ -519,7 +519,7 @@ kern_sysctl(int *name, u_int namelen, vo
>   unsigned int i;
>  
>   memset(&mbs, 0, sizeof(mbs));
> - counters_read(mbstat, counters, MBSTAT_COUNT);
> + counters_read(mbstat, counters, MBSTAT_COUNT, NULL);
>   for (i = 0; i < MBSTAT_TYPES; i++)
>   mbs.m_mtypes[i] = counters[i];
>  
> Index: sys/kern/subr_evcount.c
> ===
> RCS file: /cvs/src/sys/kern/subr_evcount.c,v
> retrieving revision 1.15
> diff -u -p -r1.15 subr_evcount.c
> --- sys/kern/subr_evcount.c   5 Dec 2022 08:58:49 -   1.15
> +++ sys/kern/subr_evcount.c   15 Sep 2023 14:01:55 -
> @@ -101,7 +101,7 @@ evcount_sysctl(int *name, u_int namelen,
>  {
>   int error = 0, s, nintr, i;
>   struct evcount *ec;
> - u_int64_t count;
> + uint64_t count, scratch;
>  
>   if (newp != NULL)
>   return (EPERM);
> @@ -129,7 +129,7 @@ evcount_sysctl(int *name, u_int namelen,
>   if (ec == NULL)
>   return (ENOENT);
>   if (ec->ec_percpu != NULL) {
> - counters_read(ec->ec_percpu, &count, 1);
> + counters_read(ec->ec_percpu, &count, 1, &scratch);
>   } else {
>   s = splhigh();
>   count = ec->ec_count;
> Index: sys/kern/subr_percpu.c
> ===
> RCS file: /cvs/src/sys/kern/subr_percpu.c,v
> retrieving revision 1.10
> diff -u -p -r1.10 subr_percpu.c
> --- sys/kern/subr_percpu.c3 Oct 2022 14:10:53 -   1.10
> +++ sys/kern/subr_percpu.c15 Sep 2023 14:16:41 -
> @@ -159,17 +159,19 @@ counters_free(struct cpumem *cm, unsigne
>  }
>  
>  void
> -counters_read(struct cpumem *cm, uint64_t *output, unsigned int n)
> +counters_read(struct cpumem *cm, uint64_t *output, unsigned int n,
> +uint64_t *scratch)
>  {
>   struct cpumem_iter cmi;
> - uint64_t *gen, *counters, *temp;
> + uint64_t *gen, *counters, *temp = scratch;
>   uint64_t enter, leave;
>   unsigned int i;
>  
>   for (i = 0; i < n; i++)
>   output[i] = 0;
>  
> - temp = mallocarray(n, sizeof(uint64_t), M_TEMP, M_WAITOK);
> + if (scratch == NULL)
> + temp = mallocarray(n, sizeof(uint64_t), M_TEMP, M_WAITOK);
>  
>   gen = cpumem_first(&cmi, cm);
>   do {
> @@ -202,7 +204,8 @@ counters_read(struct cpumem *cm, uint64_
>   gen = cpumem_next(&cmi, cm);
>   } while (gen != NULL);
>  
> - free(temp, M_TEMP, n * sizeof(uint64_t));
> + if (scratch == NULL)
> + free(temp, M_TEMP, n * sizeof(uint64_t));
>  }
>  
>  void
> @@ -305,7 +308,8 @@ counters_free(struct cpumem *cm, unsigne
>  }
>  
>  void
> -counters_read(struct cpumem *cm, uint64_t *output, unsigned int n)
> +counters_read(struct cpumem *cm, uint64_t *output, unsigned int n,
> +uint64_t *scratch)
>  {
>   uint64_t *counters;
>   unsigned int i;
> Index: sys/net/pfkeyv2_convert.c
> ===
> RCS file: /cvs/src/sys/net/pfkeyv2_convert.c,v
> retrieving revision 1.80
> diff -u -p -r1.80 pfkeyv2_convert.c
> --- sys/net/pfkeyv2_convert.c 7 Aug 20

Re: Use counters_read(9) from ddb(4)

2023-09-15 Thread Martin Pieuchot
On 11/09/23(Mon) 21:05, Martin Pieuchot wrote:
> On 06/09/23(Wed) 23:13, Alexander Bluhm wrote:
> > On Wed, Sep 06, 2023 at 12:23:33PM -0500, Scott Cheloha wrote:
> > > On Wed, Sep 06, 2023 at 01:04:19PM +0100, Martin Pieuchot wrote:
> > > > Debugging OOM is hard.  UVM uses per-CPU counters and sadly
> > > > counters_read(9) needs to allocate memory.  This is not acceptable in
> > > > ddb(4).  As a result I cannot see the content of UVM counters in OOM
> > > > situations.
> > > > 
> > > > Diff below introduces a *_static() variant of counters_read(9) that
> > > > takes a secondary buffer to avoid calling malloc(9).  Is it fine?  Do
> > > > you have a better idea?  Should we make it the default or using the
> > > > stack might be a problem?
> > > 
> > > Instead of adding a second interface I think we could get away with
> > > just extending counters_read(9) to take a scratch buffer as an optional
> > > fourth parameter:
> > > 
> > > void
> > > counters_read(struct cpumem *cm, uint64_t *output, unsigned int n,
> > > uint64_t *scratch);
> > > 
> > > "scratch"?  "temp"?  "tmp"?
> > 
> > scratch is fine for me
> 
> Fine with me.

Here's a full diff, works for me(tm), ok?

Index: sys/kern/kern_sysctl.c
===
RCS file: /cvs/src/sys/kern/kern_sysctl.c,v
retrieving revision 1.418
diff -u -p -r1.418 kern_sysctl.c
--- sys/kern/kern_sysctl.c  16 Jul 2023 03:01:31 -  1.418
+++ sys/kern/kern_sysctl.c  15 Sep 2023 13:29:53 -
@@ -519,7 +519,7 @@ kern_sysctl(int *name, u_int namelen, vo
unsigned int i;
 
memset(&mbs, 0, sizeof(mbs));
-   counters_read(mbstat, counters, MBSTAT_COUNT);
+   counters_read(mbstat, counters, MBSTAT_COUNT, NULL);
for (i = 0; i < MBSTAT_TYPES; i++)
mbs.m_mtypes[i] = counters[i];
 
Index: sys/kern/subr_evcount.c
===
RCS file: /cvs/src/sys/kern/subr_evcount.c,v
retrieving revision 1.15
diff -u -p -r1.15 subr_evcount.c
--- sys/kern/subr_evcount.c 5 Dec 2022 08:58:49 -   1.15
+++ sys/kern/subr_evcount.c 15 Sep 2023 14:01:55 -
@@ -101,7 +101,7 @@ evcount_sysctl(int *name, u_int namelen,
 {
int error = 0, s, nintr, i;
struct evcount *ec;
-   u_int64_t count;
+   uint64_t count, scratch;
 
if (newp != NULL)
return (EPERM);
@@ -129,7 +129,7 @@ evcount_sysctl(int *name, u_int namelen,
if (ec == NULL)
return (ENOENT);
if (ec->ec_percpu != NULL) {
-   counters_read(ec->ec_percpu, &count, 1);
+   counters_read(ec->ec_percpu, &count, 1, &scratch);
} else {
s = splhigh();
count = ec->ec_count;
Index: sys/kern/subr_percpu.c
===
RCS file: /cvs/src/sys/kern/subr_percpu.c,v
retrieving revision 1.10
diff -u -p -r1.10 subr_percpu.c
--- sys/kern/subr_percpu.c  3 Oct 2022 14:10:53 -   1.10
+++ sys/kern/subr_percpu.c  15 Sep 2023 14:16:41 -
@@ -159,17 +159,19 @@ counters_free(struct cpumem *cm, unsigne
 }
 
 void
-counters_read(struct cpumem *cm, uint64_t *output, unsigned int n)
+counters_read(struct cpumem *cm, uint64_t *output, unsigned int n,
+uint64_t *scratch)
 {
struct cpumem_iter cmi;
-   uint64_t *gen, *counters, *temp;
+   uint64_t *gen, *counters, *temp = scratch;
uint64_t enter, leave;
unsigned int i;
 
for (i = 0; i < n; i++)
output[i] = 0;
 
-   temp = mallocarray(n, sizeof(uint64_t), M_TEMP, M_WAITOK);
+   if (scratch == NULL)
+   temp = mallocarray(n, sizeof(uint64_t), M_TEMP, M_WAITOK);
 
gen = cpumem_first(&cmi, cm);
do {
@@ -202,7 +204,8 @@ counters_read(struct cpumem *cm, uint64_
gen = cpumem_next(&cmi, cm);
} while (gen != NULL);
 
-   free(temp, M_TEMP, n * sizeof(uint64_t));
+   if (scratch == NULL)
+   free(temp, M_TEMP, n * sizeof(uint64_t));
 }
 
 void
@@ -305,7 +308,8 @@ counters_free(struct cpumem *cm, unsigne
 }
 
 void
-counters_read(struct cpumem *cm, uint64_t *output, unsigned int n)
+counters_read(struct cpumem *cm, uint64_t *output, unsigned int n,
+uint64_t *scratch)
 {
uint64_t *counters;
unsigned int i;
Index: sys/net/pfkeyv2_convert.c
===
RCS file: /cvs/src/sys/net/pfkeyv2_convert.c,v
retrieving revision 1.80
diff -u -p -r1.80 pfkeyv2_convert.c
--- sys/net/pfkeyv2_convert.c   7 Aug 2023 03:35:06 -   1.80
+++ sys/net/pfkeyv2_convert.c   15 Sep 2023 13:33:37 -
@@ -992,7 +992,7 @@ export_counter(void **p, struct tdb *tdb
uint64_t counters[tdb_ncounters];
struct sadb_x_counter *scnt = (struct

Re: Use counters_read(9) from ddb(4)

2023-09-11 Thread Martin Pieuchot
On 06/09/23(Wed) 23:13, Alexander Bluhm wrote:
> On Wed, Sep 06, 2023 at 12:23:33PM -0500, Scott Cheloha wrote:
> > On Wed, Sep 06, 2023 at 01:04:19PM +0100, Martin Pieuchot wrote:
> > > Debugging OOM is hard.  UVM uses per-CPU counters and sadly
> > > counters_read(9) needs to allocate memory.  This is not acceptable in
> > > ddb(4).  As a result I cannot see the content of UVM counters in OOM
> > > situations.
> > > 
> > > Diff below introduces a *_static() variant of counters_read(9) that
> > > takes a secondary buffer to avoid calling malloc(9).  Is it fine?  Do
> > > you have a better idea?  Should we make it the default or using the
> > > stack might be a problem?
> > 
> > Instead of adding a second interface I think we could get away with
> > just extending counters_read(9) to take a scratch buffer as an optional
> > fourth parameter:
> > 
> > void
> > counters_read(struct cpumem *cm, uint64_t *output, unsigned int n,
> > uint64_t *scratch);
> > 
> > "scratch"?  "temp"?  "tmp"?
> 
> scratch is fine for me

Fine with me.



Re: Use counters_read(9) from ddb(4)

2023-09-06 Thread Alexander Bluhm
On Wed, Sep 06, 2023 at 12:23:33PM -0500, Scott Cheloha wrote:
> On Wed, Sep 06, 2023 at 01:04:19PM +0100, Martin Pieuchot wrote:
> > Debugging OOM is hard.  UVM uses per-CPU counters and sadly
> > counters_read(9) needs to allocate memory.  This is not acceptable in
> > ddb(4).  As a result I cannot see the content of UVM counters in OOM
> > situations.
> > 
> > Diff below introduces a *_static() variant of counters_read(9) that
> > takes a secondary buffer to avoid calling malloc(9).  Is it fine?  Do
> > you have a better idea?  Should we make it the default or using the
> > stack might be a problem?
> 
> Instead of adding a second interface I think we could get away with
> just extending counters_read(9) to take a scratch buffer as an optional
> fourth parameter:
> 
> void
> counters_read(struct cpumem *cm, uint64_t *output, unsigned int n,
> uint64_t *scratch);
> 
> "scratch"?  "temp"?  "tmp"?

scratch is fine for me

> This kinda looks like a case where we could annotate these pointers
> with 'restrict', but I have never fully understood when 'restrict' is
> appropriate vs. when it is overkill or useless.

restrict scares me

#ifdef MULTIPROCESSOR

>  void
> -counters_read(struct cpumem *cm, uint64_t *output, unsigned int n)
> +counters_read(struct cpumem *cm, uint64_t *output, unsigned int n,
> +uint64_t *scratch)

#else

>  void
> -counters_read(struct cpumem *cm, uint64_t *output, unsigned int n)
> +counters_read(struct cpumem *cm, uint64_t *output, uint64_t *scratch,
> +unsigned int n)

Here scratch and n are swapped.  Build a ramdisk kernel :-)

bluhm



Re: Use counters_read(9) from ddb(4)

2023-09-06 Thread Klemens Nanni
On Wed, Sep 06, 2023 at 12:23:33PM -0500, Scott Cheloha wrote:
> On Wed, Sep 06, 2023 at 01:04:19PM +0100, Martin Pieuchot wrote:
> > Debugging OOM is hard.  UVM uses per-CPU counters and sadly
> > counters_read(9) needs to allocate memory.  This is not acceptable in
> > ddb(4).  As a result I cannot see the content of UVM counters in OOM
> > situations.
> > 
> > Diff below introduces a *_static() variant of counters_read(9) that
> > takes a secondary buffer to avoid calling malloc(9).  Is it fine?  Do
> > you have a better idea?  Should we make it the default or using the
> > stack might be a problem?
> 
> Instead of adding a second interface I think we could get away with
> just extending counters_read(9) to take a scratch buffer as an optional
> fourth parameter:
> 
> void
> counters_read(struct cpumem *cm, uint64_t *output, unsigned int n,
> uint64_t *scratch);
> 
> "scratch"?  "temp"?  "tmp"?

Just "buf".

> 
> Anyway, a NULL scratch means "allocate this for me", otherwise you're
> saying you've brought your own.  Obviously the contents of scratch are
> undefined upon return.

That's precisely what I did some a year ago to get rid of counter_read()'s
sleep point in order to replace an rwlock with a mutex in network code.

It was OK, but I think the approach that required it stalled... not sure.

The diff mechanically added a NULL argument to all callers so that future
diffs would actually change semantics along with the code that needs it.

Here are the manual bits I wrote, in case it helps.

diff --git a/share/man/man9/counters_alloc.9 b/share/man/man9/counters_alloc.9
index 1de8bffc639..b8e1438d5fd 100644
--- a/share/man/man9/counters_alloc.9
+++ b/share/man/man9/counters_alloc.9
@@ -64,6 +64,7 @@
 .Fa "struct cpumem *cm"
 .Fa "uint64_t *counters"
 .Fa "unsigned int ncounters"
+.Fa "uint64_t *buf"
 .Fc
 .Ft void
 .Fn counters_zero "struct cpumem *cm" "unsigned int ncounters"
@@ -191,6 +192,12 @@ The sum of the counters is written to the
 array.
 The number of counters is specified with
 .Fa ncounters .
+.Fa buf
+may point to a buffer used to temporarily hold
+.Fa counters .
+If
+.Dv NULL ,
+one will be dynamically allocated and freed.
 .Pp
 .Fn counters_zero
 iterates over each CPU's set of counters referenced by



Re: Use counters_read(9) from ddb(4)

2023-09-06 Thread Scott Cheloha
On Wed, Sep 06, 2023 at 01:04:19PM +0100, Martin Pieuchot wrote:
> Debugging OOM is hard.  UVM uses per-CPU counters and sadly
> counters_read(9) needs to allocate memory.  This is not acceptable in
> ddb(4).  As a result I cannot see the content of UVM counters in OOM
> situations.
> 
> Diff below introduces a *_static() variant of counters_read(9) that
> takes a secondary buffer to avoid calling malloc(9).  Is it fine?  Do
> you have a better idea?  Should we make it the default or using the
> stack might be a problem?

Instead of adding a second interface I think we could get away with
just extending counters_read(9) to take a scratch buffer as an optional
fourth parameter:

void
counters_read(struct cpumem *cm, uint64_t *output, unsigned int n,
uint64_t *scratch);

"scratch"?  "temp"?  "tmp"?

Anyway, a NULL scratch means "allocate this for me", otherwise you're
saying you've brought your own.  Obviously the contents of scratch are
undefined upon return.

This kinda looks like a case where we could annotate these pointers
with 'restrict', but I have never fully understood when 'restrict' is
appropriate vs. when it is overkill or useless.

Index: ./kern/subr_percpu.c
===
RCS file: /cvs/src/sys/kern/subr_percpu.c,v
retrieving revision 1.10
diff -u -p -r1.10 subr_percpu.c
--- ./kern/subr_percpu.c3 Oct 2022 14:10:53 -   1.10
+++ ./kern/subr_percpu.c6 Sep 2023 17:18:46 -
@@ -159,17 +159,19 @@ counters_free(struct cpumem *cm, unsigne
 }
 
 void
-counters_read(struct cpumem *cm, uint64_t *output, unsigned int n)
+counters_read(struct cpumem *cm, uint64_t *output, unsigned int n,
+uint64_t *scratch)
 {
struct cpumem_iter cmi;
-   uint64_t *gen, *counters, *temp;
+   uint64_t *gen, *counters, *temp = scratch;
uint64_t enter, leave;
unsigned int i;
 
for (i = 0; i < n; i++)
output[i] = 0;
 
-   temp = mallocarray(n, sizeof(uint64_t), M_TEMP, M_WAITOK);
+   if (scratch == NULL)
+   temp = mallocarray(n, sizeof(uint64_t), M_TEMP, M_WAITOK);
 
gen = cpumem_first(&cmi, cm);
do {
@@ -202,7 +204,8 @@ counters_read(struct cpumem *cm, uint64_
gen = cpumem_next(&cmi, cm);
} while (gen != NULL);
 
-   free(temp, M_TEMP, n * sizeof(uint64_t));
+   if (scratch == NULL)
+   free(temp, M_TEMP, n * sizeof(uint64_t));
 }
 
 void
@@ -305,7 +308,8 @@ counters_free(struct cpumem *cm, unsigne
 }
 
 void
-counters_read(struct cpumem *cm, uint64_t *output, unsigned int n)
+counters_read(struct cpumem *cm, uint64_t *output, uint64_t *scratch,
+unsigned int n)
 {
uint64_t *counters;
unsigned int i;
Index: ./kern/kern_sysctl.c
===
RCS file: /cvs/src/sys/kern/kern_sysctl.c,v
retrieving revision 1.418
diff -u -p -r1.418 kern_sysctl.c
--- ./kern/kern_sysctl.c16 Jul 2023 03:01:31 -  1.418
+++ ./kern/kern_sysctl.c6 Sep 2023 17:18:47 -
@@ -519,7 +519,7 @@ kern_sysctl(int *name, u_int namelen, vo
unsigned int i;
 
memset(&mbs, 0, sizeof(mbs));
-   counters_read(mbstat, counters, MBSTAT_COUNT);
+   counters_read(mbstat, counters, nitems(counters), NULL);
for (i = 0; i < MBSTAT_TYPES; i++)
mbs.m_mtypes[i] = counters[i];
 
Index: ./kern/subr_evcount.c
===
RCS file: /cvs/src/sys/kern/subr_evcount.c,v
retrieving revision 1.15
diff -u -p -r1.15 subr_evcount.c
--- ./kern/subr_evcount.c   5 Dec 2022 08:58:49 -   1.15
+++ ./kern/subr_evcount.c   6 Sep 2023 17:18:47 -
@@ -101,7 +101,7 @@ evcount_sysctl(int *name, u_int namelen,
 {
int error = 0, s, nintr, i;
struct evcount *ec;
-   u_int64_t count;
+   u_int64_t count, scratch;
 
if (newp != NULL)
return (EPERM);
@@ -129,7 +129,7 @@ evcount_sysctl(int *name, u_int namelen,
if (ec == NULL)
return (ENOENT);
if (ec->ec_percpu != NULL) {
-   counters_read(ec->ec_percpu, &count, 1);
+   counters_read(ec->ec_percpu, &count, 1, &scratch);
} else {
s = splhigh();
count = ec->ec_count;
Index: ./net/if.c
===
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.707
diff -u -p -r1.707 if.c
--- ./net/if.c  18 Aug 2023 08:10:16 -  1.707
+++ ./net/if.c  6 Sep 2023 17:18:49 -
@@ -2797,7 +2797,8 @@ if_getdata(struct ifnet *ifp, struct if_
if (ifp->if_counters != NULL) {
uint64_t counters[ifc_ncounters];
 
-   counters_read(ifp->if_counters, counters, nitems(counters));
+   

Use counters_read(9) from ddb(4)

2023-09-06 Thread Martin Pieuchot
Debugging OOM is hard.  UVM uses per-CPU counters and sadly
counters_read(9) needs to allocate memory.  This is not acceptable in
ddb(4).  As a result I cannot see the content of UVM counters in OOM
situations.

Diff below introduces a *_static() variant of counters_read(9) that
takes a secondary buffer to avoid calling malloc(9).  Is it fine?  Do
you have a better idea?  Should we make it the default or using the
stack might be a problem?

Thanks,
Martin

Index: kern/subr_percpu.c
===
RCS file: /cvs/src/sys/kern/subr_percpu.c,v
retrieving revision 1.10
diff -u -p -r1.10 subr_percpu.c
--- kern/subr_percpu.c  3 Oct 2022 14:10:53 -   1.10
+++ kern/subr_percpu.c  6 Sep 2023 11:54:31 -
@@ -161,15 +161,25 @@ counters_free(struct cpumem *cm, unsigne
 void
 counters_read(struct cpumem *cm, uint64_t *output, unsigned int n)
 {
-   struct cpumem_iter cmi;
-   uint64_t *gen, *counters, *temp;
-   uint64_t enter, leave;
+   uint64_t *temp;
unsigned int i;
 
for (i = 0; i < n; i++)
output[i] = 0;
 
temp = mallocarray(n, sizeof(uint64_t), M_TEMP, M_WAITOK);
+   counters_read_static(cm, output, n, temp);
+   free(temp, M_TEMP, n * sizeof(uint64_t));
+}
+
+void
+counters_read_static(struct cpumem *cm, uint64_t *output, unsigned int n,
+uint64_t *temp)
+{
+   struct cpumem_iter cmi;
+   uint64_t *gen, *counters;
+   uint64_t enter, leave;
+   unsigned int i;
 
gen = cpumem_first(&cmi, cm);
do {
@@ -201,8 +211,6 @@ counters_read(struct cpumem *cm, uint64_
 
gen = cpumem_next(&cmi, cm);
} while (gen != NULL);
-
-   free(temp, M_TEMP, n * sizeof(uint64_t));
 }
 
 void
Index: sys/percpu.h
===
RCS file: /cvs/src/sys/sys/percpu.h,v
retrieving revision 1.8
diff -u -p -r1.8 percpu.h
--- sys/percpu.h28 Aug 2018 15:15:02 -  1.8
+++ sys/percpu.h6 Sep 2023 11:52:55 -
@@ -114,6 +114,8 @@ struct cpumem   *counters_alloc(unsigned i
 struct cpumem  *counters_alloc_ncpus(struct cpumem *, unsigned int);
 voidcounters_free(struct cpumem *, unsigned int);
 voidcounters_read(struct cpumem *, uint64_t *, unsigned int);
+voidcounters_read_static(struct cpumem *, uint64_t *,
+unsigned int, uint64_t *);
 voidcounters_zero(struct cpumem *, unsigned int);
 
 static inline uint64_t *
Index: uvm/uvm_meter.c
===
RCS file: /cvs/src/sys/uvm/uvm_meter.c,v
retrieving revision 1.49
diff -u -p -r1.49 uvm_meter.c
--- uvm/uvm_meter.c 18 Aug 2023 09:18:52 -  1.49
+++ uvm/uvm_meter.c 6 Sep 2023 11:53:02 -
@@ -249,11 +249,12 @@ uvm_total(struct vmtotal *totalp)
 void
 uvmexp_read(struct uvmexp *uexp)
 {
-   uint64_t counters[exp_ncounters];
+   uint64_t counters[exp_ncounters], temp[exp_ncounters];
 
memcpy(uexp, &uvmexp, sizeof(*uexp));
 
-   counters_read(uvmexp_counters, counters, exp_ncounters);
+   counters_read_static(uvmexp_counters, counters, exp_ncounters,
+   temp);
 
/* stat counters */
uexp->faults = (int)counters[faults];