Re: Enable pool cache on knote_pool

2021-06-01 Thread Visa Hankala
On Tue, Jun 01, 2021 at 08:23:24AM +1000, David Gwynne wrote:
> 
> > On 1 Jun 2021, at 02:58, Visa Hankala  wrote:
> > 
> > This patch enables the pool cache feature on the knote pool to reduce
> > the overhead of knote management.
> > 
> > Profiling done by mpi@ and bluhm@ indicates that the potentially
> > needless allocation of knotes in kqueue_register() causes a slowdown
> > with kqueue-based poll(2) and select(2).
> > 
> > One approach to fix this is to reverse the function's initial guess
> > about the knote: try without allocating first, then allocate and retry
> > if the knote is missing from the kqueue and EV_ADD is given.
> > 
> > Another option is to cache free knotes so that the shared knote pool
> > would be accessed less frequently.
> > 
> > The following diff takes the second approach. The caching is implemented
> > simply by enabling the pool cache feature. This makes use of existing
> > code and does not complicate kqueue_register(). The feature also helps
> > if there is heavy knote churn.
> > 
> > I think the most substantial part of the diff is that it extends pool
> > cache usage beyond mbufs. Is this change acceptable?
> 
> absolutely.
> 
> > Note the cache is not particularly useful without kqueue-based poll(2)
> > and select(2). The pool view of systat(1) shows that there are pools
> > that would benefit more than knote_pool from caching, at least in terms
> > of request frequencies. The relative frequencies are dependent on system
> > workload, though. Kqpoll would definitely make the knote pool more
> > heavily used.
> 
> ok.
> 
> separate to this diff, at some point maybe we should have a task list/dohook 
> thing for "per cpu init" like mountroot or startup?

At first I fumbled with a deferring version of pool_cache_init(), but
the idea felt overly specific. However, a more general dohook might
indeed make sense.

It is a bit unfortunate that the deferral is needed at all, though.
The number of different boot-time deferral mechanisms keeps growing
as well.

The diff below omits a manual page.
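
To illustrate the intended use, here is a hypothetical consumer of the
new hooks; kqueue_enable_cache() is a made-up name for this example,
with the knote pool as the motivating user:

void
kqueue_enable_cache(void *arg)
{
	/* Safe now: all CPUs have been attached. */
	pool_cache_init(&knote_pool);
}

void
kqueue_init(void)
{
	/* ... existing pool_init(&knote_pool, ...) ... */
	percpuhook_establish(kqueue_enable_cache, NULL);
}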

Index: kern/init_main.c
===================================================================
RCS file: src/sys/kern/init_main.c,v
retrieving revision 1.306
diff -u -p -r1.306 init_main.c
--- kern/init_main.c	8 Feb 2021 10:51:01 -0000	1.306
+++ kern/init_main.c	1 Jun 2021 15:29:05 -0000
@@ -432,6 +432,9 @@ main(void *framep)
 	prof_init();
 #endif
 
+	/* Run hooks that require the presence of all CPUs. */
+	dopercpuhooks();
+
 	mbcpuinit();	/* enable per cpu mbuf data */
 	uvm_init_percpu();
 
Index: kern/kern_subr.c
===================================================================
RCS file: src/sys/kern/kern_subr.c,v
retrieving revision 1.50
diff -u -p -r1.50 kern_subr.c
--- kern/kern_subr.c	29 Apr 2018 17:26:31 -0000	1.50
+++ kern/kern_subr.c	1 Jun 2021 15:29:05 -0000
@@ -198,6 +198,8 @@ hashfree(void *hash, int elements, int t
  * "startup hook" types, functions, and variables.
  */
 
+struct hook_desc_head percpuhook_list =
+    TAILQ_HEAD_INITIALIZER(percpuhook_list);
 struct hook_desc_head startuphook_list =
     TAILQ_HEAD_INITIALIZER(startuphook_list);
 
Index: sys/systm.h
===================================================================
RCS file: src/sys/sys/systm.h,v
retrieving revision 1.153
diff -u -p -r1.153 systm.h
--- sys/systm.h	28 Apr 2021 09:42:04 -0000	1.153
+++ sys/systm.h	1 Jun 2021 15:29:05 -0000
@@ -283,6 +283,9 @@ void	wdog_register(int (*)(void *, int),
 void	wdog_shutdown(void *);
 
 /*
+ * Per-CPU hooks are functions that are run after all CPUs have been detected
+ * but before the scheduler has started.
+ *
  * Startup hooks are functions running after the scheduler has started
  * but before any threads have been created or root has been mounted.
  */
@@ -294,6 +297,7 @@ struct hook_desc {
 };
 TAILQ_HEAD(hook_desc_head, hook_desc);
 
+extern struct hook_desc_head percpuhook_list;
 extern struct hook_desc_head startuphook_list;
 
 void	*hook_establish(struct hook_desc_head *, int, void (*)(void *), void *);
@@ -303,6 +307,12 @@ void	dohooks(struct hook_desc_head *, in
 #define HOOK_REMOVE	0x01
 #define HOOK_FREE	0x02
 
+#define percpuhook_establish(fn, arg) \
+	hook_establish(&percpuhook_list, 1, (fn), (arg))
+#define percpuhook_disestablish(vhook) \
+	hook_disestablish(&percpuhook_list, (vhook))
+#define dopercpuhooks() dohooks(&percpuhook_list, HOOK_REMOVE|HOOK_FREE)
+
 #define startuphook_establish(fn, arg) \
 	hook_establish(&startuphook_list, 1, (fn), (arg))
 #define startuphook_disestablish(vhook) \



Re: Enable pool cache on knote_pool

2021-05-31 Thread David Gwynne



> On 1 Jun 2021, at 02:58, Visa Hankala  wrote:
> 
> This patch enables the pool cache feature on the knote pool to reduce
> the overhead of knote management.
> 
> Profiling done by mpi@ and bluhm@ indicates that the potentially
> needless allocation of knotes in kqueue_register() causes a slowdown
> with kqueue-based poll(2) and select(2).
> 
> One approach to fix this is to reverse the function's initial guess
> about the knote: try without allocating first, then allocate and retry
> if the knote is missing from the kqueue and EV_ADD is given.
> 
> Another option is to cache free knotes so that the shared knote pool
> would be accessed less frequently.
> 
> The following diff takes the second approach. The caching is implemented
> simply by enabling the pool cache feature. This makes use of existing
> code and does not complicate kqueue_register(). The feature also helps
> if there is heavy knote churn.
> 
> I think the most substantial part of the diff is that it extends pool
> cache usage beyond mbufs. Is this change acceptable?

absolutely.

> Note the cache is not particularly useful without kqueue-based poll(2)
> and select(2). The pool view of systat(1) shows that there are pools
> that would benefit more than knote_pool from caching, at least in terms
> of request frequencies. The relative frequencies are dependent on system
> workload, though. Kqpoll would definitely make the knote pool more
> heavily used.

ok.

separate to this diff, at some point maybe we should have a task list/dohook 
thing for "per cpu init" like mountroot or startup?

> Index: kern/init_main.c
> ===================================================================
> RCS file: src/sys/kern/init_main.c,v
> retrieving revision 1.306
> diff -u -p -r1.306 init_main.c
> --- kern/init_main.c	8 Feb 2021 10:51:01 -0000	1.306
> +++ kern/init_main.c	31 May 2021 16:50:17 -0000
> @@ -71,6 +71,7 @@
> #include 
> #endif
> #include 
> +#include <sys/event.h>
> #include 
> #include 
> #include 
> @@ -148,7 +149,6 @@ void	crypto_init(void);
>  void	db_ctf_init(void);
>  void	prof_init(void);
>  void	init_exec(void);
> -void	kqueue_init(void);
>  void	futex_init(void);
>  void	taskq_init(void);
>  void	timeout_proc_init(void);
> @@ -432,7 +432,9 @@ main(void *framep)
>  	prof_init();
>  #endif
> 
> -	mbcpuinit();	/* enable per cpu mbuf data */
> +	/* Enable per-CPU data. */
> +	mbcpuinit();
> +	kqueue_init_percpu();
>  	uvm_init_percpu();
> 
>  	/* init exec and emul */
> Index: kern/kern_event.c
> ===================================================================
> RCS file: src/sys/kern/kern_event.c,v
> retrieving revision 1.163
> diff -u -p -r1.163 kern_event.c
> --- kern/kern_event.c	22 Apr 2021 15:30:12 -0000	1.163
> +++ kern/kern_event.c	31 May 2021 16:50:17 -0000
> @@ -231,6 +231,12 @@ kqueue_init(void)
>  	    PR_WAITOK, "knotepl", NULL);
>  }
> 
> +void
> +kqueue_init_percpu(void)
> +{
> +	pool_cache_init(&knote_pool);
> +}
> +
>  int
>  filt_fileattach(struct knote *kn)
>  {
> Index: sys/event.h
> ===================================================================
> RCS file: src/sys/sys/event.h,v
> retrieving revision 1.54
> diff -u -p -r1.54 event.h
> --- sys/event.h	24 Feb 2021 14:59:52 -0000	1.54
> +++ sys/event.h	31 May 2021 16:50:18 -0000
> @@ -292,6 +292,8 @@ extern void	knote_fdclose(struct proc *p
>  extern void	knote_processexit(struct proc *);
>  extern void	knote_modify(const struct kevent *, struct knote *);
>  extern void	knote_submit(struct knote *, struct kevent *);
> +extern void	kqueue_init(void);
> +extern void	kqueue_init_percpu(void);
>  extern int	kqueue_register(struct kqueue *kq,
>  	    struct kevent *kev, struct proc *p);
>  extern int	kqueue_scan(struct kqueue_scan_state *, int, struct kevent *,
> 



Enable pool cache on knote_pool

2021-05-31 Thread Visa Hankala
This patch enables the pool cache feature on the knote pool to reduce
the overhead of knote management.

Profiling done by mpi@ and bluhm@ indicates that the potentially
needless allocation of knotes in kqueue_register() causes a slowdown
with kqueue-based poll(2) and select(2).

One approach to fix this is to reverse the function's initial guess
about the knote: try without allocating first, then allocate and retry
if the knote is missing from the kqueue and EV_ADD is given.
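
A rough sketch of that lookup-first shape, for illustration only; the
kqueue_lookup_knote() helper is hypothetical and stands in for the
real lookup and locking done in kqueue_register():

int
kqueue_register_sketch(struct kqueue *kq, struct kevent *kev)
{
	struct knote *kn, *newkn = NULL;

again:
	kn = kqueue_lookup_knote(kq, kev);	/* hypothetical lookup */
	if (kn == NULL) {
		if ((kev->flags & EV_ADD) == 0)
			return (ENOENT);
		if (newkn == NULL) {
			/* Allocate only on a confirmed miss and retry,
			 * as the lookup may race with another insert. */
			newkn = pool_get(&knote_pool, PR_WAITOK);
			goto again;
		}
		kn = newkn;
		newkn = NULL;
		/* ... attach kn to kq and the event source ... */
	}
	/* ... apply kev to kn ... */
	if (newkn != NULL)
		pool_put(&knote_pool, newkn);
	return (0);
}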

Another option is to cache free knotes so that the shared knote pool
would be accessed less frequently.

The following diff takes the second approach. The caching is implemented
simply by enabling the pool cache feature. This makes use of existing
code and does not complicate kqueue_register(). The feature also helps
if there is heavy knote churn.
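
Callers need no changes for this: once pool_cache_init() has run,
pool_get(9) and pool_put(9) serve requests from a per-CPU item cache
first and fall back to the shared pool, so allocation sites keep the
familiar shape:

	struct knote *kn;

	kn = pool_get(&knote_pool, PR_WAITOK);
	/* ... */
	pool_put(&knote_pool, kn);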

I think the most substantial part of the diff is that it extends pool
cache usage beyond mbufs. Is this change acceptable?

Note the cache is not particularly useful without kqueue-based poll(2)
and select(2). The pool view of systat(1) shows that there are pools
that would benefit more than knote_pool from caching, at least in terms
of request frequencies. The relative frequencies are dependent on system
workload, though. Kqpoll would definitely make the knote pool more
heavily used.
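
(For reference, the pool view mentioned above is reached with:

	$ systat pool

which lists per-pool request counters for comparing frequencies.)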

Index: kern/init_main.c
===================================================================
RCS file: src/sys/kern/init_main.c,v
retrieving revision 1.306
diff -u -p -r1.306 init_main.c
--- kern/init_main.c	8 Feb 2021 10:51:01 -0000	1.306
+++ kern/init_main.c	31 May 2021 16:50:17 -0000
@@ -71,6 +71,7 @@
 #include 
 #endif
 #include 
+#include <sys/event.h>
 #include 
 #include 
 #include 
@@ -148,7 +149,6 @@ void	crypto_init(void);
 void	db_ctf_init(void);
 void	prof_init(void);
 void	init_exec(void);
-void	kqueue_init(void);
 void	futex_init(void);
 void	taskq_init(void);
 void	timeout_proc_init(void);
@@ -432,7 +432,9 @@ main(void *framep)
 	prof_init();
 #endif
 
-	mbcpuinit();	/* enable per cpu mbuf data */
+	/* Enable per-CPU data. */
+	mbcpuinit();
+	kqueue_init_percpu();
 	uvm_init_percpu();
 
 	/* init exec and emul */
Index: kern/kern_event.c
===================================================================
RCS file: src/sys/kern/kern_event.c,v
retrieving revision 1.163
diff -u -p -r1.163 kern_event.c
--- kern/kern_event.c	22 Apr 2021 15:30:12 -0000	1.163
+++ kern/kern_event.c	31 May 2021 16:50:17 -0000
@@ -231,6 +231,12 @@ kqueue_init(void)
 	    PR_WAITOK, "knotepl", NULL);
 }
 
+void
+kqueue_init_percpu(void)
+{
+	pool_cache_init(&knote_pool);
+}
+
 int
 filt_fileattach(struct knote *kn)
 {
Index: sys/event.h
===================================================================
RCS file: src/sys/sys/event.h,v
retrieving revision 1.54
diff -u -p -r1.54 event.h
--- sys/event.h	24 Feb 2021 14:59:52 -0000	1.54
+++ sys/event.h	31 May 2021 16:50:18 -0000
@@ -292,6 +292,8 @@ extern void	knote_fdclose(struct proc *p
 extern void	knote_processexit(struct proc *);
 extern void	knote_modify(const struct kevent *, struct knote *);
 extern void	knote_submit(struct knote *, struct kevent *);
+extern void	kqueue_init(void);
+extern void	kqueue_init_percpu(void);
 extern int	kqueue_register(struct kqueue *kq,
 	    struct kevent *kev, struct proc *p);
 extern int	kqueue_scan(struct kqueue_scan_state *, int, struct kevent *,