The branch main has been updated by glebius: URL: https://cgit.FreeBSD.org/src/commit/?id=776604651ea640d65baa241c90fb0531aba30f29
commit 776604651ea640d65baa241c90fb0531aba30f29 Author: Gleb Smirnoff <[email protected]> AuthorDate: 2026-03-05 03:16:00 +0000 Commit: Gleb Smirnoff <[email protected]> CommitDate: 2026-03-05 03:16:00 +0000 hpts: remove call into TCP HPTS from userret() This hack introduced in d7955cc0ffdf and e3cbc572f154 proved to have more ill side effects than benefits. Sorry for that. Now the HPTS soft clock is called only after the LRO completion. Refactor HPTS module linkage to address that and share the pointer only between HPTS and LRO. Reviewed by: Nick Banks Differential Revision: https://reviews.freebsd.org/D55640 --- sys/kern/subr_trap.c | 16 ---------------- sys/netinet/tcp_hpts.c | 21 +++++++++------------ sys/netinet/tcp_hpts.h | 3 +++ sys/netinet/tcp_lro.c | 4 +++- sys/netinet/tcp_lro_hpts.c | 4 +++- sys/sys/systm.h | 8 -------- 6 files changed, 18 insertions(+), 38 deletions(-) diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c index bac7d0080c71..bad3319afdfe 100644 --- a/sys/kern/subr_trap.c +++ b/sys/kern/subr_trap.c @@ -74,10 +74,6 @@ #include <sys/epoch.h> #endif -volatile uint32_t __read_frequently hpts_that_need_softclock = 0; - -void (*tcp_hpts_softclock)(void); - /* * Define the code needed before returning to user mode, for trap and * syscall. @@ -129,18 +125,6 @@ userret(struct thread *td, struct trapframe *frame) if (PMC_THREAD_HAS_SAMPLES(td)) PMC_CALL_HOOK(td, PMC_FN_THR_USERRET, NULL); #endif - /* - * Calling tcp_hpts_softclock() here allows us to avoid frequent, - * expensive callouts that trash the cache and lead to a much higher - * number of interrupts and context switches. Testing on busy web - * servers at Netflix has shown that this improves CPU use by 7% over - * relying only on callouts to drive HPTS, and also results in idle - * power savings on mostly idle servers. - * This was inspired by the paper "Soft Timers: Efficient Microsecond - * Software Timer Support for Network Processing" - * by Mohit Aron and Peter Druschel. - */ - tcp_hpts_softclock(); /* * Let the scheduler adjust our priority etc. */ diff --git a/sys/netinet/tcp_hpts.c b/sys/netinet/tcp_hpts.c index b8650cd60c2f..67b028d7603b 100644 --- a/sys/netinet/tcp_hpts.c +++ b/sys/netinet/tcp_hpts.c @@ -185,6 +185,7 @@ const struct tcp_hptsi_funcs tcp_hptsi_default_funcs = { static MALLOC_DEFINE(M_TCPHPTS, "tcp_hpts", "TCP hpts"); static void tcp_hpts_thread(void *ctx); +static volatile uint32_t __read_frequently hpts_that_need_softclock; #define LOWEST_SLEEP_ALLOWED 50 #define DEFAULT_MIN_SLEEP 250 /* How many usec's is default for hpts sleep @@ -1560,13 +1561,16 @@ tcp_choose_hpts_to_run(struct tcp_hptsi *pace) return(pace->rp_ent[(curcpu % pace->rp_num_hptss)]); } -static void -__tcp_run_hpts(void) +void +_tcp_hpts_softclock(void) { struct epoch_tracker et; struct tcp_hpts_entry *hpts; int slots_ran; + if (hpts_that_need_softclock == 0) + return; + hpts = tcp_choose_hpts_to_run(tcp_hptsi_pace); if (hpts->p_hpts_active) { @@ -1607,7 +1611,7 @@ __tcp_run_hpts(void) } /* * In this mode the timer is a backstop to - * all the userret/lro_flushes so we use + * all the lro_flushes so we use * the dynamic value and set the on_min_sleep * flag so we will not be awoken. */ @@ -1733,7 +1737,7 @@ tcp_hpts_thread(void *ctx) } /* * In this mode the timer is a backstop to - * all the userret/lro_flushes so we use + * all the lro_flushes so we use * the dynamic value and set the on_min_sleep * flag so we will not be awoken. */ @@ -2124,9 +2128,6 @@ tcp_hpts_mod_load(void) /* Start the threads */ tcp_hptsi_start(tcp_hptsi_pace); - /* Enable the global HPTS softclock function */ - tcp_hpts_softclock = __tcp_run_hpts; - /* Initialize LRO HPTS */ tcp_lro_hpts_init(); @@ -2154,9 +2155,6 @@ tcp_hpts_mod_unload(void) { tcp_lro_hpts_uninit(); - /* Disable the global HPTS softclock function */ - atomic_store_ptr(&tcp_hpts_softclock, NULL); - tcp_hptsi_stop(tcp_hptsi_pace); tcp_hptsi_destroy(tcp_hptsi_pace); tcp_hptsi_pace = NULL; @@ -2185,8 +2183,7 @@ tcp_hpts_mod_event(module_t mod, int what, void *arg) /* * Since we are a dependency of TCP stack modules, they should * already be unloaded, and the HPTS ring is empty. However, - * function pointer manipulations aren't 100% safe. Although, - * tcp_hpts_mod_unload() use atomic(9) the userret() doesn't. + * function pointer manipulations aren't 100% safe. * Thus, allow only forced unload of HPTS. */ return (EBUSY); diff --git a/sys/netinet/tcp_hpts.h b/sys/netinet/tcp_hpts.h index 13527f7f9a4a..b7ff957a0577 100644 --- a/sys/netinet/tcp_hpts.h +++ b/sys/netinet/tcp_hpts.h @@ -93,6 +93,9 @@ struct hpts_diag { #ifdef _KERNEL +void _tcp_hpts_softclock(void); +extern void (*tcp_hpts_softclock)(void); + extern struct tcp_hptsi *tcp_hptsi_pace; /* diff --git a/sys/netinet/tcp_lro.c b/sys/netinet/tcp_lro.c index 3b04d757c0f9..59358db6ecf9 100644 --- a/sys/netinet/tcp_lro.c +++ b/sys/netinet/tcp_lro.c @@ -89,6 +89,7 @@ SYSCTL_NODE(_net_inet_tcp, OID_AUTO, lro, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, long tcplro_stacks_wanting_mbufq; int (*tcp_lro_flush_tcphpts)(struct lro_ctrl *lc, struct lro_entry *le); +void (*tcp_hpts_softclock)(void); counter_u64_t tcp_inp_lro_direct_queue; counter_u64_t tcp_inp_lro_wokeup_queue; @@ -1257,7 +1258,8 @@ tcp_lro_flush_all(struct lro_ctrl *lc) done: /* flush active streams */ tcp_lro_rx_done(lc); - tcp_hpts_softclock(); + if (tcp_hpts_softclock != NULL) + tcp_hpts_softclock(); lc->lro_mbuf_count = 0; } diff --git a/sys/netinet/tcp_lro_hpts.c b/sys/netinet/tcp_lro_hpts.c index 99989d2833c0..4a606e36514f 100644 --- a/sys/netinet/tcp_lro_hpts.c +++ b/sys/netinet/tcp_lro_hpts.c @@ -665,11 +665,13 @@ _tcp_lro_flush_tcphpts(struct lro_ctrl *lc, struct lro_entry *le) void tcp_lro_hpts_init(void) { + tcp_hpts_softclock = _tcp_hpts_softclock; tcp_lro_flush_tcphpts = _tcp_lro_flush_tcphpts; } void tcp_lro_hpts_uninit(void) { - atomic_store_ptr(&tcp_lro_flush_tcphpts, NULL); + tcp_hpts_softclock = NULL; + tcp_lro_flush_tcphpts = NULL; } diff --git a/sys/sys/systm.h b/sys/sys/systm.h index c4e0aafac452..1c96d99ff98f 100644 --- a/sys/sys/systm.h +++ b/sys/sys/systm.h @@ -399,14 +399,6 @@ void cpu_et_frequency(struct eventtimer *et, uint64_t newfreq); extern int cpu_disable_c2_sleep; extern int cpu_disable_c3_sleep; -extern void (*tcp_hpts_softclock)(void); -extern volatile uint32_t __read_frequently hpts_that_need_softclock; - -#define tcp_hpts_softclock() do { \ - if (hpts_that_need_softclock > 0) \ - tcp_hpts_softclock(); \ -} while (0) - char *kern_getenv(const char *name); void freeenv(char *env); int getenv_int(const char *name, int *data);
