Module Name: src
Committed By: riastradh
Date: Sun Mar 13 17:21:29 UTC 2022
Modified Files:
src/sys/kern: kern_proc.c
Log Message:
kern: Fix ordering of loads for pid_table and pid_tbl_mask.
This introduces a load-acquire where there was none before. This is
a simple correctness change. We could avoid the load-acquire, and
use only load-consume, if we used a pointer indirection for _both_
pid_table and pid_tbl_mask. That would take a little more work, and
would probably cost an additional cache line of memory traffic, but
it might be worth it to avoid the load-acquire for pid lookup.
Reported-by: [email protected]
Reported-by: [email protected]
Reported-by: [email protected]
To generate a diff of this commit:
cvs rdiff -u -r1.264 -r1.265 src/sys/kern/kern_proc.c
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/sys/kern/kern_proc.c
diff -u src/sys/kern/kern_proc.c:1.264 src/sys/kern/kern_proc.c:1.265
--- src/sys/kern/kern_proc.c:1.264 Thu Mar 10 12:21:35 2022
+++ src/sys/kern/kern_proc.c Sun Mar 13 17:21:29 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: kern_proc.c,v 1.264 2022/03/10 12:21:35 riastradh Exp $ */
+/* $NetBSD: kern_proc.c,v 1.265 2022/03/13 17:21:29 riastradh Exp $ */
/*-
* Copyright (c) 1999, 2006, 2007, 2008, 2020 The NetBSD Foundation, Inc.
@@ -62,7 +62,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_proc.c,v 1.264 2022/03/10 12:21:35 riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_proc.c,v 1.265 2022/03/13 17:21:29 riastradh Exp $");
#ifdef _KERNEL_OPT
#include "opt_kstack.h"
@@ -682,6 +682,7 @@ struct lwp *
proc_find_lwp(proc_t *p, pid_t pid)
{
struct pid_table *pt;
+ unsigned pt_mask;
struct lwp *l = NULL;
uintptr_t slot;
int s;
@@ -689,13 +690,22 @@ proc_find_lwp(proc_t *p, pid_t pid)
KASSERT(mutex_owned(p->p_lock));
/*
- * Look in the pid_table. This is done unlocked inside a pserialize
- * read section covering pid_table's memory allocation only, so take
- * care to read the slot atomically and only once. This issues a
- * memory barrier for dependent loads on alpha.
+ * Look in the pid_table. This is done unlocked inside a
+ * pserialize read section covering pid_table's memory
+ * allocation only, so take care to read things in the correct
+ * order:
+ *
+ * 1. First read the table mask -- this only ever increases, in
+ * expand_pid_table, so a stale value is safely
+ * conservative.
+ *
+ * 2. Next read the pid table -- this is always set _before_
+ * the mask increases, so if we see a new table and stale
+ * mask, the mask is still valid for the table.
*/
s = pserialize_read_enter();
- pt = &atomic_load_consume(&pid_table)[pid & pid_tbl_mask];
+ pt_mask = atomic_load_acquire(&pid_tbl_mask);
+ pt = &atomic_load_consume(&pid_table)[pid & pt_mask];
slot = atomic_load_consume(&pt->pt_slot);
if (__predict_false(!PT_IS_LWP(slot))) {
pserialize_read_exit(s);
@@ -742,18 +752,28 @@ struct lwp *
proc_find_lwp_unlocked(proc_t *p, pid_t pid)
{
struct pid_table *pt;
+ unsigned pt_mask;
struct lwp *l = NULL;
uintptr_t slot;
KASSERT(pserialize_in_read_section());
/*
- * Look in the pid_table. This is done unlocked inside a pserialize
- * read section covering pid_table's memory allocation only, so take
- * care to read the slot atomically and only once. This issues a
- * memory barrier for dependent loads on alpha.
+ * Look in the pid_table. This is done unlocked inside a
+ * pserialize read section covering pid_table's memory
+ * allocation only, so take care to read things in the correct
+ * order:
+ *
+ * 1. First read the table mask -- this only ever increases, in
+ * expand_pid_table, so a stale value is safely
+ * conservative.
+ *
+ * 2. Next read the pid table -- this is always set _before_
+ * the mask increases, so if we see a new table and stale
+ * mask, the mask is still valid for the table.
*/
- pt = &atomic_load_consume(&pid_table)[pid & pid_tbl_mask];
+ pt_mask = atomic_load_acquire(&pid_tbl_mask);
+ pt = &atomic_load_consume(&pid_table)[pid & pt_mask];
slot = atomic_load_consume(&pt->pt_slot);
if (__predict_false(!PT_IS_LWP(slot))) {
return NULL;
@@ -1004,7 +1024,8 @@ expand_pid_table(void)
tsz = pt_size * sizeof(struct pid_table);
n_pt = pid_table;
atomic_store_release(&pid_table, new_pt);
- pid_tbl_mask = new_pt_mask;
+ KASSERT(new_pt_mask >= pid_tbl_mask);
+ atomic_store_release(&pid_tbl_mask, new_pt_mask);
/*
* pid_max starts as PID_MAX (= 30000), once we have 16384
@@ -1183,6 +1204,8 @@ proc_free_pid_internal(pid_t pid, uintpt
{
struct pid_table *pt;
+ KASSERT(mutex_owned(&proc_lock));
+
pt = &pid_table[pid & pid_tbl_mask];
KASSERT(PT_GET_TYPE(pt->pt_slot) == type);