Module Name:    src
Committed By:   martin
Date:           Mon Jun 21 16:14:14 UTC 2021

Modified Files:
        src/sys/kern [netbsd-9]: kern_ksyms.c

Log Message:
Pull up following revision(s) (requested by riastradh in ticket #1299):

        sys/kern/kern_ksyms.c: revision 1.90
        sys/kern/kern_ksyms.c: revision 1.91
        sys/kern/kern_ksyms.c: revision 1.92
        sys/kern/kern_ksyms.c: revision 1.93
        sys/kern/kern_ksyms.c: revision 1.94
        sys/kern/kern_ksyms.c: revision 1.95
        sys/kern/kern_ksyms.c: revision 1.96
        sys/kern/kern_ksyms.c: revision 1.97

ksyms(4): Fix ksymsread synchronization.

Fixes crash on concurrent update and read of /dev/ksyms.
XXX Unclear why we have to skip sd_gone entries here -- it seems like
they should be preserved until ksymsclose.
ksyms(4): Modify ksyms_symtabs only at IPL_HIGH.

This limits the opportunities for ddb to witness an inconsistent
state of the symbol table list.
ksyms(4): Don't skip symbol tables that are soon to be freed.

They will not actually be freed until /dev/ksyms is closed, so
continued access to them remains kosher.
Revert "ksyms(4): Don't skip symbol tables that are soon to be freed."

Apparently the equality kassert this restored doesn't work; to be
analyzed.

Fix regression introduced in rev 1.90
in which the last element of ksyms_symtabs is skipped by mistake.

ksyms(4): Fix race in ksymsread iteration.
TAILQ_NEXT(ksyms_last_snapshot) might change while we are iterating,
but ksyms_last_snapshot itself cannot, so invert the loop structure.

Discussed with rin@.

ksyms(4): Don't skip symbol tables that are soon to be freed, take 2.

They will not actually be freed until /dev/ksyms is closed, so
continued access to them remains kosher.
The previous change was busted because of an off-by-one error in a
previous previous change's iteration over the symtabs; that error has
since been corrected.

ksyms(4): Allow multiple concurrent opens of /dev/ksyms.

First one takes a snapshot; others all agree with the snapshot.
Previously this code path was just broken (could fail horribly if
modules were unloaded after one of the opens is closed), so I just
blocked it off in an earlier commit, but that broke crash(8).  So
let's continue allowing multiple opens seeing the same snapshot, but
without the horrible bugs.


To generate a diff of this commit:
cvs rdiff -u -r1.87.8.1 -r1.87.8.2 src/sys/kern/kern_ksyms.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/kern/kern_ksyms.c
diff -u src/sys/kern/kern_ksyms.c:1.87.8.1 src/sys/kern/kern_ksyms.c:1.87.8.2
--- src/sys/kern/kern_ksyms.c:1.87.8.1	Tue Jan  7 11:54:57 2020
+++ src/sys/kern/kern_ksyms.c	Mon Jun 21 16:14:14 2021
@@ -1,4 +1,4 @@
-/*	$NetBSD: kern_ksyms.c,v 1.87.8.1 2020/01/07 11:54:57 martin Exp $	*/
+/*	$NetBSD: kern_ksyms.c,v 1.87.8.2 2021/06/21 16:14:14 martin Exp $	*/
 
 /*-
  * Copyright (c) 2008 The NetBSD Foundation, Inc.
@@ -73,7 +73,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_ksyms.c,v 1.87.8.1 2020/01/07 11:54:57 martin Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_ksyms.c,v 1.87.8.2 2021/06/21 16:14:14 martin Exp $");
 
 #if defined(_KERNEL) && defined(_KERNEL_OPT)
 #include "opt_copy_symtab.h"
@@ -92,6 +92,8 @@ __KERNEL_RCSID(0, "$NetBSD: kern_ksyms.c
 #include <sys/proc.h>
 #include <sys/atomic.h>
 #include <sys/ksyms.h>
+#include <sys/kernel.h>
+#include <sys/intr.h>
 
 #ifdef DDB
 #include <ddb/db_output.h>
@@ -110,7 +112,8 @@ static uint32_t *ksyms_nmap = NULL;
 #endif
 
 static int ksyms_maxlen;
-static bool ksyms_isopen;
+static uint64_t ksyms_opencnt;
+static struct ksyms_symtab *ksyms_last_snapshot;
 static bool ksyms_initted;
 static bool ksyms_loaded;
 static kmutex_t ksyms_lock __cacheline_aligned;
@@ -140,7 +143,7 @@ struct ksyms_hdr ksyms_hdr;
 int ksyms_symsz;
 int ksyms_strsz;
 int ksyms_ctfsz;	/* this is not currently used by savecore(8) */
-TAILQ_HEAD(, ksyms_symtab) ksyms_symtabs =
+TAILQ_HEAD(ksyms_symtab_queue, ksyms_symtab) ksyms_symtabs =
     TAILQ_HEAD_INITIALIZER(ksyms_symtabs);
 
 static int
@@ -296,6 +299,7 @@ addsymtab(const char *name, void *symsta
 	int i, j, n, nglob;
 	char *str;
 	int nsyms = symsize / sizeof(Elf_Sym);
+	int s;
 
 	/* Sanity check for pre-allocated map table used during startup. */
 	if ((nmap == ksyms_nmap) && (nsyms >= KSYMS_MAX_ID)) {
@@ -419,7 +423,7 @@ addsymtab(const char *name, void *symsta
 		for (new = 0; new < n; new++) {
 			uint32_t orig = nsym[new].st_size - 1;
 			uint32_t size = nmap[orig];
-	
+
 			nmap[orig] = new + 1;
 
 			/* restore the size */
@@ -428,9 +432,18 @@ addsymtab(const char *name, void *symsta
 	}
 #endif
 
-	/* ksymsread() is unlocked, so membar. */
-	membar_producer();
+	KASSERT(strcmp(name, "netbsd") == 0 || mutex_owned(&ksyms_lock));
+	KASSERT(cold || mutex_owned(&ksyms_lock));
+
+	/*
+	 * Ensure ddb never witnesses an inconsistent state of the
+	 * queue, unless memory is so corrupt that we crash in
+	 * TAILQ_INSERT_TAIL.
+	 */
+	s = splhigh();
 	TAILQ_INSERT_TAIL(&ksyms_symtabs, tab, sd_queue);
+	splx(s);
+
 	ksyms_sizes_calc();
 	ksyms_loaded = true;
 }
@@ -757,6 +770,8 @@ void
 ksyms_modunload(const char *name)
 {
 	struct ksyms_symtab *st;
+	bool do_free = false;
+	int s;
 
 	mutex_enter(&ksyms_lock);
 	TAILQ_FOREACH(st, &ksyms_symtabs, sd_queue) {
@@ -766,16 +781,26 @@ ksyms_modunload(const char *name)
 			continue;
 		st->sd_gone = true;
 		ksyms_sizes_calc();
-		if (!ksyms_isopen) {
+		if (ksyms_opencnt == 0) {
+			/*
+			 * Ensure ddb never witnesses an inconsistent
+			 * state of the queue, unless memory is so
+			 * corrupt that we crash in TAILQ_REMOVE.
+			 */
+			s = splhigh();
 			TAILQ_REMOVE(&ksyms_symtabs, st, sd_queue);
-			kmem_free(st->sd_nmap,
-				  st->sd_nmapsize * sizeof(uint32_t));
-			kmem_free(st, sizeof(*st));
+			splx(s);
+			do_free = true;
 		}
 		break;
 	}
 	mutex_exit(&ksyms_lock);
 	KASSERT(st != NULL);
+
+	if (do_free) {
+		kmem_free(st->sd_nmap, st->sd_nmapsize * sizeof(uint32_t));
+		kmem_free(st, sizeof(*st));
+	}
 }
 
 #ifdef DDB
@@ -854,6 +879,8 @@ ksyms_sizes_calc(void)
 	struct ksyms_symtab *st;
 	int i, delta;
 
+	KASSERT(cold || mutex_owned(&ksyms_lock));
+
 	ksyms_symsz = ksyms_strsz = 0;
 	TAILQ_FOREACH(st, &ksyms_symtabs, sd_queue) {
 		if (__predict_false(st->sd_gone))
@@ -967,10 +994,12 @@ ksymsopen(dev_t dev, int oflags, int dev
 		return ENXIO;
 
 	/*
-	 * Create a "snapshot" of the kernel symbol table.  Setting
-	 * ksyms_isopen will prevent symbol tables from being freed.
+	 * Create a "snapshot" of the kernel symbol table.  Bumping
+	 * ksyms_opencnt will prevent symbol tables from being freed.
 	 */
 	mutex_enter(&ksyms_lock);
+	if (ksyms_opencnt++)
+		goto out;
 	ksyms_hdr.kh_shdr[SYMTAB].sh_size = ksyms_symsz;
 	ksyms_hdr.kh_shdr[SYMTAB].sh_info = ksyms_symsz / sizeof(Elf_Sym);
 	ksyms_hdr.kh_shdr[STRTAB].sh_offset = ksyms_symsz +
@@ -979,8 +1008,8 @@ ksymsopen(dev_t dev, int oflags, int dev
 	ksyms_hdr.kh_shdr[SHCTF].sh_offset = ksyms_strsz +
 	    ksyms_hdr.kh_shdr[STRTAB].sh_offset;
 	ksyms_hdr.kh_shdr[SHCTF].sh_size = ksyms_ctfsz;
-	ksyms_isopen = true;
-	mutex_exit(&ksyms_lock);
+	ksyms_last_snapshot = TAILQ_LAST(&ksyms_symtabs, ksyms_symtab_queue);
+out:	mutex_exit(&ksyms_lock);
 
 	return 0;
 }
@@ -989,25 +1018,35 @@ static int
 ksymsclose(dev_t dev, int oflags, int devtype, struct lwp *l)
 {
 	struct ksyms_symtab *st, *next;
-	bool resize;
+	TAILQ_HEAD(, ksyms_symtab) to_free = TAILQ_HEAD_INITIALIZER(to_free);
+	int s;
 
 	/* Discard references to symbol tables. */
 	mutex_enter(&ksyms_lock);
-	ksyms_isopen = false;
-	resize = false;
-	for (st = TAILQ_FIRST(&ksyms_symtabs); st != NULL; st = next) {
-		next = TAILQ_NEXT(st, sd_queue);
+	if (--ksyms_opencnt)
+		goto out;
+	ksyms_last_snapshot = NULL;
+	TAILQ_FOREACH_SAFE(st, &ksyms_symtabs, sd_queue, next) {
 		if (st->sd_gone) {
+			/*
+			 * Ensure ddb never witnesses an inconsistent
+			 * state of the queue, unless memory is so
+			 * corrupt that we crash in TAILQ_REMOVE.
+			 */
+			s = splhigh();
 			TAILQ_REMOVE(&ksyms_symtabs, st, sd_queue);
-			kmem_free(st->sd_nmap,
-				  st->sd_nmapsize * sizeof(uint32_t));
-			kmem_free(st, sizeof(*st));
-			resize = true;
+			splx(s);
+			TAILQ_INSERT_TAIL(&to_free, st, sd_queue);
 		}
 	}
-	if (resize)
+	if (!TAILQ_EMPTY(&to_free))
 		ksyms_sizes_calc();
-	mutex_exit(&ksyms_lock);
+out:	mutex_exit(&ksyms_lock);
+
+	TAILQ_FOREACH_SAFE(st, &to_free, sd_queue, next) {
+		kmem_free(st->sd_nmap, st->sd_nmapsize * sizeof(uint32_t));
+		kmem_free(st, sizeof(*st));
+	}
 
 	return 0;
 }
@@ -1035,9 +1074,9 @@ ksymsread(dev_t dev, struct uio *uio, in
 	 * Copy out the symbol table.
 	 */
 	filepos = sizeof(struct ksyms_hdr);
-	TAILQ_FOREACH(st, &ksyms_symtabs, sd_queue) {
-		if (__predict_false(st->sd_gone))
-			continue;
+	for (st = TAILQ_FIRST(&ksyms_symtabs);
+	     ;
+	     st = TAILQ_NEXT(st, sd_queue)) {
 		if (uio->uio_resid == 0)
 			return 0;
 		if (uio->uio_offset <= st->sd_symsize + filepos) {
@@ -1048,6 +1087,8 @@ ksymsread(dev_t dev, struct uio *uio, in
 				return error;
 		}
 		filepos += st->sd_symsize;
+		if (st == ksyms_last_snapshot)
+			break;
 	}
 
 	/*
@@ -1055,9 +1096,9 @@ ksymsread(dev_t dev, struct uio *uio, in
 	 */
 	KASSERT(filepos == sizeof(struct ksyms_hdr) +
 	    ksyms_hdr.kh_shdr[SYMTAB].sh_size);
-	TAILQ_FOREACH(st, &ksyms_symtabs, sd_queue) {
-		if (__predict_false(st->sd_gone))
-			continue;
+	for (st = TAILQ_FIRST(&ksyms_symtabs);
+	     ;
+	     st = TAILQ_NEXT(st, sd_queue)) {
 		if (uio->uio_resid == 0)
 			return 0;
 		if (uio->uio_offset <= st->sd_strsize + filepos) {
@@ -1068,6 +1109,8 @@ ksymsread(dev_t dev, struct uio *uio, in
 				return error;
 		}
 		filepos += st->sd_strsize;
+		if (st == ksyms_last_snapshot)
+			break;
 	}
 
 	/*

Reply via email to