Module Name: src Committed By: martin Date: Mon Jun 21 16:14:14 UTC 2021
Modified Files: src/sys/kern [netbsd-9]: kern_ksyms.c Log Message: Pull up following revision(s) (requested by riastradh in ticket #1299): sys/kern/kern_ksyms.c: revision 1.90 sys/kern/kern_ksyms.c: revision 1.91 sys/kern/kern_ksyms.c: revision 1.92 sys/kern/kern_ksyms.c: revision 1.93 sys/kern/kern_ksyms.c: revision 1.94 sys/kern/kern_ksyms.c: revision 1.95 sys/kern/kern_ksyms.c: revision 1.96 sys/kern/kern_ksyms.c: revision 1.97 ksyms(4): Fix ksymsread synchronization. Fixes crash on concurrent update and read of /dev/ksyms. XXX Unclear why we have to skip sd_gone entries here -- it seems like they should be preserved until ksymsclose. ksyms(4): Modify ksyms_symtabs only at IPL_HIGH. This limits the opportunities for ddb to witness an inconsistent state of the symbol table list. ksyms(4): Don't skip symbol tables that are soon to be freed. They will not actually be freed until /dev/ksyms is closed, so continued access to them remains kosher. Revert "ksyms(4): Don't skip symbol tables that are soon to be freed." Apparently the equality kassert this restored doesn't work; to be analyzed. Fix regression introduced in rev 1.90 in which the last element of ksyms_symtabs is skipped by mistake. ksyms(4): Fix race in ksymsread iteration. TAILQ_NEXT(ksyms_last_snapshot) might change while we are iterating, but ksyms_last_snapshot itself cannot, so invert the loop structure. Discussed with rin@. ksyms(4): Don't skip symbol tables that are soon to be freed, take 2. They will not actually be freed until /dev/ksyms is closed, so continued access to them remains kosher. The previous change was busted because of an off-by-one error in a previous previous change's iteration over the symtabs; that error has since been corrected. ksyms(4): Allow multiple concurrent opens of /dev/ksyms. First one takes a snapshot; others all agree with the snapshot. Previously this code path was just broken (could fail horribly if modules were unloaded after one of the opens is closed), so I just blocked it off in an earlier commit, but that broke crash(8). So let's continue allowing multiple opens seeing the same snapshot, but without the horrible bugs. To generate a diff of this commit: cvs rdiff -u -r1.87.8.1 -r1.87.8.2 src/sys/kern/kern_ksyms.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/kern/kern_ksyms.c diff -u src/sys/kern/kern_ksyms.c:1.87.8.1 src/sys/kern/kern_ksyms.c:1.87.8.2 --- src/sys/kern/kern_ksyms.c:1.87.8.1 Tue Jan 7 11:54:57 2020 +++ src/sys/kern/kern_ksyms.c Mon Jun 21 16:14:14 2021 @@ -1,4 +1,4 @@ -/* $NetBSD: kern_ksyms.c,v 1.87.8.1 2020/01/07 11:54:57 martin Exp $ */ +/* $NetBSD: kern_ksyms.c,v 1.87.8.2 2021/06/21 16:14:14 martin Exp $ */ /*- * Copyright (c) 2008 The NetBSD Foundation, Inc. @@ -73,7 +73,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: kern_ksyms.c,v 1.87.8.1 2020/01/07 11:54:57 martin Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_ksyms.c,v 1.87.8.2 2021/06/21 16:14:14 martin Exp $"); #if defined(_KERNEL) && defined(_KERNEL_OPT) #include "opt_copy_symtab.h" @@ -92,6 +92,8 @@ __KERNEL_RCSID(0, "$NetBSD: kern_ksyms.c #include <sys/proc.h> #include <sys/atomic.h> #include <sys/ksyms.h> +#include <sys/kernel.h> +#include <sys/intr.h> #ifdef DDB #include <ddb/db_output.h> @@ -110,7 +112,8 @@ static uint32_t *ksyms_nmap = NULL; #endif static int ksyms_maxlen; -static bool ksyms_isopen; +static uint64_t ksyms_opencnt; +static struct ksyms_symtab *ksyms_last_snapshot; static bool ksyms_initted; static bool ksyms_loaded; static kmutex_t ksyms_lock __cacheline_aligned; @@ -140,7 +143,7 @@ struct ksyms_hdr ksyms_hdr; int ksyms_symsz; int ksyms_strsz; int ksyms_ctfsz; /* this is not currently used by savecore(8) */ -TAILQ_HEAD(, ksyms_symtab) ksyms_symtabs = +TAILQ_HEAD(ksyms_symtab_queue, ksyms_symtab) ksyms_symtabs = TAILQ_HEAD_INITIALIZER(ksyms_symtabs); static int @@ -296,6 +299,7 @@ addsymtab(const char *name, void *symsta int i, j, n, nglob; char *str; int nsyms = symsize / sizeof(Elf_Sym); + int s; /* Sanity check for pre-allocated map table used during startup. */ if ((nmap == ksyms_nmap) && (nsyms >= KSYMS_MAX_ID)) { @@ -419,7 +423,7 @@ addsymtab(const char *name, void *symsta for (new = 0; new < n; new++) { uint32_t orig = nsym[new].st_size - 1; uint32_t size = nmap[orig]; - + nmap[orig] = new + 1; /* restore the size */ @@ -428,9 +432,18 @@ addsymtab(const char *name, void *symsta } #endif - /* ksymsread() is unlocked, so membar. */ - membar_producer(); + KASSERT(strcmp(name, "netbsd") == 0 || mutex_owned(&ksyms_lock)); + KASSERT(cold || mutex_owned(&ksyms_lock)); + + /* + * Ensure ddb never witnesses an inconsistent state of the + * queue, unless memory is so corrupt that we crash in + * TAILQ_INSERT_TAIL. + */ + s = splhigh(); TAILQ_INSERT_TAIL(&ksyms_symtabs, tab, sd_queue); + splx(s); + ksyms_sizes_calc(); ksyms_loaded = true; } @@ -757,6 +770,8 @@ void ksyms_modunload(const char *name) { struct ksyms_symtab *st; + bool do_free = false; + int s; mutex_enter(&ksyms_lock); TAILQ_FOREACH(st, &ksyms_symtabs, sd_queue) { @@ -766,16 +781,26 @@ ksyms_modunload(const char *name) continue; st->sd_gone = true; ksyms_sizes_calc(); - if (!ksyms_isopen) { + if (ksyms_opencnt == 0) { + /* + * Ensure ddb never witnesses an inconsistent + * state of the queue, unless memory is so + * corrupt that we crash in TAILQ_REMOVE. + */ + s = splhigh(); TAILQ_REMOVE(&ksyms_symtabs, st, sd_queue); - kmem_free(st->sd_nmap, - st->sd_nmapsize * sizeof(uint32_t)); - kmem_free(st, sizeof(*st)); + splx(s); + do_free = true; } break; } mutex_exit(&ksyms_lock); KASSERT(st != NULL); + + if (do_free) { + kmem_free(st->sd_nmap, st->sd_nmapsize * sizeof(uint32_t)); + kmem_free(st, sizeof(*st)); + } } #ifdef DDB @@ -854,6 +879,8 @@ ksyms_sizes_calc(void) struct ksyms_symtab *st; int i, delta; + KASSERT(cold || mutex_owned(&ksyms_lock)); + ksyms_symsz = ksyms_strsz = 0; TAILQ_FOREACH(st, &ksyms_symtabs, sd_queue) { if (__predict_false(st->sd_gone)) @@ -967,10 +994,12 @@ ksymsopen(dev_t dev, int oflags, int dev return ENXIO; /* - * Create a "snapshot" of the kernel symbol table. Setting - * ksyms_isopen will prevent symbol tables from being freed. + * Create a "snapshot" of the kernel symbol table. Bumping + * ksyms_opencnt will prevent symbol tables from being freed. */ mutex_enter(&ksyms_lock); + if (ksyms_opencnt++) + goto out; ksyms_hdr.kh_shdr[SYMTAB].sh_size = ksyms_symsz; ksyms_hdr.kh_shdr[SYMTAB].sh_info = ksyms_symsz / sizeof(Elf_Sym); ksyms_hdr.kh_shdr[STRTAB].sh_offset = ksyms_symsz + @@ -979,8 +1008,8 @@ ksymsopen(dev_t dev, int oflags, int dev ksyms_hdr.kh_shdr[SHCTF].sh_offset = ksyms_strsz + ksyms_hdr.kh_shdr[STRTAB].sh_offset; ksyms_hdr.kh_shdr[SHCTF].sh_size = ksyms_ctfsz; - ksyms_isopen = true; - mutex_exit(&ksyms_lock); + ksyms_last_snapshot = TAILQ_LAST(&ksyms_symtabs, ksyms_symtab_queue); +out: mutex_exit(&ksyms_lock); return 0; } @@ -989,25 +1018,35 @@ static int ksymsclose(dev_t dev, int oflags, int devtype, struct lwp *l) { struct ksyms_symtab *st, *next; - bool resize; + TAILQ_HEAD(, ksyms_symtab) to_free = TAILQ_HEAD_INITIALIZER(to_free); + int s; /* Discard references to symbol tables. */ mutex_enter(&ksyms_lock); - ksyms_isopen = false; - resize = false; - for (st = TAILQ_FIRST(&ksyms_symtabs); st != NULL; st = next) { - next = TAILQ_NEXT(st, sd_queue); + if (--ksyms_opencnt) + goto out; + ksyms_last_snapshot = NULL; + TAILQ_FOREACH_SAFE(st, &ksyms_symtabs, sd_queue, next) { if (st->sd_gone) { + /* + * Ensure ddb never witnesses an inconsistent + * state of the queue, unless memory is so + * corrupt that we crash in TAILQ_REMOVE. + */ + s = splhigh(); TAILQ_REMOVE(&ksyms_symtabs, st, sd_queue); - kmem_free(st->sd_nmap, - st->sd_nmapsize * sizeof(uint32_t)); - kmem_free(st, sizeof(*st)); - resize = true; + splx(s); + TAILQ_INSERT_TAIL(&to_free, st, sd_queue); } } - if (resize) + if (!TAILQ_EMPTY(&to_free)) ksyms_sizes_calc(); - mutex_exit(&ksyms_lock); +out: mutex_exit(&ksyms_lock); + + TAILQ_FOREACH_SAFE(st, &to_free, sd_queue, next) { + kmem_free(st->sd_nmap, st->sd_nmapsize * sizeof(uint32_t)); + kmem_free(st, sizeof(*st)); + } return 0; } @@ -1035,9 +1074,9 @@ ksymsread(dev_t dev, struct uio *uio, in * Copy out the symbol table. */ filepos = sizeof(struct ksyms_hdr); - TAILQ_FOREACH(st, &ksyms_symtabs, sd_queue) { - if (__predict_false(st->sd_gone)) - continue; + for (st = TAILQ_FIRST(&ksyms_symtabs); + ; + st = TAILQ_NEXT(st, sd_queue)) { if (uio->uio_resid == 0) return 0; if (uio->uio_offset <= st->sd_symsize + filepos) { @@ -1048,6 +1087,8 @@ ksymsread(dev_t dev, struct uio *uio, in return error; } filepos += st->sd_symsize; + if (st == ksyms_last_snapshot) + break; } /* @@ -1055,9 +1096,9 @@ ksymsread(dev_t dev, struct uio *uio, in */ KASSERT(filepos == sizeof(struct ksyms_hdr) + ksyms_hdr.kh_shdr[SYMTAB].sh_size); - TAILQ_FOREACH(st, &ksyms_symtabs, sd_queue) { - if (__predict_false(st->sd_gone)) - continue; + for (st = TAILQ_FIRST(&ksyms_symtabs); + ; + st = TAILQ_NEXT(st, sd_queue)) { if (uio->uio_resid == 0) return 0; if (uio->uio_offset <= st->sd_strsize + filepos) { @@ -1068,6 +1109,8 @@ ksymsread(dev_t dev, struct uio *uio, in return error; } filepos += st->sd_strsize; + if (st == ksyms_last_snapshot) + break; } /*