From: Rusty Russell [mailto:[email protected]]
>
>For CONFIG_KALLSYMS, we keep two symbol tables and two string tables.
>There's one full copy, marked SHF_ALLOC and laid out at the end of the
>module's init section.  There's also a cut-down version that only
>contains core symbols and strings, and lives in the module's core
>section.
>
>After module init (and before we free the module memory), we switch
>the mod->symtab, mod->num_symtab and mod->strtab to point to the core
>versions.  We do this under the module_mutex.
>
>However, kallsyms doesn't take the module_mutex: it uses
>preempt_disable() and rcu tricks to walk through the modules, because
>it's used in the oops path.  It's also used in /proc/kallsyms.
>There's nothing atomic about the change of these variables, so we can
>get the old (larger!) num_symtab and the new symtab pointer; in fact
>this is what I saw when trying to reproduce.
>
>By grouping these variables together, we can use a
>carefully-dereferenced pointer to ensure we always get one or the
>other (the free of the module init section is already done in an RCU
>callback, so that's safe).  We allocate the init one at the end of the
>module init section, and keep the core one inside the struct module
>itself (it could also have been allocated at the end of the module
>core, but that's probably overkill).
>
>Reported-by: Weilong Chen <[email protected]>
>Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=111541
>Cc: [email protected]
>Signed-off-by: Rusty Russell <[email protected]>

Looks good to me.

Reviewed-by: Masami Hiramatsu <[email protected]>

Thanks!

>---
> include/linux/module.h |  19 +++++----
> kernel/module.c        | 112 ++++++++++++++++++++++++++++++-------------------
> 2 files changed, 79 insertions(+), 52 deletions(-)
>
>diff --git a/include/linux/module.h b/include/linux/module.h
>index 4560d8f1545d..2bb0c3085706 100644
>--- a/include/linux/module.h
>+++ b/include/linux/module.h
>@@ -324,6 +324,12 @@ struct module_layout {
> #define __module_layout_align
> #endif
>
>+struct mod_kallsyms {
>+      Elf_Sym *symtab;
>+      unsigned int num_symtab;
>+      char *strtab;
>+};
>+
> struct module {
>       enum module_state state;
>
>@@ -405,15 +411,10 @@ struct module {
> #endif
>
> #ifdef CONFIG_KALLSYMS
>-      /*
>-       * We keep the symbol and string tables for kallsyms.
>-       * The core_* fields below are temporary, loader-only (they
>-       * could really be discarded after module init).
>-       */
>-      Elf_Sym *symtab, *core_symtab;
>-      unsigned int num_symtab, core_num_syms;
>-      char *strtab, *core_strtab;
>-
>+      /* Protected by RCU and/or module_mutex: use rcu_dereference() */
>+      struct mod_kallsyms *kallsyms;
>+      struct mod_kallsyms core_kallsyms;
>+
>       /* Section attributes */
>       struct module_sect_attrs *sect_attrs;
>
>diff --git a/kernel/module.c b/kernel/module.c
>index 1e79d8157712..9537da37ce87 100644
>--- a/kernel/module.c
>+++ b/kernel/module.c
>@@ -303,6 +303,9 @@ struct load_info {
>       struct _ddebug *debug;
>       unsigned int num_debug;
>       bool sig_ok;
>+#ifdef CONFIG_KALLSYMS
>+      unsigned long mod_kallsyms_init_off;
>+#endif
>       struct {
>               unsigned int sym, str, mod, vers, info, pcpu;
>       } index;
>@@ -2480,10 +2483,21 @@ static void layout_symtab(struct module *mod, struct 
>load_info *info)
>       strsect->sh_flags |= SHF_ALLOC;
>       strsect->sh_entsize = get_offset(mod, &mod->init_layout.size, strsect,
>                                        info->index.str) | INIT_OFFSET_MASK;
>-      mod->init_layout.size = debug_align(mod->init_layout.size);
>       pr_debug("\t%s\n", info->secstrings + strsect->sh_name);
>+
>+      /* We'll tack temporary mod_kallsyms on the end. */
>+      mod->init_layout.size = ALIGN(mod->init_layout.size,
>+                                    __alignof__(struct mod_kallsyms));
>+      info->mod_kallsyms_init_off = mod->init_layout.size;
>+      mod->init_layout.size += sizeof(struct mod_kallsyms);
>+      mod->init_layout.size = debug_align(mod->init_layout.size);
> }
>
>+/*
>+ * We use the full symtab and strtab which layout_symtab arranged to
>+ * be appended to the init section.  Later we switch to the cut-down
>+ * core-only ones.
>+ */
> static void add_kallsyms(struct module *mod, const struct load_info *info)
> {
>       unsigned int i, ndst;
>@@ -2492,29 +2506,34 @@ static void add_kallsyms(struct module *mod, const 
>struct load_info *info)
>       char *s;
>       Elf_Shdr *symsec = &info->sechdrs[info->index.sym];
>
>-      mod->symtab = (void *)symsec->sh_addr;
>-      mod->num_symtab = symsec->sh_size / sizeof(Elf_Sym);
>+      /* Set up to point into init section. */
>+      mod->kallsyms = mod->init_layout.base + info->mod_kallsyms_init_off;
>+
>+      mod->kallsyms->symtab = (void *)symsec->sh_addr;
>+      mod->kallsyms->num_symtab = symsec->sh_size / sizeof(Elf_Sym);
>       /* Make sure we get permanent strtab: don't use info->strtab. */
>-      mod->strtab = (void *)info->sechdrs[info->index.str].sh_addr;
>+      mod->kallsyms->strtab = (void *)info->sechdrs[info->index.str].sh_addr;
>
>       /* Set types up while we still have access to sections. */
>-      for (i = 0; i < mod->num_symtab; i++)
>-              mod->symtab[i].st_info = elf_type(&mod->symtab[i], info);
>-
>-      mod->core_symtab = dst = mod->core_layout.base + info->symoffs;
>-      mod->core_strtab = s = mod->core_layout.base + info->stroffs;
>-      src = mod->symtab;
>-      for (ndst = i = 0; i < mod->num_symtab; i++) {
>+      for (i = 0; i < mod->kallsyms->num_symtab; i++)
>+              mod->kallsyms->symtab[i].st_info
>+                      = elf_type(&mod->kallsyms->symtab[i], info);
>+
>+      /* Now populate the cut down core kallsyms for after init. */
>+      mod->core_kallsyms.symtab = dst = mod->core_layout.base + info->symoffs;
>+      mod->core_kallsyms.strtab = s = mod->core_layout.base + info->stroffs;
>+      src = mod->kallsyms->symtab;
>+      for (ndst = i = 0; i < mod->kallsyms->num_symtab; i++) {
>               if (i == 0 ||
>                   is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum,
>                                  info->index.pcpu)) {
>                       dst[ndst] = src[i];
>-                      dst[ndst++].st_name = s - mod->core_strtab;
>-                      s += strlcpy(s, &mod->strtab[src[i].st_name],
>+                      dst[ndst++].st_name = s - mod->core_kallsyms.strtab;
>+                      s += strlcpy(s, &mod->kallsyms->strtab[src[i].st_name],
>                                    KSYM_NAME_LEN) + 1;
>               }
>       }
>-      mod->core_num_syms = ndst;
>+      mod->core_kallsyms.num_symtab = ndst;
> }
> #else
> static inline void layout_symtab(struct module *mod, struct load_info *info)
>@@ -3263,9 +3282,8 @@ static noinline int do_init_module(struct module *mod)
>       module_put(mod);
>       trim_init_extable(mod);
> #ifdef CONFIG_KALLSYMS
>-      mod->num_symtab = mod->core_num_syms;
>-      mod->symtab = mod->core_symtab;
>-      mod->strtab = mod->core_strtab;
>+      /* Switch to core kallsyms now init is done: kallsyms may be walking! */
>+      rcu_assign_pointer(mod->kallsyms, &mod->core_kallsyms);
> #endif
>       mod_tree_remove_init(mod);
>       disable_ro_nx(&mod->init_layout);
>@@ -3627,9 +3645,9 @@ static inline int is_arm_mapping_symbol(const char *str)
>              && (str[2] == '\0' || str[2] == '.');
> }
>
>-static const char *symname(struct module *mod, unsigned int symnum)
>+static const char *symname(struct mod_kallsyms *kallsyms, unsigned int symnum)
> {
>-      return mod->strtab + mod->symtab[symnum].st_name;
>+      return kallsyms->strtab + kallsyms->symtab[symnum].st_name;
> }
>
> static const char *get_ksymbol(struct module *mod,
>@@ -3639,6 +3657,7 @@ static const char *get_ksymbol(struct module *mod,
> {
>       unsigned int i, best = 0;
>       unsigned long nextval;
>+      struct mod_kallsyms *kallsyms = rcu_dereference_sched(mod->kallsyms);
>
>       /* At worse, next value is at end of module */
>       if (within_module_init(addr, mod))
>@@ -3648,32 +3667,32 @@ static const char *get_ksymbol(struct module *mod,
>
>       /* Scan for closest preceding symbol, and next symbol. (ELF
>          starts real symbols at 1). */
>-      for (i = 1; i < mod->num_symtab; i++) {
>-              if (mod->symtab[i].st_shndx == SHN_UNDEF)
>+      for (i = 1; i < kallsyms->num_symtab; i++) {
>+              if (kallsyms->symtab[i].st_shndx == SHN_UNDEF)
>                       continue;
>
>               /* We ignore unnamed symbols: they're uninformative
>                * and inserted at a whim. */
>-              if (*symname(mod, i) == '\0'
>-                  || is_arm_mapping_symbol(symname(mod, i)))
>+              if (*symname(kallsyms, i) == '\0'
>+                  || is_arm_mapping_symbol(symname(kallsyms, i)))
>                       continue;
>
>-              if (mod->symtab[i].st_value <= addr
>-                  && mod->symtab[i].st_value > mod->symtab[best].st_value)
>+              if (kallsyms->symtab[i].st_value <= addr
>+                  && kallsyms->symtab[i].st_value > 
>kallsyms->symtab[best].st_value)
>                       best = i;
>-              if (mod->symtab[i].st_value > addr
>-                  && mod->symtab[i].st_value < nextval)
>-                      nextval = mod->symtab[i].st_value;
>+              if (kallsyms->symtab[i].st_value > addr
>+                  && kallsyms->symtab[i].st_value < nextval)
>+                      nextval = kallsyms->symtab[i].st_value;
>       }
>
>       if (!best)
>               return NULL;
>
>       if (size)
>-              *size = nextval - mod->symtab[best].st_value;
>+              *size = nextval - kallsyms->symtab[best].st_value;
>       if (offset)
>-              *offset = addr - mod->symtab[best].st_value;
>-      return symname(mod, best);
>+              *offset = addr - kallsyms->symtab[best].st_value;
>+      return symname(kallsyms, best);
> }
>
> /* For kallsyms to ask for address resolution.  NULL means not found.  Careful
>@@ -3763,18 +3782,21 @@ int module_get_kallsym(unsigned int symnum, unsigned 
>long *value, char *type,
>
>       preempt_disable();
>       list_for_each_entry_rcu(mod, &modules, list) {
>+              struct mod_kallsyms *kallsyms;
>+
>               if (mod->state == MODULE_STATE_UNFORMED)
>                       continue;
>-              if (symnum < mod->num_symtab) {
>-                      *value = mod->symtab[symnum].st_value;
>-                      *type = mod->symtab[symnum].st_info;
>-                      strlcpy(name, symname(mod, symnum), KSYM_NAME_LEN);
>+              kallsyms = rcu_dereference_sched(mod->kallsyms);
>+              if (symnum < kallsyms->num_symtab) {
>+                      *value = kallsyms->symtab[symnum].st_value;
>+                      *type = kallsyms->symtab[symnum].st_info;
>+                      strlcpy(name, symname(kallsyms, symnum), KSYM_NAME_LEN);
>                       strlcpy(module_name, mod->name, MODULE_NAME_LEN);
>                       *exported = is_exported(name, *value, mod);
>                       preempt_enable();
>                       return 0;
>               }
>-              symnum -= mod->num_symtab;
>+              symnum -= kallsyms->num_symtab;
>       }
>       preempt_enable();
>       return -ERANGE;
>@@ -3783,11 +3805,12 @@ int module_get_kallsym(unsigned int symnum, unsigned 
>long *value, char *type,
> static unsigned long mod_find_symname(struct module *mod, const char *name)
> {
>       unsigned int i;
>+      struct mod_kallsyms *kallsyms = rcu_dereference_sched(mod->kallsyms);
>
>-      for (i = 0; i < mod->num_symtab; i++)
>-              if (strcmp(name, symname(mod, i)) == 0 &&
>-                  mod->symtab[i].st_info != 'U')
>-                      return mod->symtab[i].st_value;
>+      for (i = 0; i < kallsyms->num_symtab; i++)
>+              if (strcmp(name, symname(kallsyms, i)) == 0 &&
>+                  kallsyms->symtab[i].st_info != 'U')
>+                      return kallsyms->symtab[i].st_value;
>       return 0;
> }
>
>@@ -3826,11 +3849,14 @@ int module_kallsyms_on_each_symbol(int (*fn)(void *, 
>const char *,
>       module_assert_mutex();
>
>       list_for_each_entry(mod, &modules, list) {
>+              /* We hold module_mutex: no need for rcu_dereference_sched */
>+              struct mod_kallsyms *kallsyms = mod->kallsyms;
>+
>               if (mod->state == MODULE_STATE_UNFORMED)
>                       continue;
>-              for (i = 0; i < mod->num_symtab; i++) {
>-                      ret = fn(data, symname(mod, i),
>-                               mod, mod->symtab[i].st_value);
>+              for (i = 0; i < kallsyms->num_symtab; i++) {
>+                      ret = fn(data, symname(kallsyms, i),
>+                               mod, kallsyms->symtab[i].st_value);
>                       if (ret != 0)
>                               return ret;
>               }
>--
>2.5.0

Reply via email to