When writing to a sysctl string, each write, regardless of VFS position, begins writing the string from the start. This means the contents of the last write to the sysctl controls the string contents instead of the first:
open("/proc/sys/kernel/modprobe", O_WRONLY) = 1 write(1, "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"..., 4096) = 4096 write(1, "/bin/true", 9) = 9 close(1) = 0 $ cat /proc/sys/kernel/modprobe /bin/true Expected behaviour would be to have the sysctl be "AAAA..." capped at maxlen (in this case KMOD_PATH_LEN: 256), instead of truncating to the contents of the second write. Similarly, multiple short writes would not append to the sysctl. This provides CONFIG_PROC_SYSCTL_STRICT_WRITES as a way to make this behavior act in a less surprising manner for strings, and disallows non-zero file position when writing numeric sysctls (similar to what is already done when reading from non-zero file positions). Signed-off-by: Kees Cook <keesc...@chromium.org> --- fs/proc/Kconfig | 17 +++++++++++++++++ kernel/sysctl.c | 17 +++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig index 2183fcf41d59..4a93cf8b7b9f 100644 --- a/fs/proc/Kconfig +++ b/fs/proc/Kconfig @@ -62,6 +62,23 @@ config PROC_SYSCTL building a kernel for install/rescue disks or your system is very limited in memory. +config PROC_SYSCTL_STRICT_WRITES + bool "Perform strict writes to /proc/sys entries" if EXPERT + depends on PROC_SYSCTL + default n + ---help--- + When writing to sysctl entries in /proc, each write syscall + is expected to contain the entire sysctl value. For strings + this means that writes do not append, and the contents of the + buffer will be whatever was written last, regardless of the + file position. + + When PROC_SYSCTL_STRICT_WRITES is enabled, writes to numeric + sysctl entries must always be at file position 0 and the value + must be fully contained in the buffer sent to the write syscall. + For strings, file position is respected, though anything past + the max length of the sysctl buffer will be ignored. + config PROC_PAGE_MONITOR default y depends on PROC_FS && MMU diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 0e08103a69c8..83902ae3e4e6 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1712,8 +1712,19 @@ static int _proc_do_string(char *data, int maxlen, int write, } if (write) { +#ifdef CONFIG_PROC_SYSCTL_STRICT_WRITES + /* Only continue writes not past the end of buffer. */ + len = strlen(data); + if (len > maxlen - 1) + len = maxlen - 1; + + if (*ppos > len) + return 0; + len = *ppos; +#else /* Start writing from beginning of buffer. */ len = 0; +#endif *ppos += *lenp; p = buffer; while ((p - buffer) < *lenp && len < maxlen - 1) { @@ -1948,6 +1959,8 @@ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table, conv = do_proc_dointvec_conv; if (write) { + if (IS_ENABLED(CONFIG_PROC_SYSCTL_STRICT_WRITES) && *ppos) + goto out; if (left > PAGE_SIZE - 1) left = PAGE_SIZE - 1; page = __get_free_page(GFP_TEMPORARY); @@ -2005,6 +2018,7 @@ free: return err ? : -EINVAL; } *lenp -= left; +out: *ppos += *lenp; return err; } @@ -2197,6 +2211,8 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int left = *lenp; if (write) { + if (IS_ENABLED(CONFIG_PROC_SYSCTL_STRICT_WRITES) && *ppos) + goto out; if (left > PAGE_SIZE - 1) left = PAGE_SIZE - 1; page = __get_free_page(GFP_TEMPORARY); @@ -2252,6 +2268,7 @@ free: return err ? : -EINVAL; } *lenp -= left; +out: *ppos += *lenp; return err; } -- 1.7.9.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/