[PATCH v2] selftests: intel_pstate: ftime() is deprecated
Use clock_gettime() instead of deprecated ftime(). aperf.c: In function ‘main’: aperf.c:58:2: warning: ‘ftime’ is deprecated [-Wdeprecated-declarations] 58 | ftime(&before); | ^ In file included from aperf.c:9: /usr/include/sys/timeb.h:39:12: note: declared here 39 | extern int ftime (struct timeb *__timebuf) |^ Signed-off-by: Tommi Rantala --- tools/testing/selftests/intel_pstate/aperf.c | 22 ++-- 1 file changed, 16 insertions(+), 6 deletions(-) v2: define and use NSEC_PER_MSEC and MSEC_PER_SEC diff --git a/tools/testing/selftests/intel_pstate/aperf.c b/tools/testing/selftests/intel_pstate/aperf.c index f6cd03a87493..a8acf3996973 100644 --- a/tools/testing/selftests/intel_pstate/aperf.c +++ b/tools/testing/selftests/intel_pstate/aperf.c @@ -10,8 +10,12 @@ #include #include #include +#include <time.h> #include "../kselftest.h" +#define MSEC_PER_SEC 1000L +#define NSEC_PER_MSEC 1000000L + void usage(char *name) { printf ("Usage: %s cpunum\n", name); } @@ -22,7 +26,7 @@ int main(int argc, char **argv) { long long tsc, old_tsc, new_tsc; long long aperf, old_aperf, new_aperf; long long mperf, old_mperf, new_mperf; - struct timeb before, after; + struct timespec before, after; long long int start, finish, total; cpu_set_t cpuset; @@ -55,7 +59,10 @@ int main(int argc, char **argv) { return 1; } - ftime(&before); + if (clock_gettime(CLOCK_MONOTONIC, &before) < 0) { + perror("clock_gettime"); + return 1; + } pread(fd, &old_tsc, sizeof(old_tsc), 0x10); pread(fd, &old_aperf, sizeof(old_mperf), 0xe7); pread(fd, &old_mperf, sizeof(old_aperf), 0xe8); @@ -64,7 +71,10 @@ int main(int argc, char **argv) { sqrt(i); } - ftime(&after); + if (clock_gettime(CLOCK_MONOTONIC, &after) < 0) { + perror("clock_gettime"); + return 1; + } pread(fd, &new_tsc, sizeof(new_tsc), 0x10); pread(fd, &new_aperf, sizeof(new_mperf), 0xe7); pread(fd, &new_mperf, sizeof(new_aperf), 0xe8); @@ -73,11 +83,11 @@ int main(int argc, char **argv) { aperf = new_aperf-old_aperf; mperf = new_mperf-old_mperf; - start = 
before.time*1000 + before.millitm; - finish = after.time*1000 + after.millitm; + start = before.tv_sec*MSEC_PER_SEC + before.tv_nsec/NSEC_PER_MSEC; + finish = after.tv_sec*MSEC_PER_SEC + after.tv_nsec/NSEC_PER_MSEC; total = finish - start; - printf("runTime: %4.2f\n", 1.0*total/1000); + printf("runTime: %4.2f\n", 1.0*total/MSEC_PER_SEC); printf("freq: %7.0f\n", tsc / (1.0*aperf / (1.0 * mperf)) / total); return 0; } -- 2.26.2
[PATCH] selftests: intel_pstate: ftime() is deprecated
Use clock_gettime() instead of deprecated ftime(). aperf.c: In function ‘main’: aperf.c:58:2: warning: ‘ftime’ is deprecated [-Wdeprecated-declarations] 58 | ftime(&before); | ^ In file included from aperf.c:9: /usr/include/sys/timeb.h:39:12: note: declared here 39 | extern int ftime (struct timeb *__timebuf) |^ Signed-off-by: Tommi Rantala --- tools/testing/selftests/intel_pstate/aperf.c | 17 - 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/intel_pstate/aperf.c b/tools/testing/selftests/intel_pstate/aperf.c index f6cd03a87493..eea9dbab459b 100644 --- a/tools/testing/selftests/intel_pstate/aperf.c +++ b/tools/testing/selftests/intel_pstate/aperf.c @@ -10,6 +10,7 @@ #include #include #include +#include <time.h> #include "../kselftest.h" void usage(char *name) { @@ -22,7 +23,7 @@ int main(int argc, char **argv) { long long tsc, old_tsc, new_tsc; long long aperf, old_aperf, new_aperf; long long mperf, old_mperf, new_mperf; - struct timeb before, after; + struct timespec before, after; long long int start, finish, total; cpu_set_t cpuset; @@ -55,7 +56,10 @@ int main(int argc, char **argv) { return 1; } - ftime(&before); + if (clock_gettime(CLOCK_MONOTONIC, &before) < 0) { + perror("clock_gettime"); + return 1; + } pread(fd, &old_tsc, sizeof(old_tsc), 0x10); pread(fd, &old_aperf, sizeof(old_mperf), 0xe7); pread(fd, &old_mperf, sizeof(old_aperf), 0xe8); @@ -64,7 +68,10 @@ int main(int argc, char **argv) { sqrt(i); } - ftime(&after); + if (clock_gettime(CLOCK_MONOTONIC, &after) < 0) { + perror("clock_gettime"); + return 1; + } pread(fd, &new_tsc, sizeof(new_tsc), 0x10); pread(fd, &new_aperf, sizeof(new_mperf), 0xe7); pread(fd, &new_mperf, sizeof(new_aperf), 0xe8); @@ -73,8 +80,8 @@ int main(int argc, char **argv) { aperf = new_aperf-old_aperf; mperf = new_mperf-old_mperf; - start = before.time*1000 + before.millitm; - finish = after.time*1000 + after.millitm; + start = before.tv_sec*1000 + before.tv_nsec/1000000L; + finish = after.tv_sec*1000 + 
after.tv_nsec/1000000L; total = finish - start; printf("runTime: %4.2f\n", 1.0*total/1000); -- 2.26.2
[PATCH] perf test: Implement skip_reason callback for watchpoint tests
Currently reason for skipping the read only watchpoint test is only seen when running in verbose mode: $ perf test watchpoint 23: Watchpoint: 23.1: Read Only Watchpoint: Skip 23.2: Write Only Watchpoint : Ok 23.3: Read / Write Watchpoint : Ok 23.4: Modify Watchpoint : Ok $ perf test -v watchpoint 23: Watchpoint: 23.1: Read Only Watchpoint: --- start --- test child forked, pid 60204 Hardware does not support read only watchpoints. test child finished with -2 Implement skip_reason callback for the watchpoint tests, so that it's easy to see reason why the test is skipped: $ perf test watchpoint 23: Watchpoint: 23.1: Read Only Watchpoint: Skip (missing hardware support) 23.2: Write Only Watchpoint : Ok 23.3: Read / Write Watchpoint : Ok 23.4: Modify Watchpoint : Ok Signed-off-by: Tommi Rantala --- tools/perf/tests/builtin-test.c | 1 + tools/perf/tests/tests.h| 1 + tools/perf/tests/wp.c | 21 +++-- 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index d328caaba45d..3bfad4ee31ae 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -142,6 +142,7 @@ static struct test generic_tests[] = { .skip_if_fail = false, .get_nr = test__wp_subtest_get_nr, .get_desc = test__wp_subtest_get_desc, + .skip_reason= test__wp_subtest_skip_reason, }, }, { diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index 4447a516c689..0630301087a6 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -66,6 +66,7 @@ int test__bp_signal_overflow(struct test *test, int subtest); int test__bp_accounting(struct test *test, int subtest); int test__wp(struct test *test, int subtest); const char *test__wp_subtest_get_desc(int subtest); +const char *test__wp_subtest_skip_reason(int subtest); int test__wp_subtest_get_nr(void); int test__task_exit(struct test *test, int subtest); int test__mem(struct test *test, int subtest); diff --git a/tools/perf/tests/wp.c 
b/tools/perf/tests/wp.c index d262d6639829..9387fa76faa5 100644 --- a/tools/perf/tests/wp.c +++ b/tools/perf/tests/wp.c @@ -174,10 +174,12 @@ static bool wp_ro_supported(void) #endif } -static void wp_ro_skip_msg(void) +static const char *wp_ro_skip_msg(void) { #if defined (__x86_64__) || defined (__i386__) - pr_debug("Hardware does not support read only watchpoints.\n"); + return "missing hardware support"; +#else + return NULL; #endif } @@ -185,7 +187,7 @@ static struct { const char *desc; int (*target_func)(void); bool (*is_supported)(void); - void (*skip_msg)(void); + const char *(*skip_msg)(void); } wp_testcase_table[] = { { .desc = "Read Only Watchpoint", @@ -219,16 +221,23 @@ const char *test__wp_subtest_get_desc(int i) return wp_testcase_table[i].desc; } +const char *test__wp_subtest_skip_reason(int i) +{ + if (i < 0 || i >= (int)ARRAY_SIZE(wp_testcase_table)) + return NULL; + if (!wp_testcase_table[i].skip_msg) + return NULL; + return wp_testcase_table[i].skip_msg(); +} + int test__wp(struct test *test __maybe_unused, int i) { if (i < 0 || i >= (int)ARRAY_SIZE(wp_testcase_table)) return TEST_FAIL; if (wp_testcase_table[i].is_supported && - !wp_testcase_table[i].is_supported()) { - wp_testcase_table[i].skip_msg(); + !wp_testcase_table[i].is_supported()) return TEST_SKIP; - } return !wp_testcase_table[i].target_func() ? TEST_OK : TEST_FAIL; } -- 2.26.2
[PATCH] perf tools: Fix crash with non-jited bpf progs
The addr in PERF_RECORD_KSYMBOL events for non-jited bpf progs points to the bpf interpreter, ie. within kernel text section. When processing the unregister event, this causes unexpected removal of vmlinux_map, crashing perf later in cleanup: # perf record -- timeout --signal=INT 2s /usr/share/bcc/tools/execsnoop PCOMMPIDPPID RET ARGS [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.208 MB perf.data (5155 samples) ] perf: tools/include/linux/refcount.h:131: refcount_sub_and_test: Assertion `!(new > val)' failed. Aborted (core dumped) # perf script -D|grep KSYM 0 0xa40 [0x48]: PERF_RECORD_KSYMBOL addr a9b6b530 len 0 type 1 flags 0x0 name bpf_prog_f958f6eb72ef5af6 0 0xab0 [0x48]: PERF_RECORD_KSYMBOL addr a9b6b530 len 0 type 1 flags 0x0 name bpf_prog_8c42dee26e8cd4c2 0 0xb20 [0x48]: PERF_RECORD_KSYMBOL addr a9b6b530 len 0 type 1 flags 0x0 name bpf_prog_f958f6eb72ef5af6 108563691893 0x33d98 [0x58]: PERF_RECORD_KSYMBOL addr a9b6b3b0 len 0 type 1 flags 0x0 name bpf_prog_bc5697a410556fc2_syscall__execve 108568518458 0x34098 [0x58]: PERF_RECORD_KSYMBOL addr a9b6b3f0 len 0 type 1 flags 0x0 name bpf_prog_45e2203c2928704d_do_ret_sys_execve 109301967895 0x34830 [0x58]: PERF_RECORD_KSYMBOL addr a9b6b3b0 len 0 type 1 flags 0x1 name bpf_prog_bc5697a410556fc2_syscall__execve 109302007356 0x348b0 [0x58]: PERF_RECORD_KSYMBOL addr a9b6b3f0 len 0 type 1 flags 0x1 name bpf_prog_45e2203c2928704d_do_ret_sys_execve perf: tools/include/linux/refcount.h:131: refcount_sub_and_test: Assertion `!(new > val)' failed. Here the addresses match the bpf interpreter: # grep -e a9b6b530 -e a9b6b3b0 -e a9b6b3f0 /proc/kallsyms a9b6b3b0 t __bpf_prog_run224 a9b6b3f0 t __bpf_prog_run192 a9b6b530 t __bpf_prog_run32 Fix by not allowing vmlinux_map to be removed by PERF_RECORD_KSYMBOL unregister event. 
Signed-off-by: Tommi Rantala --- tools/perf/util/machine.c | 11 ++- tools/perf/util/symbol.c | 7 +++ tools/perf/util/symbol.h | 2 ++ 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 85587de027a5..d93d35463c61 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -786,11 +786,20 @@ static int machine__process_ksymbol_unregister(struct machine *machine, union perf_event *event, struct perf_sample *sample __maybe_unused) { + struct symbol *sym; struct map *map; map = maps__find(&machine->kmaps, event->ksymbol.addr); - if (map) + if (!map) + return 0; + + if (map != machine->vmlinux_map) maps__remove(&machine->kmaps, map); + else { + sym = dso__find_symbol(map->dso, map->map_ip(map, map->start)); + if (sym) + dso__delete_symbol(map->dso, sym); + } return 0; } diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 5151a8c0b791..6bf8e74ea1d1 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -515,6 +515,13 @@ void dso__insert_symbol(struct dso *dso, struct symbol *sym) } } +void dso__delete_symbol(struct dso *dso, struct symbol *sym) +{ + rb_erase_cached(&sym->rb_node, &dso->symbols); + symbol__delete(sym); + dso__reset_find_symbol_cache(dso); +} + struct symbol *dso__find_symbol(struct dso *dso, u64 addr) { if (dso->last_find_result.addr != addr || dso->last_find_result.symbol == NULL) { diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 03e264a27cd3..60345691db09 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -130,6 +130,8 @@ int dso__load_kallsyms(struct dso *dso, const char *filename, struct map *map); void dso__insert_symbol(struct dso *dso, struct symbol *sym); +void dso__delete_symbol(struct dso *dso, + struct symbol *sym); struct symbol *dso__find_symbol(struct dso *dso, u64 addr); struct symbol *dso__find_symbol_by_name(struct dso *dso, const char *name); -- 2.26.2
[PATCH 12/13] selftests: clone3: use SKIP instead of XFAIL
XFAIL is gone since 9847d24af95c ("selftests/harness: Refactor XFAIL into SKIP"), use SKIP instead. Fixes: 9847d24af95c ("selftests/harness: Refactor XFAIL into SKIP") Signed-off-by: Tommi Rantala --- tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c index 9562425aa0a9..614091de4c54 100644 --- a/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c +++ b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c @@ -145,7 +145,7 @@ TEST(clone3_cap_checkpoint_restore) test_clone3_supported(); EXPECT_EQ(getuid(), 0) - XFAIL(return, "Skipping all tests as non-root\n"); + SKIP(return, "Skipping all tests as non-root"); memset(&set_tid, 0, sizeof(set_tid)); -- 2.26.2
[PATCH 06/13] selftests: pidfd: skip test on kcmp() ENOSYS
Skip test if kcmp() is not available, for example if kernel is compiled without CONFIG_CHECKPOINT_RESTORE=y. Signed-off-by: Tommi Rantala --- tools/testing/selftests/pidfd/pidfd_getfd_test.c | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/pidfd/pidfd_getfd_test.c b/tools/testing/selftests/pidfd/pidfd_getfd_test.c index 7758c98be015..0930e2411dfb 100644 --- a/tools/testing/selftests/pidfd/pidfd_getfd_test.c +++ b/tools/testing/selftests/pidfd/pidfd_getfd_test.c @@ -204,7 +204,10 @@ TEST_F(child, fetch_fd) fd = sys_pidfd_getfd(self->pidfd, self->remote_fd, 0); ASSERT_GE(fd, 0); - EXPECT_EQ(0, sys_kcmp(getpid(), self->pid, KCMP_FILE, fd, self->remote_fd)); + ret = sys_kcmp(getpid(), self->pid, KCMP_FILE, fd, self->remote_fd); + if (ret < 0 && errno == ENOSYS) + SKIP(return, "kcmp() syscall not supported"); + EXPECT_EQ(ret, 0); ret = fcntl(fd, F_GETFD); ASSERT_GE(ret, 0); -- 2.26.2
[PATCH 10/13] selftests: proc: fix warning: _GNU_SOURCE redefined
Makefile already contains -D_GNU_SOURCE, so we can remove it from the *.c files. Signed-off-by: Tommi Rantala --- tools/testing/selftests/proc/proc-loadavg-001.c | 1 - tools/testing/selftests/proc/proc-self-syscall.c | 1 - tools/testing/selftests/proc/proc-uptime-002.c | 1 - 3 files changed, 3 deletions(-) diff --git a/tools/testing/selftests/proc/proc-loadavg-001.c b/tools/testing/selftests/proc/proc-loadavg-001.c index 471e2aa28077..fb4fe9188806 100644 --- a/tools/testing/selftests/proc/proc-loadavg-001.c +++ b/tools/testing/selftests/proc/proc-loadavg-001.c @@ -14,7 +14,6 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ /* Test that /proc/loadavg correctly reports last pid in pid namespace. */ -#define _GNU_SOURCE #include #include #include diff --git a/tools/testing/selftests/proc/proc-self-syscall.c b/tools/testing/selftests/proc/proc-self-syscall.c index 9f6d000c0245..8511dcfe67c7 100644 --- a/tools/testing/selftests/proc/proc-self-syscall.c +++ b/tools/testing/selftests/proc/proc-self-syscall.c @@ -13,7 +13,6 @@ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -#define _GNU_SOURCE #include #include #include diff --git a/tools/testing/selftests/proc/proc-uptime-002.c b/tools/testing/selftests/proc/proc-uptime-002.c index 30e2b7849089..e7ceabed7f51 100644 --- a/tools/testing/selftests/proc/proc-uptime-002.c +++ b/tools/testing/selftests/proc/proc-uptime-002.c @@ -15,7 +15,6 @@ */ // Test that values in /proc/uptime increment monotonically // while shifting across CPUs. -#define _GNU_SOURCE #undef NDEBUG #include #include -- 2.26.2
[PATCH 08/13] selftests: pidfd: drop needless linux/kcmp.h inclusion in pidfd_setns_test.c
kcmp is not used in pidfd_setns_test.c, so do not include <linux/kcmp.h>. Signed-off-by: Tommi Rantala --- tools/testing/selftests/pidfd/pidfd_setns_test.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/testing/selftests/pidfd/pidfd_setns_test.c b/tools/testing/selftests/pidfd/pidfd_setns_test.c index 7dca1aa4672d..3f3dc7a02a01 100644 --- a/tools/testing/selftests/pidfd/pidfd_setns_test.c +++ b/tools/testing/selftests/pidfd/pidfd_setns_test.c @@ -16,7 +16,6 @@ #include #include #include -#include <linux/kcmp.h> #include "pidfd.h" #include "../clone3/clone3_selftests.h" -- 2.26.2
[PATCH 07/13] selftests: pidfd: add CONFIG_CHECKPOINT_RESTORE=y to config
kcmp syscall is used in pidfd_getfd_test.c, so add CONFIG_CHECKPOINT_RESTORE=y to config to ensure kcmp is available. Signed-off-by: Tommi Rantala --- tools/testing/selftests/pidfd/config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/pidfd/config b/tools/testing/selftests/pidfd/config index bb11de90c0c9..f6f2965e17af 100644 --- a/tools/testing/selftests/pidfd/config +++ b/tools/testing/selftests/pidfd/config @@ -4,3 +4,4 @@ CONFIG_USER_NS=y CONFIG_PID_NS=y CONFIG_NET_NS=y CONFIG_CGROUPS=y +CONFIG_CHECKPOINT_RESTORE=y -- 2.26.2
[PATCH 11/13] selftests: core: use SKIP instead of XFAIL in close_range_test.c
XFAIL is gone since 9847d24af95c ("selftests/harness: Refactor XFAIL into SKIP"), use SKIP instead. Fixes: 9847d24af95c ("selftests/harness: Refactor XFAIL into SKIP") Signed-off-by: Tommi Rantala --- tools/testing/selftests/core/close_range_test.c | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/core/close_range_test.c b/tools/testing/selftests/core/close_range_test.c index c99b98b0d461..575b391ddc78 100644 --- a/tools/testing/selftests/core/close_range_test.c +++ b/tools/testing/selftests/core/close_range_test.c @@ -44,7 +44,7 @@ TEST(close_range) fd = open("/dev/null", O_RDONLY | O_CLOEXEC); ASSERT_GE(fd, 0) { if (errno == ENOENT) - XFAIL(return, "Skipping test since /dev/null does not exist"); + SKIP(return, "Skipping test since /dev/null does not exist"); } open_fds[i] = fd; @@ -52,7 +52,7 @@ TEST(close_range) EXPECT_EQ(-1, sys_close_range(open_fds[0], open_fds[100], -1)) { if (errno == ENOSYS) - XFAIL(return, "close_range() syscall not supported"); + SKIP(return, "close_range() syscall not supported"); } EXPECT_EQ(0, sys_close_range(open_fds[0], open_fds[50], 0)); @@ -108,7 +108,7 @@ TEST(close_range_unshare) fd = open("/dev/null", O_RDONLY | O_CLOEXEC); ASSERT_GE(fd, 0) { if (errno == ENOENT) - XFAIL(return, "Skipping test since /dev/null does not exist"); + SKIP(return, "Skipping test since /dev/null does not exist"); } open_fds[i] = fd; @@ -197,7 +197,7 @@ TEST(close_range_unshare_capped) fd = open("/dev/null", O_RDONLY | O_CLOEXEC); ASSERT_GE(fd, 0) { if (errno == ENOENT) - XFAIL(return, "Skipping test since /dev/null does not exist"); + SKIP(return, "Skipping test since /dev/null does not exist"); } open_fds[i] = fd; -- 2.26.2
[PATCH 09/13] selftests: android: fix multiple definition of sock_name
Fix multiple definition of sock_name compilation error: tools/testing/selftests/android/ion/ipcsocket.h:8: multiple definition of `sock_name' Signed-off-by: Tommi Rantala --- tools/testing/selftests/android/ion/ipcsocket.c | 1 + tools/testing/selftests/android/ion/ipcsocket.h | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/testing/selftests/android/ion/ipcsocket.c b/tools/testing/selftests/android/ion/ipcsocket.c index 7dc521002095..67ec69410d2e 100644 --- a/tools/testing/selftests/android/ion/ipcsocket.c +++ b/tools/testing/selftests/android/ion/ipcsocket.c @@ -10,6 +10,7 @@ #include "ipcsocket.h" +static char sock_name[MAX_SOCK_NAME_LEN]; int opensocket(int *sockfd, const char *name, int connecttype) { diff --git a/tools/testing/selftests/android/ion/ipcsocket.h b/tools/testing/selftests/android/ion/ipcsocket.h index b3e84498a8a1..ec5efb23e7b0 100644 --- a/tools/testing/selftests/android/ion/ipcsocket.h +++ b/tools/testing/selftests/android/ion/ipcsocket.h @@ -5,8 +5,6 @@ #define MAX_SOCK_NAME_LEN 64 -char sock_name[MAX_SOCK_NAME_LEN]; - /* This structure is responsible for holding the IPC data * data: hold the buffer fd * len: just the length of 32-bit integer fd -- 2.26.2
[PATCH 04/13] selftests/harness: prettify SKIP message whitespace again
Commit 9847d24af95c ("selftests/harness: Refactor XFAIL into SKIP") replaced XFAIL with SKIP in the output. Add one more space to make the output aligned and pretty again. Fixes: 9847d24af95c ("selftests/harness: Refactor XFAIL into SKIP") Signed-off-by: Tommi Rantala --- tools/testing/selftests/kselftest_harness.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index 4f78e4805633..d8f44f4bdb3f 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -126,7 +126,7 @@ snprintf(_metadata->results->reason, \ sizeof(_metadata->results->reason), fmt, ##__VA_ARGS__); \ if (TH_LOG_ENABLED) { \ - fprintf(TH_LOG_STREAM, "#      SKIP     %s\n", \ + fprintf(TH_LOG_STREAM, "#      SKIP      %s\n", \ _metadata->results->reason); \ } \ _metadata->passed = 1; \ -- 2.26.2
[PATCH 00/13] selftests fixes
Hi, small fixes to issues I hit with selftests. Tommi Rantala (13): selftests: filter kselftest headers from command in lib.mk selftests: pidfd: fix compilation errors due to wait.h selftests: add vmaccess to .gitignore selftests/harness: prettify SKIP message whitespace again selftests: pidfd: use ksft_test_result_skip() when skipping test selftests: pidfd: skip test on kcmp() ENOSYS selftests: pidfd: add CONFIG_CHECKPOINT_RESTORE=y to config selftests: pidfd: drop needless linux/kcmp.h inclusion in pidfd_setns_test.c selftests: android: fix multiple definition of sock_name selftests: proc: fix warning: _GNU_SOURCE redefined selftests: core: use SKIP instead of XFAIL in close_range_test.c selftests: clone3: use SKIP instead of XFAIL selftests: binderfs: use SKIP instead of XFAIL tools/testing/selftests/android/ion/ipcsocket.c | 1 + tools/testing/selftests/android/ion/ipcsocket.h | 2 -- .../selftests/clone3/clone3_cap_checkpoint_restore.c | 2 +- tools/testing/selftests/core/close_range_test.c | 8 .../selftests/filesystems/binderfs/binderfs_test.c| 8 tools/testing/selftests/kselftest_harness.h | 2 +- tools/testing/selftests/lib.mk| 2 +- tools/testing/selftests/pidfd/config | 1 + tools/testing/selftests/pidfd/pidfd_getfd_test.c | 5 - tools/testing/selftests/pidfd/pidfd_open_test.c | 1 - tools/testing/selftests/pidfd/pidfd_poll_test.c | 1 - tools/testing/selftests/pidfd/pidfd_setns_test.c | 1 - tools/testing/selftests/pidfd/pidfd_test.c| 2 +- tools/testing/selftests/proc/proc-loadavg-001.c | 1 - tools/testing/selftests/proc/proc-self-syscall.c | 1 - tools/testing/selftests/proc/proc-uptime-002.c| 1 - tools/testing/selftests/ptrace/.gitignore | 1 + 17 files changed, 19 insertions(+), 21 deletions(-) -- 2.26.2
[PATCH 13/13] selftests: binderfs: use SKIP instead of XFAIL
XFAIL is gone since 9847d24af95c ("selftests/harness: Refactor XFAIL into SKIP"), use SKIP instead. Fixes: 9847d24af95c ("selftests/harness: Refactor XFAIL into SKIP") Signed-off-by: Tommi Rantala --- .../selftests/filesystems/binderfs/binderfs_test.c| 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c index 1d27f52c61e6..477cbb042f5b 100644 --- a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c +++ b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c @@ -74,7 +74,7 @@ static int __do_binderfs_test(struct __test_metadata *_metadata) ret = mount(NULL, binderfs_mntpt, "binder", 0, 0); EXPECT_EQ(ret, 0) { if (errno == ENODEV) - XFAIL(goto out, "binderfs missing"); + SKIP(goto out, "binderfs missing"); TH_LOG("%s - Failed to mount binderfs", strerror(errno)); goto rmdir; } @@ -475,10 +475,10 @@ TEST(binderfs_stress) TEST(binderfs_test_privileged) { if (geteuid() != 0) - XFAIL(return, "Tests are not run as root. Skipping privileged tests"); + SKIP(return, "Tests are not run as root. Skipping privileged tests"); if (__do_binderfs_test(_metadata)) - XFAIL(return, "The Android binderfs filesystem is not available"); + SKIP(return, "The Android binderfs filesystem is not available"); } TEST(binderfs_test_unprivileged) @@ -511,7 +511,7 @@ TEST(binderfs_test_unprivileged) ret = wait_for_pid(pid); if (ret) { if (ret == 2) - XFAIL(return, "The Android binderfs filesystem is not available"); + SKIP(return, "The Android binderfs filesystem is not available"); ASSERT_EQ(ret, 0) { TH_LOG("wait_for_pid() failed"); } -- 2.26.2
[PATCH 05/13] selftests: pidfd: use ksft_test_result_skip() when skipping test
There's planned tests != run tests in pidfd_test when some test is skipped: $ ./pidfd_test TAP version 13 1..8 [...] # pidfd_send_signal signal recycled pid test: Skipping test # Planned tests != run tests (8 != 7) # Totals: pass:7 fail:0 xfail:0 xpass:0 skip:0 error:0 Fix by using ksft_test_result_skip(): $ ./pidfd_test TAP version 13 1..8 [...] ok 8 # SKIP pidfd_send_signal signal recycled pid test: Unsharing pid namespace not permitted # Totals: pass:7 fail:0 xfail:0 xpass:0 skip:1 error:0 Signed-off-by: Tommi Rantala --- tools/testing/selftests/pidfd/pidfd_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/pidfd/pidfd_test.c b/tools/testing/selftests/pidfd/pidfd_test.c index c585aaa2acd8..529eb700ac26 100644 --- a/tools/testing/selftests/pidfd/pidfd_test.c +++ b/tools/testing/selftests/pidfd/pidfd_test.c @@ -330,7 +330,7 @@ static int test_pidfd_send_signal_recycled_pid_fail(void) ksft_exit_fail_msg("%s test: Failed to recycle pid %d\n", test_name, PID_RECYCLE); case PIDFD_SKIP: - ksft_print_msg("%s test: Skipping test\n", test_name); + ksft_test_result_skip("%s test: Skipping test\n", test_name); ret = 0; break; case PIDFD_XFAIL: -- 2.26.2
[PATCH 02/13] selftests: pidfd: fix compilation errors due to wait.h
Drop unneeded <linux/wait.h> inclusion to fix pidfd compilation errors seen in Fedora 32: In file included from pidfd_open_test.c:9: ../../../../usr/include/linux/wait.h:17:16: error: expected identifier before numeric constant 17 | #define P_ALL 0 |^ Signed-off-by: Tommi Rantala --- tools/testing/selftests/pidfd/pidfd_open_test.c | 1 - tools/testing/selftests/pidfd/pidfd_poll_test.c | 1 - 2 files changed, 2 deletions(-) diff --git a/tools/testing/selftests/pidfd/pidfd_open_test.c b/tools/testing/selftests/pidfd/pidfd_open_test.c index b9fe75fc3e51..8a59438ccc78 100644 --- a/tools/testing/selftests/pidfd/pidfd_open_test.c +++ b/tools/testing/selftests/pidfd/pidfd_open_test.c @@ -6,7 +6,6 @@ #include #include #include -#include <linux/wait.h> #include #include #include diff --git a/tools/testing/selftests/pidfd/pidfd_poll_test.c b/tools/testing/selftests/pidfd/pidfd_poll_test.c index 4b115444dfe9..610811275357 100644 --- a/tools/testing/selftests/pidfd/pidfd_poll_test.c +++ b/tools/testing/selftests/pidfd/pidfd_poll_test.c @@ -3,7 +3,6 @@ #define _GNU_SOURCE #include #include -#include <linux/wait.h> #include #include #include -- 2.26.2
[PATCH 01/13] selftests: filter kselftest headers from command in lib.mk
Commit 1056d3d2c97e ("selftests: enforce local header dependency in lib.mk") added header dependency to the rule, but as the rule uses $^, the headers are added to the compiler command line. This can cause unexpected precompiled header files being generated when compilation fails: $ echo { >> openat2_test.c $ make gcc -Wall -O2 -g -fsanitize=address -fsanitize=undefined openat2_test.c tools/testing/selftests/kselftest_harness.h tools/testing/selftests/kselftest.h helpers.c -o tools/testing/selftests/openat2/openat2_test openat2_test.c:313:1: error: expected identifier or ‘(’ before ‘{’ token 313 | { | ^ make: *** [../lib.mk:140: tools/testing/selftests/openat2/openat2_test] Error 1 $ file openat2_test* openat2_test: GCC precompiled header (version 014) for C openat2_test.c: C source, ASCII text Fix it by filtering out the headers, so that we'll only pass the actual *.c files in the compiler command line. Fixes: 1056d3d2c97e ("selftests: enforce local header dependency in lib.mk") Signed-off-by: Tommi Rantala --- tools/testing/selftests/lib.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index 7a17ea815736..66f3317dc365 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -137,7 +137,7 @@ endif ifeq ($(OVERRIDE_TARGETS),) LOCAL_HDRS := $(selfdir)/kselftest_harness.h $(selfdir)/kselftest.h $(OUTPUT)/%:%.c $(LOCAL_HDRS) - $(LINK.c) $^ $(LDLIBS) -o $@ + $(LINK.c) $(filter-out $(LOCAL_HDRS),$^) $(LDLIBS) -o $@ $(OUTPUT)/%.o:%.S $(COMPILE.S) $^ -o $@ -- 2.26.2
[PATCH 03/13] selftests: add vmaccess to .gitignore
Commit 2de4e82318c7 ("selftests/ptrace: add test cases for dead-locks") added vmaccess testcase, add the binary to .gitignore Fixes: 2de4e82318c7 ("selftests/ptrace: add test cases for dead-locks") Signed-off-by: Tommi Rantala --- tools/testing/selftests/ptrace/.gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/ptrace/.gitignore b/tools/testing/selftests/ptrace/.gitignore index 7bebf9534a86..792318aaa30c 100644 --- a/tools/testing/selftests/ptrace/.gitignore +++ b/tools/testing/selftests/ptrace/.gitignore @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only get_syscall_info peeksiginfo +vmaccess -- 2.26.2
[tip: perf/core] perf bench: Fix div-by-zero if runtime is zero
The following commit has been merged into the perf/core branch of tip: Commit-ID: 41e7c32b978974adaadd4808ba42f9026634dca3 Gitweb: https://git.kernel.org/tip/41e7c32b978974adaadd4808ba42f9026634dca3 Author:Tommi Rantala AuthorDate:Fri, 17 Apr 2020 16:23:29 +03:00 Committer: Arnaldo Carvalho de Melo CommitterDate: Wed, 22 Apr 2020 10:01:33 -03:00 perf bench: Fix div-by-zero if runtime is zero Fix div-by-zero if runtime is zero: $ perf bench futex hash --runtime=0 # Running 'futex/hash' benchmark: Run summary [PID 12090]: 4 threads, each operating on 1024 [private] futexes for 0 secs. Floating point exception (core dumped) Signed-off-by: Tommi Rantala Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Darren Hart Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lore.kernel.org/lkml/20200417132330.119407-4-tommi.t.rant...@nokia.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/epoll-wait.c| 3 ++- tools/perf/bench/futex-hash.c| 3 ++- tools/perf/bench/futex-lock-pi.c | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/tools/perf/bench/epoll-wait.c b/tools/perf/bench/epoll-wait.c index f938c58..cf79736 100644 --- a/tools/perf/bench/epoll-wait.c +++ b/tools/perf/bench/epoll-wait.c @@ -519,7 +519,8 @@ int bench_epoll_wait(int argc, const char **argv) qsort(worker, nthreads, sizeof(struct worker), cmpworker); for (i = 0; i < nthreads; i++) { - unsigned long t = worker[i].ops / bench__runtime.tv_sec; + unsigned long t = bench__runtime.tv_sec > 0 ? 
+ worker[i].ops / bench__runtime.tv_sec : 0; update_stats(&throughput_stats, t); diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c index 65eebe0..915bf3d 100644 --- a/tools/perf/bench/futex-hash.c +++ b/tools/perf/bench/futex-hash.c @@ -205,7 +205,8 @@ int bench_futex_hash(int argc, const char **argv) pthread_mutex_destroy(&thread_lock); for (i = 0; i < nthreads; i++) { - unsigned long t = worker[i].ops / bench__runtime.tv_sec; + unsigned long t = bench__runtime.tv_sec > 0 ? + worker[i].ops / bench__runtime.tv_sec : 0; update_stats(&throughput_stats, t); if (!silent) { if (nfutexes == 1) diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c index 89fd8f3..bb25d8b 100644 --- a/tools/perf/bench/futex-lock-pi.c +++ b/tools/perf/bench/futex-lock-pi.c @@ -211,7 +211,8 @@ int bench_futex_lock_pi(int argc, const char **argv) pthread_mutex_destroy(&thread_lock); for (i = 0; i < nthreads; i++) { - unsigned long t = worker[i].ops / bench__runtime.tv_sec; + unsigned long t = bench__runtime.tv_sec > 0 ? + worker[i].ops / bench__runtime.tv_sec : 0; update_stats(&throughput_stats, t); if (!silent)
[tip: perf/core] perf cgroup: Avoid needless closing of unopened fd
The following commit has been merged into the perf/core branch of tip: Commit-ID: d2e7d8636fb7d3e30aa8f894003f9e293ea62eea Gitweb: https://git.kernel.org/tip/d2e7d8636fb7d3e30aa8f894003f9e293ea62eea Author:Tommi Rantala AuthorDate:Fri, 17 Apr 2020 16:23:26 +03:00 Committer: Arnaldo Carvalho de Melo CommitterDate: Wed, 22 Apr 2020 10:01:33 -03:00 perf cgroup: Avoid needless closing of unopened fd Do not bother with close() if fd is not valid, just to silence valgrind: $ valgrind ./perf script ==59169== Memcheck, a memory error detector ==59169== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al. ==59169== Using Valgrind-3.14.0 and LibVEX; rerun with -h for copyright info ==59169== Command: ./perf script ==59169== ==59169== Warning: invalid file descriptor -1 in syscall close() ==59169== Warning: invalid file descriptor -1 in syscall close() ==59169== Warning: invalid file descriptor -1 in syscall close() ==59169== Warning: invalid file descriptor -1 in syscall close() ==59169== Warning: invalid file descriptor -1 in syscall close() ==59169== Warning: invalid file descriptor -1 in syscall close() ==59169== Warning: invalid file descriptor -1 in syscall close() ==59169== Warning: invalid file descriptor -1 in syscall close() Signed-off-by: Tommi Rantala Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20200417132330.119407-1-tommi.t.rant...@nokia.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cgroup.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c index b73fb78..050dea9 100644 --- a/tools/perf/util/cgroup.c +++ b/tools/perf/util/cgroup.c @@ -107,7 +107,8 @@ found: static void cgroup__delete(struct cgroup *cgroup) { - close(cgroup->fd); + if (cgroup->fd >= 0) + close(cgroup->fd); zfree(&cgroup->name); free(cgroup); }
[tip: perf/core] perf test session topology: Fix data path
The following commit has been merged into the perf/core branch of tip: Commit-ID: dbd660e6b2884b864d2642d930a163d3bcebe4be Gitweb: https://git.kernel.org/tip/dbd660e6b2884b864d2642d930a163d3bcebe4be Author:Tommi Rantala AuthorDate:Thu, 23 Apr 2020 14:53:40 +03:00 Committer: Arnaldo Carvalho de Melo CommitterDate: Thu, 23 Apr 2020 11:08:24 -03:00 perf test session topology: Fix data path Commit 2d4f27999b88 ("perf data: Add global path holder") missed path conversion in tests/topology.c, causing the "Session topology" testcase to "hang" (waits forever for input from stdin) when doing "ssh $VM perf test". Can be reproduced by running "cat | perf test topo", and crashed by replacing cat with true: $ true | perf test -v topo 40: Session topology : --- start --- test child forked, pid 3638 templ file: /tmp/perf-test-QPvAch incompatible file format incompatible file format (rerun with -v to learn more) free(): invalid pointer test child interrupted end Session topology: FAILED! Committer testing: Reproduced the above result before the patch and after it is back working: # true | perf test -v topo 41: Session topology : --- start --- test child forked, pid 19374 templ file: /tmp/perf-test-YOTEQg CPU 0, core 0, socket 0 CPU 1, core 1, socket 0 CPU 2, core 2, socket 0 CPU 3, core 3, socket 0 CPU 4, core 0, socket 0 CPU 5, core 1, socket 0 CPU 6, core 2, socket 0 CPU 7, core 3, socket 0 test child finished with 0 end Session topology: Ok # Fixes: 2d4f27999b88 ("perf data: Add global path holder") Signed-off-by: Tommi Rantala Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Mamatha Inamdar Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ravi Bangoria Link: http://lore.kernel.org/lkml/20200423115341.562782-1-tommi.t.rant...@nokia.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/topology.c | 12 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index 
4a80049..22daf2b 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -33,10 +33,8 @@ static int session_write_header(char *path) { struct perf_session *session; struct perf_data data = { - .file = { - .path = path, - }, - .mode = PERF_DATA_MODE_WRITE, + .path = path, + .mode = PERF_DATA_MODE_WRITE, }; session = perf_session__new(&data, false, NULL); @@ -63,10 +61,8 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) { struct perf_session *session; struct perf_data data = { - .file = { - .path = path, - }, - .mode = PERF_DATA_MODE_READ, + .path = path, + .mode = PERF_DATA_MODE_READ, }; int i;
[tip:perf/core] perf tests shell: Skip trace+probe_vfs_getname.sh if built without trace support
Commit-ID: 83244772a4cf9490a54182be2f65f45d6b1a1ee8 Gitweb: https://git.kernel.org/tip/83244772a4cf9490a54182be2f65f45d6b1a1ee8 Author: Tommi Rantala AuthorDate: Fri, 15 Feb 2019 15:42:46 +0200 Committer: Arnaldo Carvalho de Melo CommitDate: Fri, 15 Feb 2019 13:42:26 -0300 perf tests shell: Skip trace+probe_vfs_getname.sh if built without trace support If perf was built without trace support, the trace+probe_vfs_getname.sh 'perf test' entry fails: # perf trace -h perf: 'trace' is not a perf-command. See 'perf --help' # perf test 64 64: Check open filename arg using perf trace + vfs_getname: FAILED! Check trace support, so that we'll skip the test in that case: # perf test 64 64: Check open filename arg using perf trace + vfs_getname: Skip Signed-off-by: Tommi Rantala Cc: Alexander Shishkin Cc: Hendrik Brueckner Cc: Jiri Olsa Cc: Kim Phillips Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20190215134253.11454-1-tt.rant...@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/lib/probe.sh | 5 + tools/perf/tests/shell/trace+probe_vfs_getname.sh | 1 + 2 files changed, 6 insertions(+) diff --git a/tools/perf/tests/shell/lib/probe.sh b/tools/perf/tests/shell/lib/probe.sh index 6293cc660947..e37787be672b 100644 --- a/tools/perf/tests/shell/lib/probe.sh +++ b/tools/perf/tests/shell/lib/probe.sh @@ -4,3 +4,8 @@ skip_if_no_perf_probe() { perf probe 2>&1 | grep -q 'is not a perf-command' && return 2 return 0 } + +skip_if_no_perf_trace() { + perf trace -h 2>&1 | grep -q -e 'is not a perf-command' -e 'trace command not available' && return 2 + return 0 +} diff --git a/tools/perf/tests/shell/trace+probe_vfs_getname.sh b/tools/perf/tests/shell/trace+probe_vfs_getname.sh index 50109f27ca07..147efeb6b195 100755 --- a/tools/perf/tests/shell/trace+probe_vfs_getname.sh +++ b/tools/perf/tests/shell/trace+probe_vfs_getname.sh @@ -12,6 +12,7 @@ . 
$(dirname $0)/lib/probe.sh skip_if_no_perf_probe || exit 2 +skip_if_no_perf_trace || exit 2 . $(dirname $0)/lib/probe_vfs_getname.sh
[PATCH] perf tests shell: Skip trace+probe_vfs_getname.sh if built without trace support
From: Tommi Rantala If perf was built without trace support, trace+probe_vfs_getname.sh fails: # perf trace -h perf: 'trace' is not a perf-command. See 'perf --help' # perf test 64 64: Check open filename arg using perf trace + vfs_getname: FAILED! Check trace support, so that we'll skip the test: # perf test 64 64: Check open filename arg using perf trace + vfs_getname: Skip Signed-off-by: Tommi Rantala --- tools/perf/tests/shell/lib/probe.sh | 5 + tools/perf/tests/shell/trace+probe_vfs_getname.sh | 1 + 2 files changed, 6 insertions(+) diff --git a/tools/perf/tests/shell/lib/probe.sh b/tools/perf/tests/shell/lib/probe.sh index 6293cc660947..e37787be672b 100644 --- a/tools/perf/tests/shell/lib/probe.sh +++ b/tools/perf/tests/shell/lib/probe.sh @@ -4,3 +4,8 @@ skip_if_no_perf_probe() { perf probe 2>&1 | grep -q 'is not a perf-command' && return 2 return 0 } + +skip_if_no_perf_trace() { + perf trace -h 2>&1 | grep -q -e 'is not a perf-command' -e 'trace command not available' && return 2 + return 0 +} diff --git a/tools/perf/tests/shell/trace+probe_vfs_getname.sh b/tools/perf/tests/shell/trace+probe_vfs_getname.sh index 50109f27ca07..147efeb6b195 100755 --- a/tools/perf/tests/shell/trace+probe_vfs_getname.sh +++ b/tools/perf/tests/shell/trace+probe_vfs_getname.sh @@ -12,6 +12,7 @@ . $(dirname $0)/lib/probe.sh skip_if_no_perf_probe || exit 2 +skip_if_no_perf_trace || exit 2 . $(dirname $0)/lib/probe_vfs_getname.sh -- 2.20.1
perf trace --no-syscalls -e rcu:* -- garbage in output
Hi, There is some garbage in perf trace output, when tracing some rcu tracepoints (kernel is configured with CONFIG_RCU_TRACE=y). For example in rcu:rcu_callback, instead of getting proper rcuname in the first "%s" here, there's garbage: $ tail -1 /sys/kernel/debug/tracing/events/rcu/rcu_callback/format print fmt: "%s rhp=%p func=%pf %ld/%ld", REC->rcuname, REC->rhp, REC->func, REC->qlen_lazy, REC->qlen $ ./perf trace --no-syscalls -e 'rcu:*' -- sleep 1 2>&1 | od -t c 000 0 . 0 0 0 r c u : r 020 c u _ c a l l b a c k : 200 351 345 215 040 377 377 377 377 r h p = 0 x f f f f 8 060 d 5 5 8 f 5 b 0 1 0 0 f u n c 100 = f i l e _ f r e e _ r c u 0 120 / 1 \n 0 . 0 0 8 r c 140 u : r c u _ d y n t i c k : 373 217 160 276 215 377 377 377 377 4 0 0 0 0 0 0 0 0 200 0 0 0 0 0 0 0 4 0 0 0 0 0 0 0 220 0 0 0 0 0 0 0 2 0 x 1 d 2 \n 240 0 . 0 2 3 r c u : r c 260 u _ d y n t i c k : 023 220 276 215 377 377 300 377 377 4 0 0 0 0 0 0 0 0 0 0 0 0 320 0 0 2 4 0 0 0 0 0 0 0 0 0 0 0 340 0 0 0 0 0 x 1 d 2 \n 360 0 . 0 2 8 r c u : r c u _ d y 400 n t i c k : 373 217 276 215 377 377 377 377 4 420 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 440 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 460 0 x 1 d 2 \n 0 . 0 3 [...] If I got it right, "pevent" in print_str_arg() is zero-initialized (pevent->long_size is zero...), causing "%s" format to produce garbage bytes instead of the proper string. -Tommi
Re: backporting "ext4: inplace xattr block update fails to deduplicate blocks" to LTS kernels?
On 21.02.2018 17:56, Theodore Ts'o wrote: On Wed, Feb 21, 2018 at 12:40:00PM +0100, Greg Kroah-Hartman wrote: On Mon, Feb 19, 2018 at 03:26:37PM +0200, Tommi Rantala wrote: OK to backport it? I tested it briefly in 4.9, seems to work. It looks sane, but it would be nice if I can get people who are backporting ext4 patches to make sure there are no regressions using one of kvm-xfstests[1] or gce-xfstests[2][3]. [1] https://github.com/tytso/xfstests-bld/blob/master/Documentation/kvm-xfstests.md [2] https://github.com/tytso/xfstests-bld/blob/master/Documentation/gce-xfstests.md [3] https://thunk.org/gce-xfstests I do run regression tests[4] on stable kernels when I have time, but it scales much better when other people can help. [4] https://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4.git/tag/?h=ext4-4.9.54-1 I need an ack from the ext4 maintainers before I can take this... Greg, you can go ahead and take this, but in the future I'd appreciate it if ext4 backporters could at least run a smoke test (which takes less than 15 minutes on GCE) before and after the patch, and report no test regressions. Thanks for the instructions! 
Smoke test results 4.9.82 with and without the patch (attached, to avoid email client mangling it), no new failures: Summary report KERNEL:kernel 4.9.82-xfstests #2 SMP Thu Feb 22 14:58:27 EET 2018 x86_64 CPUS: 2 MEM: 1989.2 ext4/4k: 271 tests, 7 failures, 34 skipped, 737 seconds Failures: generic/081 generic/383 generic/384 generic/386 generic/441 generic/451 generic/472 Totals: 271 tests, 34 skipped, 7 failures, 0 errors, 685s Summary report KERNEL:kernel 4.9.82-xfstests-1-gb98ae0251413 #1 SMP Thu Feb 22 14:31:01 EET 2018 x86_64 CPUS: 2 MEM: 1989.2 ext4/4k: 271 tests, 7 failures, 34 skipped, 749 seconds Failures: generic/081 generic/383 generic/384 generic/386 generic/441 generic/451 generic/472 Totals: 271 tests, 34 skipped, 7 failures, 0 errors, 694s FSTESTVER: e2fsprogs v1.43.6-85-g7595699d0 (Wed, 6 Sep 2017 22:04:14 -0400) FSTESTVER: fio fio-3.2 (Fri, 3 Nov 2017 15:23:49 -0600) FSTESTVER: quota 4d81e8b (Mon, 16 Oct 2017 09:42:44 +0200) FSTESTVER: stress-ng 977ae35 (Wed, 6 Sep 2017 23:45:03 -0400) FSTESTVER: xfsprogs v4.14.0-rc2-1-g19ca9b0b (Mon, 27 Nov 2017 10:56:21 -0600) FSTESTVER: xfstests-bld ff7b8c2 (Wed, 13 Dec 2017 21:24:24 -0500) FSTESTVER: xfstests linux-v3.8-1832-gafeee2d9 (Sun, 31 Dec 2017 13:35:28 -0500) FSTESTCFG: 4k FSTESTSET: -g quick FSTESTOPT: aex Tommi >From b98ae025141361b9e92fdd470dfd2314a64a47d0 Mon Sep 17 00:00:00 2001 From: Tahsin Erdogan Date: Sat, 5 Aug 2017 22:41:42 -0400 Subject: [PATCH] ext4: inplace xattr block update fails to deduplicate blocks commit ec00022030da5761518476096626338bd67df57a upstream. When an xattr block has a single reference, block is updated inplace and it is reinserted to the cache. Later, a cache lookup is performed to see whether an existing block has the same contents. This cache lookup will most of the time return the just inserted entry so deduplication is not achieved. 
Running the following test script will produce two xattr blocks which can be observed in "File ACL: " line of debugfs output: mke2fs -b 1024 -I 128 -F -O extent /dev/sdb 1G mount /dev/sdb /mnt/sdb touch /mnt/sdb/{x,y} setfattr -n user.1 -v aaa /mnt/sdb/x setfattr -n user.2 -v bbb /mnt/sdb/x setfattr -n user.1 -v aaa /mnt/sdb/y setfattr -n user.2 -v bbb /mnt/sdb/y debugfs -R 'stat x' /dev/sdb | cat debugfs -R 'stat y' /dev/sdb | cat This patch defers the reinsertion to the cache so that we can locate other blocks with the same contents. Signed-off-by: Tahsin Erdogan Signed-off-by: Theodore Ts'o Reviewed-by: Andreas Dilger Signed-off-by: Tommi Rantala --- fs/ext4/xattr.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 3eeed8f0aa06..3fadfabcac39 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -837,8 +837,6 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, if (!IS_LAST_ENTRY(s->first)) ext4_xattr_rehash(header(s->base), s->here); -ext4_xattr_cache_insert(ext4_mb_cache, - bs->bh); } ext4_xattr_block_csum_set(inode, bs->bh); unlock_buffer(bs->bh); @@ -959,6 +957,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, } else if (bs->bh && s->base == bs->bh->b_data) { /* We were modifying this block in-place. */ ea_bdebug(bs->bh, "keeping this block"); + ext4_xattr_cache_insert(ext4_mb_cache, bs->bh); new_bh = bs->bh; get_bh(new_bh); } else { -- 2.14.3
Re: net: hang in unregister_netdevice: waiting for lo to become free
On 20.02.2018 18:26, Neil Horman wrote: On Tue, Feb 20, 2018 at 09:14:41AM +0100, Dmitry Vyukov wrote: On Tue, Feb 20, 2018 at 8:56 AM, Tommi Rantala wrote: On 19.02.2018 20:59, Dmitry Vyukov wrote: Is this meant to be fixed already? I am still seeing this on the latest upstream tree. These two commits are in v4.16-rc1: commit 4a31a6b19f9ddf498c81f5c9b089742b7472a6f8 Author: Tommi Rantala Date: Mon Feb 5 21:48:14 2018 +0200 sctp: fix dst refcnt leak in sctp_v4_get_dst ... Fixes: 410f03831 ("sctp: add routing output fallback") Fixes: 0ca50d12f ("sctp: fix src address selection if using secondary addresses") commit 957d761cf91cdbb175ad7d8f5472336a4d54dbf2 Author: Alexey Kodanev Date: Mon Feb 5 15:10:35 2018 +0300 sctp: fix dst refcnt leak in sctp_v6_get_dst() ... Fixes: dbc2b5e9a09e ("sctp: fix src address selection if using secondary addresses for ipv6") I guess we missed something if it's still reproducible. I can check it later this week, unless someone else beat me to it. Hi Tommi, Hmmm, I can't claim that it's exactly the same bug. Perhaps it's another one then. But I am still seeing these: [ 58.799130] unregister_netdevice: waiting for lo to become free. Usage count = 4 [ 60.847138] unregister_netdevice: waiting for lo to become free. Usage count = 4 [ 62.895093] unregister_netdevice: waiting for lo to become free. Usage count = 4 [ 64.943103] unregister_netdevice: waiting for lo to become free. Usage count = 4 on upstream tree pulled ~12 hours ago. Can you write a systemtap script to probe dev_hold, and dev_put, printing out a backtrace if the device name matches "lo". That should tell us definitively if the problem is in the same location or not Hi Dmitry, I tested with the reproducer and the kernel .config file that you sent in the first email in this thread: With 4.16-rc2 unable to reproduce. With 4.15-rc9 bug reproducible, and I get "unregister_netdevice: waiting for lo to become free. 
Usage count = 3" With 4.15-rc9 and Alexey's "sctp: fix dst refcnt leak in sctp_v6_get_dst()" cherry-picked on top, unable to reproduce. Is syzkaller doing something else now to trigger the bug...? Can you still trigger the bug with the same reproducer? Tommi
Re: net: hang in unregister_netdevice: waiting for lo to become free
On 19.02.2018 20:59, Dmitry Vyukov wrote: On Sat, Feb 3, 2018 at 1:15 PM, Xin Long wrote: On 1/30/18 1:57 PM, David Ahern wrote: On 1/30/18 1:08 PM, Daniel Borkmann wrote: On 01/30/2018 07:32 PM, Cong Wang wrote: On Tue, Jan 30, 2018 at 4:09 AM, Dmitry Vyukov wrote: Hello, The following program creates a hang in unregister_netdevice. cleanup_net work hangs there forever periodically printing "unregister_netdevice: waiting for lo to become free. Usage count = 3" and creation of any new network namespaces hangs forever. Interestingly, this is not reproducible on net-next. The most recent change on netns refcnt was 4ee806d51176 ("net: tcp: close sock if net namespace is exiting") in net/net-next from 5 days ago, maybe fixed due to that? This appears to be the commit introducing the refcnt leak: $ git bisect bad dbc2b5e9a09e9a6664679a667ff81cff6e5f2641 is the first bad commit commit dbc2b5e9a09e9a6664679a667ff81cff6e5f2641 Author: Xin Long Date: Fri May 12 14:39:52 2017 +0800 sctp: fix src address selection if using secondary addresses for ipv6 v4.14 is bad. Running bisect in the background while doing other things Interesting. The commit that avoids the refcnt leak is commit 955ec4cb3b54c7c389a9f830be7d3ae2056b9212 Author: David Ahern Date: Wed Jan 24 19:45:29 2018 -0800 net/ipv6: Do not allow route add with a device that is down That commit does not intentionally address the problem so it is just masking the problematic code introduced by the commit above. Thanks, David A. I'm still on a trip. will look into this asap. Alexey and Tommi already had the patches for this issue on both SCTP v4 and v6 dst_get, Thanks. Is this meant to be fixed already? I am still seeing this on the latest upstream tree. These two commits are in v4.16-rc1: commit 4a31a6b19f9ddf498c81f5c9b089742b7472a6f8 Author: Tommi Rantala Date: Mon Feb 5 21:48:14 2018 +0200 sctp: fix dst refcnt leak in sctp_v4_get_dst ... 
Fixes: 410f03831 ("sctp: add routing output fallback") Fixes: 0ca50d12f ("sctp: fix src address selection if using secondary addresses") commit 957d761cf91cdbb175ad7d8f5472336a4d54dbf2 Author: Alexey Kodanev Date: Mon Feb 5 15:10:35 2018 +0300 sctp: fix dst refcnt leak in sctp_v6_get_dst() ... Fixes: dbc2b5e9a09e ("sctp: fix src address selection if using secondary addresses for ipv6") I guess we missed something if it's still reproducible. I can check it later this week, unless someone else beat me to it. Tommi
backporting "ext4: inplace xattr block update fails to deduplicate blocks" to LTS kernels?
Hi, 4.9 (and earlier) LTS kernels are missing this: commit ec00022030da5761518476096626338bd67df57a Author: Tahsin Erdogan Date: Sat Aug 5 22:41:42 2017 -0400 ext4: inplace xattr block update fails to deduplicate blocks OK to backport it? I tested it briefly in 4.9, seems to work. One of our testers noticed a glusterfs performance regression when going from 4.4 to 4.9, caused by the duplicated blocks. If I understand everything correctly, in 4.4 mbcache uses the block number in the hash table bucket calculation, and the hash table is populated quite evenly even if there are duplicates. So the mbcache is fast. But in later kernels mbcache puts all the duplicate entries into a single bucket. As the entries are stored in one big linked list, this obviously makes the mbcache slow. I tested this in 4.9 (which still has the ext4_xattr_rehash() call that got eliminated in commit "ext4: eliminate xattr entry e_hash recalculation for removes"): diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 3eeed8f0aa06..3fadfabcac39 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -837,8 +837,6 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, if (!IS_LAST_ENTRY(s->first)) ext4_xattr_rehash(header(s->base), s->here); - ext4_xattr_cache_insert(ext4_mb_cache, - bs->bh); } ext4_xattr_block_csum_set(inode, bs->bh); unlock_buffer(bs->bh); @@ -959,6 +957,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, } else if (bs->bh && s->base == bs->bh->b_data) { /* We were modifying this block in-place. */ ea_bdebug(bs->bh, "keeping this block"); + ext4_xattr_cache_insert(ext4_mb_cache, bs->bh); new_bh = bs->bh; get_bh(new_bh); } else { Tommi
intel_pmu_init() extra_attr memory leak
Hi, I'm seeing this kmemleak report in v4.15-rc4: # cat /sys/kernel/debug/kmemleak unreferenced object 0x8801f3d5d720 (size 64): comm "swapper/0", pid 1, jiffies 4294667312 (age 2687.423s) hex dump (first 32 bytes): 60 d1 41 ad ff ff ff ff 20 d1 41 ad ff ff ff ff `.A. .A. 80 d0 41 ad ff ff ff ff 40 d0 41 ad ff ff ff ff ..A.@.A. backtrace: [] intel_pmu_init+0x1844/0x1d38 [ ] init_hw_perf_events+0x8c/0x66f [ ] do_one_initcall+0x7b/0x1d0 [<8ee1f02a>] kernel_init_freeable+0x163/0x2f9 [ ] kernel_init+0xf/0x120 [<38a99264>] ret_from_fork+0x24/0x30 [ ] 0x $ ./scripts/faddr2line vmlinux intel_pmu_init+0x1844/0x1d38 intel_pmu_init+0x1844/0x1d38: intel_pmu_init at arch/x86/events/intel/core.c:4296 Which matches line: extra_attr = merge_attr(extra_attr, skl_format_attr); So looks like "extra_attr" is leaked here. "git blame" points to this commit: commit a5df70c354c26e20d5fd8eb64517f724e97ef0b2 Author: Andi Kleen Date: Tue Aug 22 11:52:00 2017 -0700 perf/x86: Only show format attributes when supported -Tommi
Re: [PATCH net v2] tipc: call tipc_rcv() only if bearer is up in tipc_udp_recv()
On 01.12.2017 15:18, Ying Xue wrote: On 11/30/2017 08:32 PM, Tommi Rantala wrote: In my opinion, the real root cause of the issue is because we too early set a not-yet-initialized bearer instance to ub->bearer through rcu_assign_pointer(ub->bearer, b) in tipc_udp_enable(). Instead if we assign the bearer pointer at the end of tipc_udp_enable() where the bearer has been completed the initialization, the issue would be avoided. Hi, sorry, I fail to see how that helps. bearer->tolerance is only initialized in tipc_enable_bearer() after calling m->enable_media() ie. tipc_udp_enable(). So even if we do "rcu_assign_pointer(ub->bearer, b)" later in tipc_udp_enable(), bearer->tolerance will still be uninitialized, and the crash can happen. Sorry, I missed the point that b->tolerance is not initialized when we assign bearer pointer to ub->bearer later. But in my view the issue happens because we enable media too early. So it's better to change the code as below: Thanks, looks good to me! Tested in 4.4 (which does not have b->up), and this fixes the oops also there. -Tommi diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 47ec121..ec6f02a 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -320,19 +320,18 @@ static int tipc_enable_bearer(struct net *net, const char *name, strcpy(b->name, name); b->media = m; - res = m->enable_media(net, b, attr); - if (res) { - pr_warn("Bearer <%s> rejected, enable failure (%d)\n", - name, -res); - return -EINVAL; - } - b->identity = bearer_id; b->tolerance = m->tolerance; b->window = m->window; b->domain = disc_domain; b->net_plane = bearer_id + 'A'; b->priority = priority; + res = m->enable_media(net, b, attr); + if (res) { + pr_warn("Bearer <%s> rejected, enable failure (%d)\n", + name, -res); + return -EINVAL; + } test_and_set_bit_lock(0, &b->up);
Re: [PATCH net v2] tipc: call tipc_rcv() only if bearer is up in tipc_udp_recv()
On 30.11.2017 12:57, Ying Xue wrote: On 11/29/2017 06:48 PM, Tommi Rantala wrote: Remove the second tipc_rcv() call in tipc_udp_recv(). We have just checked that the bearer is not up, and calling tipc_rcv() with a bearer that is not up leads to a TIPC div-by-zero crash in tipc_node_calculate_timer(). The crash is rare in practice, but can happen like this: In my opinion, the real root cause of the issue is because we too early set a not-yet-initialized bearer instance to ub->bearer through rcu_assign_pointer(ub->bearer, b) in tipc_udp_enable(). Instead if we assign the bearer pointer at the end of tipc_udp_enable() where the bearer has been completed the initialization, the issue would be avoided. Hi, sorry, I fail to see how that helps. bearer->tolerance is only initialized in tipc_enable_bearer() after calling m->enable_media() ie. tipc_udp_enable(). So even if we do "rcu_assign_pointer(ub->bearer, b)" later in tipc_udp_enable(), bearer->tolerance will still be uninitialized, and the crash can happen. BR, Tommi Thanks, Ying We're enabling a bearer, but it's not yet up and fully initialized. At the same time we receive a discovery packet, and in tipc_udp_recv() we end up calling tipc_rcv() with the not-yet-initialized bearer, causing later the div-by-zero crash in tipc_node_calculate_timer(). Jon Maloy explains the impact of removing the second tipc_rcv() call: "link setup in the worst case will be delayed until the next arriving discovery messages, 1 sec later, and this is an acceptable delay." As the tipc_rcv() call is removed, just leave the function via the rcu_out label, so that we will kfree_skb(). 
[ 12.590450] Own node address <1.1.1>, network identity 1 [ 12.668088] divide error: [#1] SMP [ 12.676952] CPU: 2 PID: 0 Comm: swapper/2 Not tainted 4.14.2-dirty #1 [ 12.679225] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-2.fc27 04/01/2014 [ 12.682095] task: 8c2a761edb80 task.stack: a41cc0cac000 [ 12.684087] RIP: 0010:tipc_node_calculate_timer.isra.12+0x45/0x60 [tipc] [ 12.686486] RSP: 0018:8c2a7fc838a0 EFLAGS: 00010246 [ 12.688451] RAX: RBX: 8c2a5b382600 RCX: [ 12.691197] RDX: RSI: 8c2a5b382600 RDI: 8c2a5b382600 [ 12.693945] RBP: 8c2a7fc838b0 R08: 0001 R09: 0001 [ 12.696632] R10: R11: R12: 8c2a5d8949d8 [ 12.699491] R13: 95ede400 R14: R15: 8c2a5d894800 [ 12.702338] FS: () GS:8c2a7fc8() knlGS: [ 12.705099] CS: 0010 DS: ES: CR0: 80050033 [ 12.706776] CR2: 01bb9440 CR3: bd009001 CR4: 003606e0 [ 12.708847] DR0: DR1: DR2: [ 12.711016] DR3: DR6: fffe0ff0 DR7: 0400 [ 12.712627] Call Trace: [ 12.713390] [ 12.714011] tipc_node_check_dest+0x2e8/0x350 [tipc] [ 12.715286] tipc_disc_rcv+0x14d/0x1d0 [tipc] [ 12.716370] tipc_rcv+0x8b0/0xd40 [tipc] [ 12.717396] ? minmax_running_min+0x2f/0x60 [ 12.718248] ? dst_alloc+0x4c/0xa0 [ 12.718964] ? tcp_ack+0xaf1/0x10b0 [ 12.719658] ? tipc_udp_is_known_peer+0xa0/0xa0 [tipc] [ 12.720634] tipc_udp_recv+0x71/0x1d0 [tipc] [ 12.721459] ? dst_alloc+0x4c/0xa0 [ 12.722130] udp_queue_rcv_skb+0x264/0x490 [ 12.722924] __udp4_lib_rcv+0x21e/0x990 [ 12.723670] ? ip_route_input_rcu+0x2dd/0xbf0 [ 12.724442] ? tcp_v4_rcv+0x958/0xa40 [ 12.725039] udp_rcv+0x1a/0x20 [ 12.725587] ip_local_deliver_finish+0x97/0x1d0 [ 12.726323] ip_local_deliver+0xaf/0xc0 [ 12.726959] ? ip_route_input_noref+0x19/0x20 [ 12.727689] ip_rcv_finish+0xdd/0x3b0 [ 12.728307] ip_rcv+0x2ac/0x360 [ 12.728839] __netif_receive_skb_core+0x6fb/0xa90 [ 12.729580] ? udp4_gro_receive+0x1a7/0x2c0 [ 12.730274] __netif_receive_skb+0x1d/0x60 [ 12.730953] ? 
__netif_receive_skb+0x1d/0x60 [ 12.731637] netif_receive_skb_internal+0x37/0xd0 [ 12.732371] napi_gro_receive+0xc7/0xf0 [ 12.732920] receive_buf+0x3c3/0xd40 [ 12.733441] virtnet_poll+0xb1/0x250 [ 12.733944] net_rx_action+0x23e/0x370 [ 12.734476] __do_softirq+0xc5/0x2f8 [ 12.734922] irq_exit+0xfa/0x100 [ 12.735315] do_IRQ+0x4f/0xd0 [ 12.735680] common_interrupt+0xa2/0xa2 [ 12.736126] [ 12.736416] RIP: 0010:native_safe_halt+0x6/0x10 [ 12.736925] RSP: 0018:a41cc0cafe90 EFLAGS: 0246 ORIG_RAX: ff4d [ 12.737756] RAX: RBX: 8c2a761edb80 RCX: [ 12.738504] RDX: RSI: RDI: [ 12.739258] RBP: a41cc0cafe90 R08: 014b5b9795e5 R09: a41cc12c7e88 [ 12.740118] R10: R11: R12: 0002 [ 12.740964] R13: 8c2a761edb80 R14
[PATCH net v2] tipc: call tipc_rcv() only if bearer is up in tipc_udp_recv()
ff8200 (relocation range: 0x8000-0xbfff) [ 12.751215] Rebooting in 60 seconds.. Fixes: c9b64d492b1f ("tipc: add replicast peer discovery") Signed-off-by: Tommi Rantala Cc: Jon Maloy --- v2: Resorted to a minimal fix, as per feedback from David M. net/tipc/udp_media.c | 4 1 file changed, 4 deletions(-) diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index ecca64fc6a6f..3deabcab4882 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -371,10 +371,6 @@ static int tipc_udp_recv(struct sock *sk, struct sk_buff *skb) goto rcu_out; } - tipc_rcv(sock_net(sk), skb, b); - rcu_read_unlock(); - return 0; - rcu_out: rcu_read_unlock(); out: -- 2.14.3
Re: [PATCH] tipc: call tipc_rcv() only if bearer is up in tipc_udp_recv()
On 28.11.2017 16:58, David Miller wrote: From: Tommi Rantala Date: Tue, 28 Nov 2017 14:53:15 +0200 - - if (unlikely(msg_user(hdr) == LINK_CONFIG)) { - err = tipc_udp_rcast_disc(b, skb); - if (err) - goto rcu_out; + } else { + if (unlikely(b && msg_user(hdr) == LINK_CONFIG)) + tipc_udp_rcast_disc(b, skb); + kfree_skb(skb); } Either put the 'err' propagation back or clearly explain in your commit log message why this part of the change is absolutely essential for this bug fix. Thank you. Thanks for the feedback. I'll post patch v2 soon. -Tommi
[PATCH] tipc: call tipc_rcv() only if bearer is up in tipc_udp_recv()
Call tipc_rcv() only if bearer is up in tipc_udp_recv(). Fixes a rare TIPC div-by-zero crash in tipc_node_calculate_timer(): We're enabling a bearer, but it's not yet up and fully initialized. At the same time we receive a discovery packet, and in tipc_udp_recv() we end up calling tipc_rcv() with the not-yet-initialized bearer, causing later a div-by-zero crash in tipc_node_calculate_timer(). [ 12.590450] Own node address <1.1.1>, network identity 1 [ 12.668088] divide error: [#1] SMP [ 12.676952] CPU: 2 PID: 0 Comm: swapper/2 Not tainted 4.14.2-dirty #1 [ 12.679225] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-2.fc27 04/01/2014 [ 12.682095] task: 8c2a761edb80 task.stack: a41cc0cac000 [ 12.684087] RIP: 0010:tipc_node_calculate_timer.isra.12+0x45/0x60 [tipc] [ 12.686486] RSP: 0018:8c2a7fc838a0 EFLAGS: 00010246 [ 12.688451] RAX: RBX: 8c2a5b382600 RCX: [ 12.691197] RDX: RSI: 8c2a5b382600 RDI: 8c2a5b382600 [ 12.693945] RBP: 8c2a7fc838b0 R08: 0001 R09: 0001 [ 12.696632] R10: R11: R12: 8c2a5d8949d8 [ 12.699491] R13: 95ede400 R14: R15: 8c2a5d894800 [ 12.702338] FS: () GS:8c2a7fc8() knlGS: [ 12.705099] CS: 0010 DS: ES: CR0: 80050033 [ 12.706776] CR2: 01bb9440 CR3: bd009001 CR4: 003606e0 [ 12.708847] DR0: DR1: DR2: [ 12.711016] DR3: DR6: fffe0ff0 DR7: 0400 [ 12.712627] Call Trace: [ 12.713390] [ 12.714011] tipc_node_check_dest+0x2e8/0x350 [tipc] [ 12.715286] tipc_disc_rcv+0x14d/0x1d0 [tipc] [ 12.716370] tipc_rcv+0x8b0/0xd40 [tipc] [ 12.717396] ? minmax_running_min+0x2f/0x60 [ 12.718248] ? dst_alloc+0x4c/0xa0 [ 12.718964] ? tcp_ack+0xaf1/0x10b0 [ 12.719658] ? tipc_udp_is_known_peer+0xa0/0xa0 [tipc] [ 12.720634] tipc_udp_recv+0x71/0x1d0 [tipc] [ 12.721459] ? dst_alloc+0x4c/0xa0 [ 12.722130] udp_queue_rcv_skb+0x264/0x490 [ 12.722924] __udp4_lib_rcv+0x21e/0x990 [ 12.723670] ? ip_route_input_rcu+0x2dd/0xbf0 [ 12.724442] ? 
tcp_v4_rcv+0x958/0xa40 [ 12.725039] udp_rcv+0x1a/0x20 [ 12.725587] ip_local_deliver_finish+0x97/0x1d0 [ 12.726323] ip_local_deliver+0xaf/0xc0 [ 12.726959] ? ip_route_input_noref+0x19/0x20 [ 12.727689] ip_rcv_finish+0xdd/0x3b0 [ 12.728307] ip_rcv+0x2ac/0x360 [ 12.728839] __netif_receive_skb_core+0x6fb/0xa90 [ 12.729580] ? udp4_gro_receive+0x1a7/0x2c0 [ 12.730274] __netif_receive_skb+0x1d/0x60 [ 12.730953] ? __netif_receive_skb+0x1d/0x60 [ 12.731637] netif_receive_skb_internal+0x37/0xd0 [ 12.732371] napi_gro_receive+0xc7/0xf0 [ 12.732920] receive_buf+0x3c3/0xd40 [ 12.733441] virtnet_poll+0xb1/0x250 [ 12.733944] net_rx_action+0x23e/0x370 [ 12.734476] __do_softirq+0xc5/0x2f8 [ 12.734922] irq_exit+0xfa/0x100 [ 12.735315] do_IRQ+0x4f/0xd0 [ 12.735680] common_interrupt+0xa2/0xa2 [ 12.736126] [ 12.736416] RIP: 0010:native_safe_halt+0x6/0x10 [ 12.736925] RSP: 0018:a41cc0cafe90 EFLAGS: 0246 ORIG_RAX: ff4d [ 12.737756] RAX: RBX: 8c2a761edb80 RCX: [ 12.738504] RDX: RSI: RDI: [ 12.739258] RBP: a41cc0cafe90 R08: 014b5b9795e5 R09: a41cc12c7e88 [ 12.740118] R10: R11: R12: 0002 [ 12.740964] R13: 8c2a761edb80 R14: R15: [ 12.741831] default_idle+0x2a/0x100 [ 12.742323] arch_cpu_idle+0xf/0x20 [ 12.742796] default_idle_call+0x28/0x40 [ 12.743312] do_idle+0x179/0x1f0 [ 12.743761] cpu_startup_entry+0x1d/0x20 [ 12.744291] start_secondary+0x112/0x120 [ 12.744816] secondary_startup_64+0xa5/0xa5 [ 12.745367] Code: b9 f4 01 00 00 48 89 c2 48 c1 ea 02 48 3d d3 07 00 00 48 0f 47 d1 49 8b 0c 24 48 39 d1 76 07 49 89 14 24 48 89 d1 31 d2 48 89 df <48> f7 f1 89 c6 e8 81 6e ff ff 5b 41 5c 5d c3 66 90 66 2e 0f 1f [ 12.747527] RIP: tipc_node_calculate_timer.isra.12+0x45/0x60 [tipc] RSP: 8c2a7fc838a0 [ 12.748555] ---[ end trace 1399ab83390650fd ]--- [ 12.749296] Kernel panic - not syncing: Fatal exception in interrupt [ 12.750123] Kernel Offset: 0x1320 from 0x8200 (relocation range: 0x8000-0xbfff) [ 12.751215] Rebooting in 60 seconds.. 
Fixes: c9b64d492b1f ("tipc: add replicast peer discovery") Signed-off-by: Tommi Rantala --- net/tipc/udp_media.c | 29 +++-- 1 file changed, 7 insertions(+), 22 deletions(-) diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index ecca64fc6a6f..599e7be92024 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -344,42 +344,27 @@
tipc_node_calculate_timer div-by-zero
Hi, I'm seeing a rare TIPC div-by-zero crash in tipc_node_calculate_timer(). If I get it right, we're receiving a discovery packet while enabling a bearer. The bearer is not yet fully initialized, causing the discovery packet processing to use a zero tolerance value, which then causes the div-by-zero in tipc_node_calculate_timer(). I can reproduce the issue easily by adding some sleep in tipc_enable_bearer(), and the machine always crashes after booting it up: diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 47ec121574ce..00bdd87cd614 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -327,12 +327,16 @@ static int tipc_enable_bearer(struct net *net, const char *name, return -EINVAL; } + pr_warn("HACK: sleeping 2 seconds!"); + usleep_range(200, 210); + b->identity = bearer_id; b->tolerance = m->tolerance; b->window = m->window; In 4.14 removing the latter tipc_rcv() call in tipc_udp_recv() [which was added in commit c9b64d492b1f (tipc: add replicast peer discovery)] seems to fix the crash, but I do not really understand the code: diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index ecca64fc6a6f..371653acf1f6 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -371,9 +371,11 @@ static int tipc_udp_recv(struct sock *sk, struct sk_buff *skb) goto rcu_out; } + /* tipc_rcv(sock_net(sk), skb, b); rcu_read_unlock(); return 0; + */ rcu_out: rcu_read_unlock(); I have tested 4.4 and 4.14, and the bug is reproducible in both versions. Comments? -Tommi [ 12.576885] tipc: Activated (version 2.0.0) [ 12.577506] NET: Registered protocol family 30 [ 12.578212] tipc: Started in single node mode [ 12.589907] Started in network mode [ 12.590450] Own node address <1.1.1>, network identity 1 [ 12.592022] HACK: sleeping 2 seconds! 
[ 12.668088] divide error: [#1] SMP [ 12.670993] Modules linked in: tipc ip6_udp_tunnel udp_tunnel nfsd auth_rpcgss oid_registry nfs_acl nfs lockd grace sunrpc isofs aesni_intel aes_x86_64 crypto_simd cryptd glue_helper ata_piix sch_fq_codel autofs4 [ 12.676952] CPU: 2 PID: 0 Comm: swapper/2 Not tainted 4.14.2-pc64-dirty #1 [ 12.679225] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-2.fc27 04/01/2014 [ 12.682095] task: 8c2a761edb80 task.stack: a41cc0cac000 [ 12.684087] RIP: 0010:tipc_node_calculate_timer.isra.12+0x45/0x60 [tipc] [ 12.686486] RSP: 0018:8c2a7fc838a0 EFLAGS: 00010246 [ 12.688451] RAX: RBX: 8c2a5b382600 RCX: [ 12.691197] RDX: RSI: 8c2a5b382600 RDI: 8c2a5b382600 [ 12.693945] RBP: 8c2a7fc838b0 R08: 0001 R09: 0001 [ 12.696632] R10: R11: R12: 8c2a5d8949d8 [ 12.699491] R13: 95ede400 R14: R15: 8c2a5d894800 [ 12.702338] FS: () GS:8c2a7fc8() knlGS: [ 12.705099] CS: 0010 DS: ES: CR0: 80050033 [ 12.706776] CR2: 01bb9440 CR3: bd009001 CR4: 003606e0 [ 12.708847] DR0: DR1: DR2: [ 12.711016] DR3: DR6: fffe0ff0 DR7: 0400 [ 12.712627] Call Trace: [ 12.713390] [ 12.714011] tipc_node_check_dest+0x2e8/0x350 [tipc] [ 12.715286] tipc_disc_rcv+0x14d/0x1d0 [tipc] [ 12.716370] tipc_rcv+0x8b0/0xd40 [tipc] [ 12.717396] ? minmax_running_min+0x2f/0x60 [ 12.718248] ? dst_alloc+0x4c/0xa0 [ 12.718964] ? tcp_ack+0xaf1/0x10b0 [ 12.719658] ? tipc_udp_is_known_peer+0xa0/0xa0 [tipc] [ 12.720634] tipc_udp_recv+0x71/0x1d0 [tipc] [ 12.721459] ? dst_alloc+0x4c/0xa0 [ 12.722130] udp_queue_rcv_skb+0x264/0x490 [ 12.722924] __udp4_lib_rcv+0x21e/0x990 [ 12.723670] ? ip_route_input_rcu+0x2dd/0xbf0 [ 12.724442] ? tcp_v4_rcv+0x958/0xa40 [ 12.725039] udp_rcv+0x1a/0x20 [ 12.725587] ip_local_deliver_finish+0x97/0x1d0 [ 12.726323] ip_local_deliver+0xaf/0xc0 [ 12.726959] ? ip_route_input_noref+0x19/0x20 [ 12.727689] ip_rcv_finish+0xdd/0x3b0 [ 12.728307] ip_rcv+0x2ac/0x360 [ 12.728839] __netif_receive_skb_core+0x6fb/0xa90 [ 12.729580] ? 
udp4_gro_receive+0x1a7/0x2c0 [ 12.730274] __netif_receive_skb+0x1d/0x60 [ 12.730953] ? __netif_receive_skb+0x1d/0x60 [ 12.731637] netif_receive_skb_internal+0x37/0xd0 [ 12.732371] napi_gro_receive+0xc7/0xf0 [ 12.732920] receive_buf+0x3c3/0xd40 [ 12.733441] virtnet_poll+0xb1/0x250 [ 12.733944] net_rx_action+0x23e/0x370 [ 12.734476] __do_softirq+0xc5/0x2f8 [ 12.734922] irq_exit+0xfa/0x100 [ 12.735315] do_IRQ+0x4f/0xd0 [ 12.735680] common_interrupt+0xa2/0xa2 [ 12.736126] [ 12.736416] RIP: 0010:native_safe_halt+0x6/0x10 [ 12.736925] RSP: 0018:a41cc0cafe90 EFLAGS
Re: tipc_udp_send_msg oops in 4.4 when setting link tolerance
On 14.11.2017 13:35, Jon Maloy wrote: Found it, the missing patch is this one (9b3009604b8e does not help): commit d01332f1acacc0cb43a61f4244dd2b846d4cd585 Author: Richard Alpe Date: Mon Feb 1 08:19:56 2016 +0100 tipc: fix link attribute propagation bug It does not apply as-is to 4.4, so I backported it, see below. Does it look good? I can send it forward to Greg for inclusion in 4.4. Yes. I would be very grateful if you do. OK, sent the patch to sta...@vger.kernel.org But with this patch included, I can easily reproduce the "BUG: Bad page state in process git" issue also in 4.4 like this: $ tipc link set tolerance 100 link $LINKNAME $ cd /tmp && git clone /path/to/linux-stable I can try to debug that a bit more to see if I can figure it out. I would appreciate that. If this turns out to also be an already fixed bug (most likely) you can send that directly to GKH too. If you find a bug that requires a new patch send it to tipc-discussion for review first. Finally, I want to say that I very much appreciate users like you, who try solving the problems themselves and contribute to the code. We are right now in an upgrade phase of TIPC, where we have been/are adding new features to TIPC (overlapping ring neighbor monitoring, group communication, new addressing model, performance improvements...) which inevitably entail some new bugs, but I expect the frequency of these changes/improvements to TIPC to go down in a few months, and with that the number of fixes needed. Thanks! After more testing, I noticed that I could only reproduce the "BUG: Bad page state" on a single machine, so maybe something is wrong with the machine, and not with tipc. -Tommi
Re: tipc_udp_send_msg oops in 4.4 when setting link tolerance
On 13.11.2017 23:25, Jon Maloy wrote: > Hi Tommi, > I am not sure, but is seems like the following patch is what you need: > commit 9b3009604b8e ("tipc: add net device to skb before UDP xmit") > This was applied in tipc 4.5. Found it, the missing patch is this one (9b3009604b8e does not help): commit d01332f1acacc0cb43a61f4244dd2b846d4cd585 Author: Richard Alpe Date: Mon Feb 1 08:19:56 2016 +0100 tipc: fix link attribute propagation bug It does not apply as-is to 4.4, so backported it, see below. Does it look good? I can send it forward to Greg for inclusion in 4.4. But with this patch included, I can easily reproduce the "BUG: Bad page state in process git" issue also in 4.4 like this: $ tipc link set tolerance 100 link $LINKNAME $ cd /tmp && git clone /path/to/linux-stable I can try to debug that a bit more to see if I can figure it out. -Tommi From e1857e6c60355296fd1cbe6e376d8a7265c2b289 Mon Sep 17 00:00:00 2001 From: Richard Alpe Date: Tue, 14 Nov 2017 11:09:50 +0200 Subject: [PATCH] tipc: fix link attribute propagation bug commit d01332f1acacc0cb43a61f4244dd2b846d4cd585 upstream. [backported to 4.4 by Tommi Rantala] Changing certain link attributes (link tolerance and link priority) from the TIPC management tool is supposed to automatically take effect at both endpoints of the affected link. Currently the media address is not instantiated for the link and is used uninstantiated when crafting protocol messages designated for the peer endpoint. This means that changing a link property currently results in the property being changed on the local machine but the protocol message designated for the peer gets lost. Resulting in property discrepancy between the endpoints. In this patch we resolve this by using the media address from the link entry and using the bearer transmit function to send it. Hence, we can now eliminate the redundant function tipc_link_prot_xmit() and the redundant field tipc_link::media_addr. 
Fixes: 2af5ae372a4b (tipc: clean up unused code and structures) Reviewed-by: Jon Maloy Reported-by: Jason Hu Signed-off-by: Richard Alpe Signed-off-by: David S. Miller Signed-off-by: Tommi Rantala --- net/tipc/link.c | 28 ++-- net/tipc/link.h | 1 - 2 files changed, 6 insertions(+), 23 deletions(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index 72268eac4ec7..736fffb28ab6 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1084,25 +1084,6 @@ drop: return rc; } -/* - * Send protocol message to the other endpoint. - */ -void tipc_link_proto_xmit(struct tipc_link *l, u32 msg_typ, int probe_msg, - u32 gap, u32 tolerance, u32 priority) -{ - struct sk_buff *skb = NULL; - struct sk_buff_head xmitq; - - __skb_queue_head_init(&xmitq); - tipc_link_build_proto_msg(l, msg_typ, probe_msg, gap, - tolerance, priority, &xmitq); - skb = __skb_dequeue(&xmitq); - if (!skb) - return; - tipc_bearer_xmit_skb(l->net, l->bearer_id, skb, l->media_addr); - l->rcv_unacked = 0; -} - static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, u16 rcvgap, int tolerance, int priority, struct sk_buff_head *xmitq) @@ -1636,9 +1617,12 @@ int tipc_nl_link_set(struct sk_buff *skb, struct genl_info *info) char *name; struct tipc_link *link; struct tipc_node *node; + struct sk_buff_head xmitq; struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1]; struct net *net = sock_net(skb->sk); + __skb_queue_head_init(&xmitq); + if (!info->attrs[TIPC_NLA_LINK]) return -EINVAL; @@ -1683,14 +1667,14 @@ int tipc_nl_link_set(struct sk_buff *skb, struct genl_info *info) tol = nla_get_u32(props[TIPC_NLA_PROP_TOL]); link->tolerance = tol; - tipc_link_proto_xmit(link, STATE_MSG, 0, 0, tol, 0); + tipc_link_build_proto_msg(link, STATE_MSG, 0, 0, tol, 0, &xmitq); } if (props[TIPC_NLA_PROP_PRIO]) { u32 prio; prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); link->priority = prio; - tipc_link_proto_xmit(link, STATE_MSG, 0, 0, 0, prio); + tipc_link_build_proto_msg(link, STATE_MSG, 0, 0, 0, prio, &xmitq); } if 
(props[TIPC_NLA_PROP_WIN]) { u32 win; @@ -1702,7 +1686,7 @@ int tipc_nl_link_set(struct sk_buff *skb, struct genl_info *info) out: tipc_node_unlock(node); - + tipc_bearer_xmit(net, bearer_id, &xmitq, &node->lin
tipc_udp_send_msg oops in 4.4 when setting link tolerance
Hi, I always get an instant TIPC oops in 4.4, when I try to set the link tolerance (with LINKNAME != "broadcast-link"): $ tipc link set tolerance 1000 link $LINKNAME Any idea what's going on? Some tipc patch missing in 4.4? In 4.9 the "tipc" command executes just fine, but I've seen a few times that later some random process crashes with "BUG: Bad page state". KASAN does not report anything before it happens. 4.14 is OK, could not reproduce these problems with it. tipc_udp_send_msg+0x102/0x4f0 matches to: tipc_udp_send_msg at linux-stable/net/tipc/udp_media.c:172 static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb, struct tipc_bearer *b, struct tipc_media_addr *dest) { int ttl, err = 0; struct udp_bearer *ub; struct udp_media_addr *dst = (struct udp_media_addr *)&dest->value; struct udp_media_addr *src = (struct udp_media_addr *)&b->addr.value; struct rtable *rt; if (skb_headroom(skb) < UDP_MIN_HEADROOM) { err = pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC); if (err) goto tx_error; } skb_set_inner_protocol(skb, htons(ETH_P_TIPC)); ub = rcu_dereference_rtnl(b->media_ptr); if (!ub) { err = -ENODEV; goto tx_error; } if (dst->proto == htons(ETH_P_IP)) { <-- HERE [ 111.423647] == [ 111.424826] BUG: KASAN: null-ptr-deref on address (null) [ 111.425538] Read of size 2 by task tipc/2643 [ 111.426215] CPU: 3 PID: 2643 Comm: tipc Not tainted 4.4.97-pc64 #1 [ 111.428081] 880026327478 8248005e 0002 [ 111.429476] 880047ad5ac0 8800263274f8 8227f5af 000265711040 [ 111.430728] 0297 a0387fd2 02090220 [ 111.432051] Call Trace: [ 111.432472] [] dump_stack+0x86/0xc8 [ 111.433208] [] kasan_report.part.2+0x41f/0x520 [ 111.434040] [] ? tipc_udp_send_msg+0x102/0x4f0 [tipc] [ 111.434908] [] kasan_report+0x25/0x30 [ 111.435647] [] __asan_load2+0x66/0x70 [ 111.436391] [] tipc_udp_send_msg+0x102/0x4f0 [tipc] [ 111.437334] [] ? kasan_kmalloc+0x5e/0x70 [ 111.438301] [] ? kasan_slab_alloc+0xd/0x10 [ 111.439328] [] ? __kmalloc_node_track_caller+0xac/0x230 [ 111.440493] [] ? 
kasan_kmalloc+0x5e/0x70 [ 111.441479] [] ? tipc_udp_disable+0xe0/0xe0 [tipc] [ 111.442628] [] ? kasan_kmalloc+0x5e/0x70 [ 111.443598] [] ? kasan_krealloc+0x62/0x80 [ 111.444610] [] ? memset+0x28/0x30 [ 111.445539] [] ? __alloc_skb+0x2b3/0x310 [ 111.446560] [] ? skb_complete_tx_timestamp+0x110/0x110 [ 111.447695] [] ? __module_text_address+0x16/0xa0 [ 111.448735] [] ? skb_put+0x8b/0xd0 [ 111.449608] [] ? memcpy+0x36/0x40 [ 111.450524] [] ? tipc_link_build_proto_msg+0x398/0x4c0 [tipc] [ 111.451946] [] tipc_bearer_xmit_skb+0xa0/0xb0 [tipc] [ 111.453078] [] tipc_link_proto_xmit+0x11b/0x160 [tipc] [ 111.454218] [] ? tipc_link_build_reset_msg+0x50/0x50 [tipc] [ 111.455542] [] tipc_nl_link_set+0x1ee/0x3b0 [tipc] [ 111.456659] [] ? tipc_nl_parse_link_prop+0xd0/0xd0 [tipc] [ 111.457831] [] ? is_ftrace_trampoline+0x59/0x90 [ 111.458884] [] ? __kernel_text_address+0x65/0x80 [ 111.459931] [] ? nla_parse+0xb6/0x140 [ 111.460892] [] genl_family_rcv_msg+0x37e/0x5e0 [ 111.461948] [] ? set_orig_addr.isra.53+0xe5/0x120 [tipc] [ 111.463107] [] ? genl_rcv+0x40/0x40 [ 111.463987] [] ? alloc_debug_processing+0x154/0x180 [ 111.465048] [] ? ___slab_alloc+0x43d/0x460 [ 111.465986] [] ? alloc_debug_processing+0x154/0x180 [ 111.467045] [] ? netlink_lookup+0x19c/0x220 [ 111.468067] [] genl_rcv_msg+0xd8/0x110 [ 111.468994] [] netlink_rcv_skb+0x14b/0x180 [ 111.469939] [] ? genl_family_rcv_msg+0x5e0/0x5e0 [ 111.470954] [] genl_rcv+0x28/0x40 [ 111.471798] [] netlink_unicast+0x2e7/0x3a0 [ 111.472806] [] ? netlink_attachskb+0x330/0x330 [ 111.473845] [] ? copy_from_iter+0xf1/0x3b0 [ 111.474847] [] netlink_sendmsg+0x4ad/0x620 [ 111.475788] [] ? netlink_unicast+0x3a0/0x3a0 [ 111.476793] [] ? __fdget+0x13/0x20 [ 111.477723] [] ? sockfd_lookup_light+0x95/0xb0 [ 111.478773] [] SYSC_sendto+0x1bc/0x290 [ 111.479659] [] ? sock_write_iter+0x200/0x200 [ 111.480692] [] ? __fdget+0x13/0x20 [ 111.481559] [] ? sockfd_lookup_light+0x95/0xb0 [ 111.482591] [] ? netlink_getname+0xb1/0x110 [ 111.483570] [] ? 
move_addr_to_user+0x5c/0x70 [ 111.484539] [] ? SYSC_getsockname+0x176/0x190 [ 111.485540] [] ? sockfd_lookup_light+0xb0/0xb0 [ 111.486558] [] ? SYSC_bind+0xe5/0x180 [ 111.487548] [] ? __sock_recv_ts_and_drops+0x260/0x260 [ 111.488700] [] ? fd_install+0x3b/0x50 [ 111.489596] [] ? sock_map_fd+0x44/0x70 [ 111.490553] [] ? SyS_socket+0xcc/0x120 [ 111.491437] [] ? move_addr_to_kernel+0x40/0x40 [ 111.492505]
4.9.30 NULL pointer dereference in __remove_shared_vm_struct
Hi, I have hit this kernel bug twice with 4.9.30 while running trinity, any ideas? It's not easily reproducible. Perhaps I should enable some more debug options to see if they reveal anything... (note that I had different kernel builds, so the IP addresses are different in the logs below) $ scripts/faddr2line vmlinux __remove_shared_vm_struct+0x16/0x40 __remove_shared_vm_struct+0x16/0x40: atomic_inc at arch/x86/include/asm/atomic.h:91 (inlined by) __remove_shared_vm_struct at mm/mmap.c:137 (gdb) disassemble __remove_shared_vm_struct Dump of assembler code for function __remove_shared_vm_struct: 0x8218e7a0 <+0>: callq 0x825db650 <__fentry__> 0x8218e7a5 <+5>: mov0x50(%rdi),%rax 0x8218e7a9 <+9>: push %rbp 0x8218e7aa <+10>:mov%rsp,%rbp 0x8218e7ad <+13>:test $0x8,%ah 0x8218e7b0 <+16>:je 0x8218e7c1 <__remove_shared_vm_struct+33> 0x8218e7b2 <+18>:mov0x20(%rsi),%rax 0x8218e7b6 <+22>:lock incl 0x158(%rax) 0x8218e7bd <+29>:mov0x50(%rdi),%rax 0x8218e7c1 <+33>:test $0x8,%al 0x8218e7c3 <+35>:je 0x8218e7c9 <__remove_shared_vm_struct+41> 0x8218e7c5 <+37>:lock decl 0x1c(%rdx) 0x8218e7c9 <+41>:lea0x20(%rdx),%rsi 0x8218e7cd <+45>:callq 0x82183460 0x8218e7d2 <+50>:pop%rbp 0x8218e7d3 <+51>:retq [16076.230255] BUG: unable to handle kernel NULL pointer dereference at 0158 [16076.231566] IP: [] __remove_shared_vm_struct+0x16/0x40 [16076.232533] PGD 0 [16076.233125] Oops: 0002 [#1] SMP [16076.233631] Modules linked in: fuse tun bridge hmac 8021q garp stp llc2 af_key llc rds xfrm_user xfrm_algo nfnetlink dccp_ipv6 sctp libcrc32c dccp_ipv4 dccp iptable_filter ip_tables x_tables isofs ata_piix autofs4 [16076.236688] CPU: 10 PID: 10753 Comm: trinity-main Not tainted 4.9.30 #1 [16076.238917] task: 880285b58000 task.stack: c90108d4c000 [16076.239741] RIP: 0010:[] [] __remove_shared_vm_struct+0x16/0x40 [16076.241085] RSP: 0018:c90108d4fd38 EFLAGS: 00010202 [16076.241841] RAX: RBX: 8801568867e8 RCX: [16076.242807] RDX: 88032c7581d8 RSI: 88012af34a00 RDI: 8801568867e8 [16076.243773] RBP: 
c90108d4fd38 R08: 880156886b80 R09: 7fffcf5d4000 [16076.244737] R10: R11: 0001 R12: 88012af34a00 [16076.245698] R13: 88032c758200 R14: 88032c7581d8 R15: 8801568868a0 [16076.246659] FS: () GS:88033348() knlGS: [16076.247864] CS: 0010 DS: ES: CR0: 80050033 [16076.248667] CR2: 0158 CR3: 00c07000 CR4: 06e0 [16076.249634] DR0: 7f54c4cae000 DR1: 7ff1276c9000 DR2: [16076.250599] DR3: DR6: fffe0ff0 DR7: 0600 [16076.251563] Stack: [16076.251942] c90108d4fd68 8038ef91 8801568867e8 0040 [16076.253139] c90108d4fdc0 c90108d4fda8 80387893 [16076.254335] 8801d1126c00 [16076.255528] Call Trace: [16076.255959] [] unlink_file_vma+0x41/0x60 [16076.256746] [] free_pgtables+0x43/0x120 [16076.257520] [] exit_mmap+0xb2/0x150 [16076.258258] [] mmput+0x3b/0x100 [16076.258953] [] do_exit+0x255/0xb20 [16076.259673] [] ? syscall_trace_enter+0x1c1/0x2d0 [16076.260538] [] do_group_exit+0x43/0xb0 [16076.261303] [] SyS_exit_group+0x14/0x20 [16076.262078] [] do_syscall_64+0x7e/0x1a0 [16076.262852] [] entry_SYSCALL64_slow_path+0x25/0x25 [16076.263736] Code: 3d 00 20 00 00 48 0f 47 c2 48 89 05 cd dc 95 00 31 c0 c3 66 90 0f 1f 44 00 00 48 8b 47 50 55 48 89 e5 f6 c4 08 74 0f 48 8b 46 20 ff 80 58 01 00 00 48 8b 47 50 a8 08 74 04 f0 ff 4a 1c 48 8d [16076.267481] RIP [] __remove_shared_vm_struct+0x16/0x40 [16076.268424] RSP [16076.268973] CR2: 0158 [16076.269844] ---[ end trace 98a1bbd8d9e50234 ]--- [16076.270565] Fixing recursive fault but reboot is needed! 
[69086.066173] Out of memory: Kill process 2485 (trinity-c309) score 503 or sacrifice child [69086.067383] Killed process 2485 (trinity-c309) total-vm:73816kB, anon-rss:7196kB, file-rss:3940kB, shmem-rss:17248kB [69086.071158] oom_reaper: reaped process 2485 (trinity-c309), now anon-rss:0kB, file-rss:0kB, shmem-rss:17248kB [69089.763240] scsi_nl_rcv_msg: discarding partial skb [69093.568099] scsi_nl_rcv_msg: discarding partial skb [69095.925546] BUG: unable to handle kernel NULL pointer dereference at 0158 [69095.926875] IP: [] __remove_shared_vm_struct+0x16/0x40 [69095.927836] PGD 0 [69095.928411] Oops: 0002 [#1] SMP [69095.928934] Modules linked in: fuse tun 8021q xfrm_user garp dccp_ipv6 dccp_ipv4 dccp sctp bridge llc2 rds st
Re: nouveau "eDP-1: EDID is invalid" regression after 4.11 with HP ZBook 15 G3
2017-05-15 8:20 GMT+03:00 Tommi Rantala : > 2017-05-15 3:03 GMT+03:00 Ben Skeggs : >> On 05/15/2017 01:10 AM, Tommi Rantala wrote: >>> >>> Hi, >> >> Hey Tommi, >> >> Thanks for bisecting this. It's rather unexpected that you should be seeing >> problems here, but, the commit makes sense for it at least. >> >> Are you able to get me new kernel logs of both before and after this patch >> with "log_buf_len=8M drm.debug=0x14 >> nouveau.debug=disp=trace,i2c=trace,bios=trace" please? > > Hi Ben, > > Before: > https://www.dropbox.com/s/b2namqtqvzv5ppp/trace.4.10.0-tr-10409-g5c68d91?dl=1 > > After: > https://www.dropbox.com/s/9url8qdo15959fy/trace.4.10.0-tr-10410-gdf8dc97?dl=1 Hi, any comments to this? -Tommi > -Tommi > >> Thanks, >> Ben. >> >> >>> >>> Bisected this to: >>> >>> commit df8dc97cd17269474344d73cc02739532c468d04 >>> Author: Ben Skeggs >>> Date: Wed Mar 1 09:42:04 2017 +1000 >>> >>> drm/nouveau/kms/nv50: use drm core i2c-over-aux algorithm >>> >>> I'm not entirely sure NVKM needs to support this now, but I haven't >>> removed it as of yet just in case it's needed from DEVINIT scripts >>> where DRM isn't available. >>> >>> Signed-off-by: Ben Skeggs >>> >>> >>> dmesg after boot with drm.debug enabled: >>> >>> v4.10-10409-g5c68d91 (still works): >>> http://termbin.com/b0is >>> >>> v4.10-10410-gdf8dc97 (failure): >>> http://termbin.com/j6lq >>> >>> >>> Tommi >>> >>> >>> 2017-05-10 11:24 GMT+03:00 Tommi Rantala : >>>> >>>> Hi, >>>> >>>> The HP ZBook 15 G3 laptop builtin display (eDP-1) does not work >>>> correctly with v4.11-11413-g2868b25. >>>> >>>> When booting the laptop, the resolution seems to be limited to >>>> 1024x768, and gnome-session segfaults. >>>> >>>> Up to 4.11 the display works just fine in 1920x1080 mode. 
>>>> >>>> I'm seeing this in the kernel logs: >>>> >>>> nouveau :01:00.0: eDP-1: EDID is invalid: >>>> [00] BAD 00 ff ff ff ff ff ff 00 ff ff ff ff ff ff ff ff >>>> [00] BAD ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff >>>> [00] BAD ff ff ff ff ff ff ff ff ff ff ff ff ff 84 53 54 >>>> [00] BAD 66 69 50 55 57 66 74 49 48 ff ff ff ff ff ff ff >>>> [00] BAD ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff >>>> [00] BAD ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff >>>> [00] BAD ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff >>>> [00] BAD ff ff ff ff ff ff ff ff ff ff ff 00 00 ff 00 ff >>>> nouveau :01:00.0: DRM: DDC responded, but no EDID for eDP-1 >>>> [drm] Cannot find any crtc or sizes - going 1024x768 >>>> >>>> >>>> $ lspci | grep NVIDIA >>>> 01:00.0 VGA compatible controller: NVIDIA Corporation GM107GLM [Quadro >>>> M2000M] (rev a2) >>>> >>>> Any ideas, or should I bisect? >>>> >>>> 4.11 dmesg & xrandr output: >>>> https://pastebin.com/raw/P9LGP7e1 >>>> >>>> 4.11-11413-g2868b25 dmesg: >>>> https://pastebin.com/raw/QBT9mMua >>>> >>>> -Tommi
Re: (radeon?) WARNING: drivers/gpu/drm/drm_irq.c:1195 drm_vblank_put (v4.11-12441-g56868a4)
2017-05-22 10:32 GMT+03:00 Daniel Vetter : > Yeah I think the locking stuff we've fixed, at least if you don't see it > in 4.12 it should be all good. And I think I spotted the bug you've > bisected to, patch is on dri-devel, pls test. Thanks! I'll test it later today. -Tommi
Re: (radeon?) WARNING: drivers/gpu/drm/drm_irq.c:1195 drm_vblank_put (v4.11-12441-g56868a4)
2017-05-11 5:51 GMT+03:00 Michel Dänzer : > On 11/05/17 04:33 AM, Tommi Rantala wrote: >> Complete kernel log: >> http://termbin.com/dzy5 >> >> [ 249.952546] [ cut here ] >> [ 249.952593] WARNING: CPU: 5 PID: 0 at >> /home/ttrantal/git/linux/drivers/gpu/drm/drm_irq.c:1195 >> drm_vblank_put+0xc4/0x120 [drm] >> [ 249.952596] Modules linked in: fuse tun bridge stp llc af_packet >> pl2303 usbserial shpchp acpi_cpufreq binfmt_misc amdgpu hid_generic >> uhci_hcd radeon 3c59x mii tg3 ehci_pci ehci_hcd i2c_algo_bit >> drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm >> agpgart unix autofs4 >> [ 249.952675] CPU: 5 PID: 0 Comm: swapper/5 Tainted: GW >> 4.11.0+ #4 >> [ 249.952678] Hardware name: Hewlett-Packard HP xw6600 >> Workstation/0A9Ch, BIOS 786F4 v01.46 09/20/2012 >> [ 249.952681] task: 88080aea task.stack: c900031b >> [ 249.952695] RIP: 0010:drm_vblank_put+0xc4/0x120 [drm] >> [ 249.952698] RSP: 0018:88080f003d70 EFLAGS: 00010046 >> [ 249.952703] RAX: RBX: 880801d53000 RCX: >> >> [ 249.952706] RDX: RSI: RDI: >> 88080a4ac000 >> [ 249.952709] RBP: 88080f003d88 R08: 0001 R09: >> 0003 >> [ 249.952711] R10: 88080f003d08 R11: 001da540 R12: >> 88080a4ac000 >> [ 249.952714] R13: R14: 0086 R15: >> 8808019a >> [ 249.952717] FS: () GS:88080f00() >> knlGS: >> [ 249.952720] CS: 0010 DS: ES: CR0: 80050033 >> [ 249.952723] CR2: 7f8bcc3a5810 CR3: 000808789000 CR4: >> 06e0 >> [ 249.952726] Call Trace: >> [ 249.952731] >> [ 249.952746] drm_crtc_vblank_put+0x1b/0x30 [drm] >> [ 249.952813] radeon_crtc_handle_flip+0xdc/0x140 [radeon] >> [ 249.952843] si_irq_process+0x610/0x1e90 [radeon] >> [ 249.952872] radeon_driver_irq_handler_kms+0x39/0xc0 [radeon] >> [ 249.952881] __handle_irq_event_percpu+0x60/0x580 >> [ 249.952887] handle_irq_event_percpu+0x20/0x90 >> [ 249.952892] handle_irq_event+0x46/0xb0 >> [ 249.952897] handle_edge_irq+0x13d/0x370 >> [ 249.952903] handle_irq+0x66/0x210 >> [ 249.952908] ? 
__local_bh_enable+0x34/0x50 >> [ 249.952914] do_IRQ+0x7e/0x1b0 >> [ 249.952920] common_interrupt+0x95/0x95 > > Weird, not sure how this could happen. Can you bisect? Hi, Bisection points to this (also manually applied commit 9739e74646 while testing, got kernel oops otherwise): commit 29dc0d1de18239cf3ef8bab578b8321ed340d81c Author: Daniel Vetter Date: Wed Mar 22 22:50:49 2017 +0100 drm: Roll out acquire context for the page_flip ioctl Again just prep work. Reviewed-by: Harry Wentland Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170322215058.8671-11-daniel.vet...@ffwll.ch I'm also seeing some more warnings in this version: May 18 19:21:55 xw6600 kernel: IPv6: ADDRCONF(NETDEV_CHANGE): enp14s0: link becomes ready May 18 19:21:57 xw6600 kernel: [ cut here ] May 18 19:21:57 xw6600 kernel: WARNING: CPU: 5 PID: 4607 at /home/ttrantal/git/linux/drivers/gpu/drm/drm_modeset_lock.c:193 drm_modeset_lock_crtc+0xe5/0x100 [drm] May 18 19:21:57 xw6600 kernel: Modules linked in: tun bridge stp llc af_packet pl2303 usbserial shpchp acpi_cpufreq binfmt_misc amdgpu hid_generic uhci_hcd radeon 3c59x mii i2c_algo_bit drm_kms_helper tg3 syscopyarea sysfillrect sysimgblt May 18 19:21:57 xw6600 kernel: CPU: 5 PID: 4607 Comm: gnome-shell Not tainted 4.11.0-rc3-00944-g29dc0d1-dirty #30 May 18 19:21:57 xw6600 kernel: Hardware name: Hewlett-Packard HP xw6600 Workstation/0A9Ch, BIOS 786F4 v01.46 09/20/2012 May 18 19:21:57 xw6600 kernel: Call Trace: May 18 19:21:57 xw6600 kernel: dump_stack+0x69/0x9b May 18 19:21:57 xw6600 kernel: __warn+0xff/0x140 May 18 19:21:57 xw6600 kernel: warn_slowpath_null+0x18/0x20 May 18 19:21:57 xw6600 kernel: drm_modeset_lock_crtc+0xe5/0x100 [drm] May 18 19:21:57 xw6600 kernel: drm_mode_cursor_common+0xbd/0x200 [drm] May 18 19:21:57 xw6600 kernel: drm_mode_cursor_ioctl+0x3c/0x40 [drm] May 18 19:21:57 xw6600 kernel: drm_ioctl+0x3ea/0x870 [drm] May 18 19:21:57 xw6600 kernel: ? 
drm_mode_setplane+0x1a0/0x1a0 [drm] May 18 19:21:57 xw6600 kernel: ? trace_hardirqs_on_caller+0x1ad/0x2c0 May 18 19:21:57 xw6600 kernel: ? trace_hardirqs_on+0xd/0x10 May 18 19:21:57 xw6600 kernel: radeon_drm_ioctl+0x6e/0x110 [radeon] May 18 19:21:57 xw6600 kernel: do_vfs_ioctl+0xac/0x9d0 May 18 19:21:57 xw6600 kernel: ? security_file_ioctl+0x4c/0x80 May 18 19:21:57 xw6600 kernel: SyS_ioctl+0x74/0x8
Re: nouveau "eDP-1: EDID is invalid" regression after 4.11 with HP ZBook 15 G3
2017-05-15 3:03 GMT+03:00 Ben Skeggs : > On 05/15/2017 01:10 AM, Tommi Rantala wrote: >> >> Hi, > > Hey Tommi, > > Thanks for bisecting this. It's rather unexpected that you should be seeing > problems here, but, the commit makes sense for it at least. > > Are you able to get me new kernel logs of both before and after this patch > with "log_buf_len=8M drm.debug=0x14 > nouveau.debug=disp=trace,i2c=trace,bios=trace" please? Hi Ben, Before: https://www.dropbox.com/s/b2namqtqvzv5ppp/trace.4.10.0-tr-10409-g5c68d91?dl=1 After: https://www.dropbox.com/s/9url8qdo15959fy/trace.4.10.0-tr-10410-gdf8dc97?dl=1 -Tommi > Thanks, > Ben. > > >> >> Bisected this to: >> >> commit df8dc97cd17269474344d73cc02739532c468d04 >> Author: Ben Skeggs >> Date: Wed Mar 1 09:42:04 2017 +1000 >> >> drm/nouveau/kms/nv50: use drm core i2c-over-aux algorithm >> >> I'm not entirely sure NVKM needs to support this now, but I haven't >> removed it as of yet just in case it's needed from DEVINIT scripts >> where DRM isn't available. >> >> Signed-off-by: Ben Skeggs >> >> >> dmesg after boot with drm.debug enabled: >> >> v4.10-10409-g5c68d91 (still works): >> http://termbin.com/b0is >> >> v4.10-10410-gdf8dc97 (failure): >> http://termbin.com/j6lq >> >> >> Tommi >> >> >> 2017-05-10 11:24 GMT+03:00 Tommi Rantala : >>> >>> Hi, >>> >>> The HP ZBook 15 G3 laptop builtin display (eDP-1) does not work >>> correctly with v4.11-11413-g2868b25. >>> >>> When booting the laptop, the resolution seems to be limited to >>> 1024x768, and gnome-session segfaults. >>> >>> Up to 4.11 the display works just fine in 1920x1080 mode. 
>>> >>> I'm seeing this in the kernel logs: >>> >>> nouveau :01:00.0: eDP-1: EDID is invalid: >>> [00] BAD 00 ff ff ff ff ff ff 00 ff ff ff ff ff ff ff ff >>> [00] BAD ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff >>> [00] BAD ff ff ff ff ff ff ff ff ff ff ff ff ff 84 53 54 >>> [00] BAD 66 69 50 55 57 66 74 49 48 ff ff ff ff ff ff ff >>> [00] BAD ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff >>> [00] BAD ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff >>> [00] BAD ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff >>> [00] BAD ff ff ff ff ff ff ff ff ff ff ff 00 00 ff 00 ff >>> nouveau :01:00.0: DRM: DDC responded, but no EDID for eDP-1 >>> [drm] Cannot find any crtc or sizes - going 1024x768 >>> >>> >>> $ lspci | grep NVIDIA >>> 01:00.0 VGA compatible controller: NVIDIA Corporation GM107GLM [Quadro >>> M2000M] (rev a2) >>> >>> Any ideas, or should I bisect? >>> >>> 4.11 dmesg & xrandr output: >>> https://pastebin.com/raw/P9LGP7e1 >>> >>> 4.11-11413-g2868b25 dmesg: >>> https://pastebin.com/raw/QBT9mMua >>> >>> -Tommi
Re: nouveau "eDP-1: EDID is invalid" regression after 4.11 with HP ZBook 15 G3
Hi, Bisected this to: commit df8dc97cd17269474344d73cc02739532c468d04 Author: Ben Skeggs Date: Wed Mar 1 09:42:04 2017 +1000 drm/nouveau/kms/nv50: use drm core i2c-over-aux algorithm I'm not entirely sure NVKM needs to support this now, but I haven't removed it as of yet just in case it's needed from DEVINIT scripts where DRM isn't available. Signed-off-by: Ben Skeggs dmesg after boot with drm.debug enabled: v4.10-10409-g5c68d91 (still works): http://termbin.com/b0is v4.10-10410-gdf8dc97 (failure): http://termbin.com/j6lq Tommi 2017-05-10 11:24 GMT+03:00 Tommi Rantala : > Hi, > > The HP ZBook 15 G3 laptop builtin display (eDP-1) does not work > correctly with v4.11-11413-g2868b25. > > When booting the laptop, the resolution seems to be limited to > 1024x768, and gnome-session segfaults. > > Up to 4.11 the display works just fine in 1920x1080 mode. > > I'm seeing this in the kernel logs: > > nouveau :01:00.0: eDP-1: EDID is invalid: > [00] BAD 00 ff ff ff ff ff ff 00 ff ff ff ff ff ff ff ff > [00] BAD ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff > [00] BAD ff ff ff ff ff ff ff ff ff ff ff ff ff 84 53 54 > [00] BAD 66 69 50 55 57 66 74 49 48 ff ff ff ff ff ff ff > [00] BAD ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff > [00] BAD ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff > [00] BAD ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff > [00] BAD ff ff ff ff ff ff ff ff ff ff ff 00 00 ff 00 ff > nouveau :01:00.0: DRM: DDC responded, but no EDID for eDP-1 > [drm] Cannot find any crtc or sizes - going 1024x768 > > > $ lspci | grep NVIDIA > 01:00.0 VGA compatible controller: NVIDIA Corporation GM107GLM [Quadro > M2000M] (rev a2) > > Any ideas, or should I bisect? > > 4.11 dmesg & xrandr output: > https://pastebin.com/raw/P9LGP7e1 > > 4.11-11413-g2868b25 dmesg: > https://pastebin.com/raw/QBT9mMua > > -Tommi
Re: [PATCH] hwmon: (coretemp) Handle frozen hotplug state correctly
2017-05-10 23:09 GMT+03:00 Guenter Roeck : > On Wed, May 10, 2017 at 10:16:33PM +0300, Tommi Rantala wrote: >> 2017-05-10 17:30 GMT+03:00 Thomas Gleixner : >> > The recent conversion to the hotplug state machine missed that the original >> > hotplug notifiers did not execute in the frozen state, which is used on >> > suspend on resume. >> > >> > This does not matter on single socket machines, but on multi socket systems >> > this breaks when the device for a non-boot socket is removed when the last >> > CPU of that socket is brought offline. The device removal locks up the >> > machine hard w/o any debug output. >> > >> > Prevent executing the hotplug callbacks when cpuhp_tasks_frozen is true. >> > >> > Thanks to Tommi for providing debug information patiently while I failed to >> > spot the obvious. >> > >> > Fixes: e00ca5df37ad ("hwmon: (coretemp) Convert to hotplug state machine") >> > Reported-by: Tommi Rantala >> > Signed-off-by: Thomas Gleixner >> >> Many thanks, I can confirm that it works well! >> > Ok if I add your Tested-by: ? Sure! Tested-by: Tommi Rantala > Thanks, > Guenter > >> -Tommi >> >> > --- >> > drivers/hwmon/coretemp.c | 14 ++ >> > 1 file changed, 14 insertions(+) >> > >> > --- a/drivers/hwmon/coretemp.c >> > +++ b/drivers/hwmon/coretemp.c >> > @@ -605,6 +605,13 @@ static int coretemp_cpu_online(unsigned >> > struct platform_data *pdata; >> > >> > /* >> > +* Don't execute this on resume as the offline callback did >> > +* not get executed on suspend. >> > +*/ >> > + if (cpuhp_tasks_frozen) >> > + return 0; >> > + >> > + /* >> > * CPUID.06H.EAX[0] indicates whether the CPU has thermal >> > * sensors. We check this bit only, all the early CPUs >> > * without thermal sensors will be filtered out. >> > @@ -654,6 +661,13 @@ static int coretemp_cpu_offline(unsigned >> > struct temp_data *tdata; >> > int indx, target; >> > >> > + /* >> > +* Don't execute this on suspend as the device remove locks >> > +* up the machine. 
>> > +*/ >> > + if (cpuhp_tasks_frozen) >> > + return 0; >> > + >> > /* If the physical CPU device does not exist, just return */ >> > if (!pdev) >> > return 0;
(radeon?) WARNING: drivers/gpu/drm/drm_irq.c:1195 drm_vblank_put (v4.11-12441-g56868a4)
Hi, I just tested v4.11-12441-g56868a4 on HP xw6600 with radeon graphics, and I'm seeing the following WARNING triggered constantly. I have not seen this earlier e.g. with the distro kernel 4.10.13-200.fc25.x86_64 $ lspci|grep -i amd 60:00.0 VGA compatible controller: Advanced Micro Devices, Inc. [AMD/ATI] Curacao PRO [Radeon R7 370 / R9 270/370 OEM] 60:00.1 Audio device: Advanced Micro Devices, Inc. [AMD/ATI] Cape Verde/Pitcairn HDMI Audio [Radeon HD 7700/7800 Series] Complete kernel log: http://termbin.com/dzy5 [ 249.952546] [ cut here ] [ 249.952593] WARNING: CPU: 5 PID: 0 at /home/ttrantal/git/linux/drivers/gpu/drm/drm_irq.c:1195 drm_vblank_put+0xc4/0x120 [drm] [ 249.952596] Modules linked in: fuse tun bridge stp llc af_packet pl2303 usbserial shpchp acpi_cpufreq binfmt_misc amdgpu hid_generic uhci_hcd radeon 3c59x mii tg3 ehci_pci ehci_hcd i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm agpgart unix autofs4 [ 249.952675] CPU: 5 PID: 0 Comm: swapper/5 Tainted: GW 4.11.0+ #4 [ 249.952678] Hardware name: Hewlett-Packard HP xw6600 Workstation/0A9Ch, BIOS 786F4 v01.46 09/20/2012 [ 249.952681] task: 88080aea task.stack: c900031b [ 249.952695] RIP: 0010:drm_vblank_put+0xc4/0x120 [drm] [ 249.952698] RSP: 0018:88080f003d70 EFLAGS: 00010046 [ 249.952703] RAX: RBX: 880801d53000 RCX: [ 249.952706] RDX: RSI: RDI: 88080a4ac000 [ 249.952709] RBP: 88080f003d88 R08: 0001 R09: 0003 [ 249.952711] R10: 88080f003d08 R11: 001da540 R12: 88080a4ac000 [ 249.952714] R13: R14: 0086 R15: 8808019a [ 249.952717] FS: () GS:88080f00() knlGS: [ 249.952720] CS: 0010 DS: ES: CR0: 80050033 [ 249.952723] CR2: 7f8bcc3a5810 CR3: 000808789000 CR4: 06e0 [ 249.952726] Call Trace: [ 249.952731] [ 249.952746] drm_crtc_vblank_put+0x1b/0x30 [drm] [ 249.952813] radeon_crtc_handle_flip+0xdc/0x140 [radeon] [ 249.952843] si_irq_process+0x610/0x1e90 [radeon] [ 249.952872] radeon_driver_irq_handler_kms+0x39/0xc0 [radeon] [ 249.952881] __handle_irq_event_percpu+0x60/0x580 [ 
249.952887] handle_irq_event_percpu+0x20/0x90 [ 249.952892] handle_irq_event+0x46/0xb0 [ 249.952897] handle_edge_irq+0x13d/0x370 [ 249.952903] handle_irq+0x66/0x210 [ 249.952908] ? __local_bh_enable+0x34/0x50 [ 249.952914] do_IRQ+0x7e/0x1b0 [ 249.952920] common_interrupt+0x95/0x95 [ 249.952924] RIP: 0010:mwait_idle+0x9c/0x3c0 [ 249.952927] RSP: 0018:c900031b3e68 EFLAGS: 0246 ORIG_RAX: ff4d [ 249.952932] RAX: RBX: 88080aea RCX: [ 249.952935] RDX: 0001 RSI: 0001 RDI: 88080aea [ 249.952938] RBP: c900031b3e98 R08: 0006 R09: [ 249.952941] R10: R11: R12: 88080aea [ 249.952943] R13: 0005 R14: 839ca0c8 R15: [ 249.952946] [ 249.952955] ? mwait_idle+0x93/0x3c0 [ 249.952961] arch_cpu_idle+0xa/0x10 [ 249.952965] default_idle_call+0x24/0x40 [ 249.952971] do_idle+0x154/0x1f0 [ 249.952976] cpu_startup_entry+0x18/0x20 [ 249.952981] start_secondary+0x159/0x1f0 [ 249.952987] secondary_startup_64+0x9f/0x9f [ 249.952995] Code: 0d 32 c7 9a e2 f7 ea 41 c1 fc 1f 48 8d 7b 60 c1 fa 06 44 29 e2 48 63 f2 48 01 ce e8 37 7d 1e e1 eb be 0f ff 5b 41 5c 41 5d 5d c3 <0f> ff eb b1 48 89 df e8 40 fe ff ff eb a7 41 0f b6 f4 48 c7 c7 [ 249.953135] ---[ end trace 399ab7917ed3b208 ]--- [ 251.185850] [ cut here ] [ 251.185896] WARNING: CPU: 5 PID: 4425 at /home/ttrantal/git/linux/drivers/gpu/drm/drm_irq.c:1195 drm_vblank_put+0xc4/0x120 [drm] [ 251.185899] Modules linked in: fuse tun bridge stp llc af_packet pl2303 usbserial shpchp acpi_cpufreq binfmt_misc amdgpu hid_generic uhci_hcd radeon 3c59x mii tg3 ehci_pci ehci_hcd i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm agpgart unix autofs4 [ 251.185979] CPU: 5 PID: 4425 Comm: in:imjournal Tainted: GW 4.11.0+ #4 [ 251.185982] Hardware name: Hewlett-Packard HP xw6600 Workstation/0A9Ch, BIOS 786F4 v01.46 09/20/2012 [ 251.185984] task: 880802f8b280 task.stack: c900034c8000 [ 251.185998] RIP: 0010:drm_vblank_put+0xc4/0x120 [drm] [ 251.186001] RSP: :88080f003d70 EFLAGS: 00010046 [ 251.186006] RAX: RBX: 880801d53000 RCX: [ 
251.186009] RDX: RSI: RDI: 88080a4ac000 [ 251.186012] RBP: 88080f003d88 R08: 0001 R09: 0003 [ 251.186015] R10: 88080f003d08 R11: 001da540 R12: 88080a4ac000 [ 251.186017] R13: R14: 0086 R15: 8808019a [ 251.18
Re: [PATCH] hwmon: (coretemp) Handle frozen hotplug state correctly
2017-05-10 17:30 GMT+03:00 Thomas Gleixner : > The recent conversion to the hotplug state machine missed that the original > hotplug notifiers did not execute in the frozen state, which is used on > suspend and resume. > > This does not matter on single socket machines, but on multi socket systems > this breaks when the device for a non-boot socket is removed when the last > CPU of that socket is brought offline. The device removal locks up the > machine hard w/o any debug output. > > Prevent executing the hotplug callbacks when cpuhp_tasks_frozen is true. > > Thanks to Tommi for providing debug information patiently while I failed to > spot the obvious. > > Fixes: e00ca5df37ad ("hwmon: (coretemp) Convert to hotplug state machine") > Reported-by: Tommi Rantala > Signed-off-by: Thomas Gleixner Many thanks, I can confirm that it works well! -Tommi > --- > drivers/hwmon/coretemp.c | 14 ++ > 1 file changed, 14 insertions(+) > > --- a/drivers/hwmon/coretemp.c > +++ b/drivers/hwmon/coretemp.c > @@ -605,6 +605,13 @@ static int coretemp_cpu_online(unsigned > struct platform_data *pdata; > > /* > +* Don't execute this on resume as the offline callback did > +* not get executed on suspend. > +*/ > + if (cpuhp_tasks_frozen) > + return 0; > + > + /* > * CPUID.06H.EAX[0] indicates whether the CPU has thermal > * sensors. We check this bit only, all the early CPUs > * without thermal sensors will be filtered out. > @@ -654,6 +661,13 @@ static int coretemp_cpu_offline(unsigned > struct temp_data *tdata; > int indx, target; > > + /* > +* Don't execute this on suspend as the device remove locks > +* up the machine. > +*/ > + if (cpuhp_tasks_frozen) > + return 0; > + > /* If the physical CPU device does not exist, just return */ > if (!pdev) > return 0;
Re: [patch 0/6] hwmon/coretemp: Hotplug fixes, cleanups and state machine conversion
2017-05-10 17:01 GMT+03:00 Thomas Gleixner : > On Wed, 10 May 2017, Tommi Rantala wrote: >> 2017-05-09 10:16 GMT+03:00 Thomas Gleixner : >> > On Thu, 4 May 2017, Tommi Rantala wrote: >> >> Here's the trace output, does it help? >> > >> > Not much. Can you please try the following: >> > >> > 1) Offline all CPUs except CPU0 before suspend/resume >> >> it works! >> >> > 2) Offline all CPUs except CPU0 and CPU1 before suspend/resume >> >> now it breaks. >> >> > 3) Offline all CPUs except CPU0 and CPU2 before suspend/resume >> >> works again! >> >> (Also works with CPUs 0,2,4,6 onlined.) > > Output from /proc/cpuinfo please. http://termbin.com/vec2
Re: [patch 0/6] hwmon/coretemp: Hotplug fixes, cleanups and state machine conversion
2017-05-09 10:16 GMT+03:00 Thomas Gleixner : > On Thu, 4 May 2017, Tommi Rantala wrote: >> Here's the trace output, does it help? > > Not much. Can you please try the following: > > 1) Offline all CPUs except CPU0 before suspend/resume it works! > 2) Offline all CPUs except CPU0 and CPU1 before suspend/resume now it breaks. > 3) Offline all CPUs except CPU0 and CPU2 before suspend/resume works again! (Also works with CPUs 0,2,4,6 onlined.) -Tommi
nouveau "eDP-1: EDID is invalid" regression after 4.11 with HP ZBook 15 G3
Hi, The HP ZBook 15 G3 laptop built-in display (eDP-1) does not work correctly with v4.11-11413-g2868b25. When booting the laptop, the resolution seems to be limited to 1024x768, and gnome-session segfaults. Up to 4.11 the display works just fine in 1920x1080 mode. I'm seeing this in the kernel logs: nouveau 0000:01:00.0: eDP-1: EDID is invalid: [00] BAD 00 ff ff ff ff ff ff 00 ff ff ff ff ff ff ff ff [00] BAD ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff [00] BAD ff ff ff ff ff ff ff ff ff ff ff ff ff 84 53 54 [00] BAD 66 69 50 55 57 66 74 49 48 ff ff ff ff ff ff ff [00] BAD ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff [00] BAD ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff [00] BAD ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff [00] BAD ff ff ff ff ff ff ff ff ff ff ff 00 00 ff 00 ff nouveau 0000:01:00.0: DRM: DDC responded, but no EDID for eDP-1 [drm] Cannot find any crtc or sizes - going 1024x768 $ lspci | grep NVIDIA 01:00.0 VGA compatible controller: NVIDIA Corporation GM107GLM [Quadro M2000M] (rev a2) Any ideas, or should I bisect? 4.11 dmesg & xrandr output: https://pastebin.com/raw/P9LGP7e1 4.11-11413-g2868b25 dmesg: https://pastebin.com/raw/QBT9mMua -Tommi
Re: [patch 0/6] hwmon/coretemp: Hotplug fixes, cleanups and state machine conversion
2017-04-23 18:01 GMT+03:00 Thomas Gleixner : > On Sat, 15 Apr 2017, Tommi Rantala wrote: > >> Testing with 4.10.8-200.fc25.x86_64: freezer, devices and platform are >> OK, it breaks at "processors". >> The screen stays off, and the machine no longer answers to ping. >> >> (Without coretemp loaded, the machine survives all the states. There >> are some graphics glitches and radeon error messages) > > That's odd. I tried on a similar machine (w/o a radeon card) and it just > works with the coretemp module loaded. > > Can you please do a CPU hotplug cycle (just one CPU) with the cpuhp events > in the tracer enabled. Send me the trace output so I might be able to spot > what's different and what interdependencies between other callbacks might be > there. Hi, Here's the trace output, does it help? http://termbin.com/qugr -Tommi
Re: [patch 0/6] hwmon/coretemp: Hotplug fixes, cleanups and state machine conversion
2017-04-14 20:35 GMT+03:00 Thomas Gleixner : > On Wed, 12 Apr 2017, Thomas Gleixner wrote: >> >> Can you please try the following: >> >> # for STATE in freezer devices platform processors core; do \ >> echo $STATE; \ >> echo $STATE >/sys/power/pm_test; \ >> echo mem >/sys/power/state >> >> That should give us at least a hint in which area to dig. > > Any news on that? Sorry, was traveling. Testing with 4.10.8-200.fc25.x86_64: freezer, devices and platform are OK, it breaks at "processors". The screen stays off, and the machine no longer answers to ping. (Without coretemp loaded, the machine survives all the states. There are some graphics glitches and radeon error messages) -Tommi
Re: [patch 0/6] hwmon/coretemp: Hotplug fixes, cleanups and state machine conversion
2017-04-12 13:52 GMT+03:00 Thomas Gleixner : > On Wed, 12 Apr 2017, Tommi Rantala wrote: >> 2017-04-12 12:28 GMT+03:00 Thomas Gleixner : >> > On Wed, 12 Apr 2017, Tommi Rantala wrote: >> >> Resume-from-suspend stopped working in HP xw6600 in fedora kernel >> >> 4.10.8-200.fc25.x86_64, while it worked just fine in >> >> 4.9.9-200.fc25.x86_64. >> >> >> >> When powering on the suspended PC, there is no video output, and to >> >> recover, I need to reset the machine. >> > >> > Is there just no video output or is the machine completely frozen? If it's >> > not completely dead, then you might be able to ssh into it. >> >> It's completely hosed: not possible to ssh, does not respond to ping either. >> >> I made a quick test with netconsole. After booting with >> no_console_suspend=1, and setting the netconsole parameters, I can get >> kernel messages (to my android phone) when suspending the machine. But >> no messages after the failed resume. > > Let's do something else first. > > Can you please try to offline/online CPUs from the console? > > # echo 0 >/sys/devices/system/cpu/cpu1/online > # echo 1 >/sys/devices/system/cpu/cpu1/online ok, that works. > If that works, then try to offline all CPUs (except 0) in the same order as > suspend (1 ... 7) and then online them again in the same order?
Seems to work without problems: # for i in $(seq 1 7) ; do echo 0 > /sys/devices/system/cpu/cpu$i/online ; done [ 1237.317537] intel_powerclamp: No package C-state available [ 1308.997620] smpboot: CPU 1 is now offline [ 1309.007167] intel_powerclamp: No package C-state available [ 1309.032563] smpboot: CPU 2 is now offline [ 1309.038118] intel_powerclamp: No package C-state available [ 1309.072495] smpboot: CPU 3 is now offline [ 1309.077807] intel_powerclamp: No package C-state available [ 1309.099545] Broke affinity for irq 29 [ 1309.100587] smpboot: CPU 4 is now offline [ 1309.105346] intel_powerclamp: No package C-state available [ 1309.135530] Broke affinity for irq 22 [ 1309.135540] Broke affinity for irq 29 [ 1309.136579] smpboot: CPU 5 is now offline [ 1309.141653] intel_powerclamp: No package C-state available [ 1309.171517] Broke affinity for irq 22 [ 1309.171526] Broke affinity for irq 29 [ 1309.171535] Broke affinity for irq 31 [ 1309.172586] smpboot: CPU 6 is now offline [ 1309.176967] intel_powerclamp: No package C-state available [ 1309.209122] Broke affinity for irq 19 [ 1309.209126] Broke affinity for irq 22 [ 1309.209135] Broke affinity for irq 29 [ 1309.209145] Broke affinity for irq 31 [ 1309.212071] smpboot: CPU 7 is now offline # for i in $(seq 1 7) ; do echo 1 > /sys/devices/system/cpu/cpu$i/online ; done [ 1309.217476] intel_powerclamp: No package C-state available [ 1380.624184] x86: Booting SMP configuration: [ 1380.624186] smpboot: Booting Node 0 Processor 1 APIC 0x4 [ 1380.659810] intel_powerclamp: No package C-state available [ 1380.659957] smpboot: Booting Node 0 Processor 2 APIC 0x2 [ 1380.671198] microcode: sig=0x10676, pf=0x40, revision=0x60f [ 1380.672088] smpboot: Booting Node 0 Processor 3 APIC 0x6 [ 1380.677952] intel_powerclamp: No package C-state available [ 1380.686260] microcode: sig=0x1067a, pf=0x40, revision=0xa0b [ 1380.687098] smpboot: Booting Node 0 Processor 4 APIC 0x1 [ 1380.699214] microcode: sig=0x10676, pf=0x40, 
revision=0x60f [ 1380.699742] intel_powerclamp: No package C-state available [ 1380.700267] smpboot: Booting Node 0 Processor 5 APIC 0x5 [ 1380.715207] microcode: sig=0x1067a, pf=0x40, revision=0xa0b [ 1380.716202] smpboot: Booting Node 0 Processor 6 APIC 0x3 [ 1380.730264] microcode: sig=0x10676, pf=0x40, revision=0x60f [ 1380.730567] intel_powerclamp: No package C-state available [ 1380.731267] smpboot: Booting Node 0 Processor 7 APIC 0x7 [ 1380.748276] microcode: sig=0x1067a, pf=0x40, revision=0xa0b
Re: [patch 0/6] hwmon/coretemp: Hotplug fixes, cleanups and state machine conversion
2017-04-12 12:28 GMT+03:00 Thomas Gleixner : > On Wed, 12 Apr 2017, Tommi Rantala wrote: >> Resume-from-suspend stopped working in HP xw6600 in fedora kernel >> 4.10.8-200.fc25.x86_64, while it worked just fine in >> 4.9.9-200.fc25.x86_64. >> >> When powering on the suspended PC, there is no video output, and to >> recover, I need to reset the machine. > > Is there just no video output or is the machine completely frozen? If it's > not completely dead, then you might be able to ssh into it. It's completely hosed: not possible to ssh, does not respond to ping either. I made a quick test with netconsole. After booting with no_console_suspend=1, and setting the netconsole parameters, I can get kernel messages (to my android phone) when suspending the machine. But no messages after the failed resume. Hmm, might I be able to capture messages over USB serial port...? -Tommi
Re: [patch 0/6] hwmon/coretemp: Hotplug fixes, cleanups and state machine conversion
2016-11-23 17:28 GMT+02:00 Guenter Roeck : > > On 11/22/2016 09:42 AM, Thomas Gleixner wrote: >> >> After the first attempt to convert the coretemp driver to the hotplug state >> machine failed, we had a deeper look and went a bit farther. >> >> The driver has quite some interesting concepts vs. the package, core and >> sysfs file management and a bug in the package temperature sysfs interface >> vs. cpu hotplug. >> >> The following series fixes that bug and simplifies the package/core >> management and at the end converts it to the hotplug state machine. >> >> Along with the source size the binary size shrinks as well: >> text data bss dec hex >> 4068 360 20 4448 1160 Before >> 3801 180 36 4017 fb1 After >> >> Thanks, >> >> tglx >> - >> coretemp.c | 321 >> + >> 1 file changed, 113 insertions(+), 208 deletions(-) Hi, Resume-from-suspend stopped working in HP xw6600 in fedora kernel 4.10.8-200.fc25.x86_64, while it worked just fine in 4.9.9-200.fc25.x86_64. When powering on the suspended PC, there is no video output, and to recover, I need to reset the machine. Nothing is recorded in the journal logs for the resume, last lines are from the suspend: Apr 08 15:41:49 xw6600 systemd[1]: Reached target Sleep. Apr 08 15:41:49 xw6600 systemd[1]: Starting Suspend... Apr 08 15:41:49 xw6600 systemd-sleep[6675]: Suspending system... Also tested 4.11-rc5, but it fails the same way. Bisection leads to commit: commit e00ca5df37adc68052ea699cbd010ee4e19e39e4 Author: Thomas Gleixner Date: Tue Nov 22 17:42:04 2016 + hwmon: (coretemp) Convert to hotplug state machine Install the callbacks via the state machine. Setup and teardown are handled by the hotplug core.
Signed-off-by: Sebastian Andrzej Siewior Cc: linux-hw...@vger.kernel.org Cc: Fenghua Yu Cc: Jean Delvare Cc: r...@linuxtronix.de Cc: Guenter Roeck Link: http://lkml.kernel.org/r/20161117183541.8588-5-bige...@linutronix.de Signed-off-by: Guenter Roeck If I do "modprobe -r coretemp", then the resume works OK with 4.10.8-200.fc25.x86_64. Any ideas? 4.9.9-200.fc25.x86_64 dmesg: http://termbin.com/3kcl 4.10.8-200.fc25.x86_64 dmesg: http://termbin.com/62d9 -Tommi
Re: [RFC][PATCH] mm: Tighten x86 /dev/mem with zeroing
On 06.04.2017 03:00, Kees Cook wrote: This changes the x86 exception for the low 1MB by reading back zeros for RAM areas instead of blindly allowing them. (It may be possible for heap to end up getting allocated in low 1MB RAM, and then read out, possibly tripping hardened usercopy.) Unfinished: this still needs mmap support. Reported-by: Tommi Rantala Signed-off-by: Kees Cook --- Tommi, can you check and see if this fixes what you're seeing? I want to make sure this actually works first. (x86info uses seek/read not mmap.) Hi, I can confirm that it works (after adding CONFIG_STRICT_DEVMEM), no more kernel bugs when running x86info. open("/dev/mem", O_RDONLY) = 3 lseek(3, 1038, SEEK_SET)= 1038 read(3, "\300\235", 2) = 2 lseek(3, 646144, SEEK_SET) = 646144 read(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1024) = 1024 lseek(3, 1043, SEEK_SET)= 1043 read(3, "w\2", 2) = 2 lseek(3, 645120, SEEK_SET) = 645120 read(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1024) = 1024 lseek(3, 654336, SEEK_SET) = 654336 read(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1024) = 1024 lseek(3, 983040, SEEK_SET) = 983040 read(3, "IFE$\245S\0\0\1\0\0\0\0\360y\0\0\360\220\260\30\237{=\23\10\17\\276\17\0"..., 65536) = 65536 lseek(3, 917504, SEEK_SET) = 917504 read(3, "\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377"..., 65536) = 65536 lseek(3, 524288, SEEK_SET) = 524288 read(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 65536) = 65536 lseek(3, 589824, SEEK_SET) = 589824 read(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 65536) = 65536 dd works too: # LANG=C dd if=/dev/mem of=/dev/null bs=4096 count=256 256+0 records in 256+0 records out 1048576 bytes (1.0 MB, 1.0 MiB) copied, 0.0874073 s, 12.0 MB/s --- arch/x86/mm/init.c | 41 +++ drivers/char/mem.c | 82 ++ 2 files 
changed, 82 insertions(+), 41 deletions(-)
Re: sudo x86info -a => kernel BUG at mm/usercopy.c:78!
On 31.03.2017 21:26, Linus Torvalds wrote: Hmm. Thinking more about this, we do allow access to the first 1MB of physical memory unconditionally (see devmem_is_allowed() in arch/x86/mm/init.c). And I think we only _reserve_ the first 64kB or something. So I guess even STRICT_DEVMEM isn't actually all that strict. So this should be visible even *with* STRICT_DEVMEM. Does a simple sudo dd if=/dev/mem of=/dev/null bs=4096 count=256 also show the same issue? Maybe regardless of STRICT_DEVMEM? Yep, it is enough to trigger the bug. Also crashes with the fedora kernel that has STRICT_DEVMEM: $ sudo dd if=/dev/mem of=/dev/null bs=4096 count=256 Segmentation fault [ 73.224025] usercopy: kernel memory exposure attempt detected from 893a80059000 (dma-kmalloc-16) (4096 bytes) [ 73.224049] [ cut here ] [ 73.224056] kernel BUG at mm/usercopy.c:75! [ 73.224060] invalid opcode: [#1] SMP [ 73.224237] CPU: 5 PID: 2860 Comm: dd Not tainted 4.9.14-200.fc25.x86_64 #1 Maybe we should change devmem_is_allowed() to return a ternary value, and then have it be "allow access" (for reserved pages), "disallow access" (for various random stuff), and "just read zero" (for pages in the low 1M that aren't marked reserved). That way things that read the low 1M (like x86info) will hopefully not be unhappy, but also won't be reading random kernel data. Linus
Re: sudo x86info -a => kernel BUG at mm/usercopy.c:78!
On 31.03.2017 08:40, Tommi Rantala wrote: The only thing that I can think of would be a rogue ptr in the bios table, but that seems unlikely. Tommi, can you put strace of x86info -mp somewhere? That will confirm/deny whether we're at least asking the kernel to do sane things. Indeed the bug happens when reading from /dev/mem: https://pastebin.com/raw/ZEJGQP1X # strace -f -y x86info -mp [...] open("/dev/mem", O_RDONLY) = 3 lseek(3, 1038, SEEK_SET) = 1038 read(3, "\300\235", 2)= 2 lseek(3, 646144, SEEK_SET)= 646144 read(3, "\1\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1024) = 1024 lseek(3, 1043, SEEK_SET) = 1043 read(3, "w\2", 2) = 2 lseek(3, 645120, SEEK_SET)= 645120 read(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1024) = 1024 lseek(3, 654336, SEEK_SET)= 654336 read(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1024) = 1024 lseek(3, 983040, SEEK_SET)= 983040 read(3, "IFE$\245S\0\0\1\0\0\0\0\360y\0\0\360\220\260\30\237{=\23\10\17\\276\17\0"..., 65536) = 65536 lseek(3, 917504, SEEK_SET)= 917504 read(3, "\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377"..., 65536) = 65536 lseek(3, 524288, SEEK_SET)= 524288 read(3, )= ? +++ killed by SIGSEGV +++ That last read is done in mptable.c:347, trying to read GROPE_AREA1. 
# ./x86info --debug x86info v1.31pre get_intel_topology: Siblings: 2 Physical Processor ID: 0 Processor Core ID: 0 get_intel_topology: Siblings: 2 Physical Processor ID: 0 Processor Core ID: 1 get_intel_topology: Siblings: 2 Physical Processor ID: 0 Processor Core ID: 2 get_intel_topology: Siblings: 2 Physical Processor ID: 0 Processor Core ID: 3 get_intel_topology: Siblings: 2 Physical Processor ID: 0 Processor Core ID: 0 get_intel_topology: Siblings: 2 Physical Processor ID: 0 Processor Core ID: 1 get_intel_topology: Siblings: 2 Physical Processor ID: 0 Processor Core ID: 2 get_intel_topology: Siblings: 2 Physical Processor ID: 0 Processor Core ID: 3 Found 8 identical CPUs EBDA points to: 9dc0 EBDA segment ptr: 9dc00 Segmentation fault If I comment out the GROPE_AREA1 read, the same kernel bug still happens with the GROPE_AREA2 read. Removing both GROPE_AREA1 and GROPE_AREA2 reads avoids the crash: $ git diff diff --git a/mptable.c b/mptable.c index 480f19b..00fff35 100644 --- a/mptable.c +++ b/mptable.c @@ -342,6 +342,7 @@ static int apic_probe(unsigned long* paddr) } /* search additional memory */ + /* target = GROPE_AREA1; seekEntry(target); if (readEntry(buffer, GROPE_SIZE)) { @@ -371,6 +372,7 @@ static int apic_probe(unsigned long* paddr) return 6; } } + */ *paddr = (unsigned long)0; return 0; # ./x86info -mp x86info v1.31pre Found 8 identical CPUs Extended Family: 0 Extended Model: 5 Family: 6 Model: 94 Stepping: 3 Type: 0 (Original OEM) CPU Model (x86info's best guess): Unknown model. Processor name string (BIOS programmed): Intel(R) Core(TM) i7-6820HQ CPU @ 2.70GHz Total processor threads: 8 This system has 1 quad-core processor with hyper-threading (2 threads per core) running at an estimated 2.70GHz # -Tommi
Re: sudo x86info -a => kernel BUG at mm/usercopy.c:78!
On 30.03.2017 20:44, Laura Abbott wrote: On 03/30/2017 10:37 AM, Kees Cook wrote: Reads out of /dev/mem should be restricted to non-RAM on Fedora, yes? Tommi, do your kernels have CONFIG_STRICT_DEVMEM=y ? -Kees CONFIG_STRICT_DEVMEM should be on in all Fedora kernels. Yes, the fedora kernels do have it enabled: $ grep STRICT_DEVMEM /boot/config-4.9.14-200.fc25.x86_64 CONFIG_STRICT_DEVMEM=y CONFIG_IO_STRICT_DEVMEM=y But I do not have it in my own build: $ grep STRICT_DEVMEM .config # CONFIG_STRICT_DEVMEM is not set -Tommi
Re: sudo x86info -a => kernel BUG at mm/usercopy.c:78!
On 30.03.2017 23:01, Dave Jones wrote: On Thu, Mar 30, 2017 at 12:52:31PM -0700, Kees Cook wrote: > On Thu, Mar 30, 2017 at 12:41 PM, Dave Jones wrote: > > On Thu, Mar 30, 2017 at 09:45:26AM -0700, Kees Cook wrote: > > > On Wed, Mar 29, 2017 at 11:44 PM, Tommi Rantala > > > wrote: > > > > Hi, > > > > > > > > Running: > > > > > > > > $ sudo x86info -a > > > > > > > > On this HP ZBook 15 G3 laptop kills the x86info process with segfault and > > > > produces the following kernel BUG. > > > > > > > > $ git describe > > > > v4.11-rc4-40-gfe82203 > > > > > > > > It is also reproducible with the fedora kernel: 4.9.14-200.fc25.x86_64 > > > > > > > > Full dmesg output here: https://pastebin.com/raw/Kur2mpZq > > > > > > > > [ 51.418954] usercopy: kernel memory exposure attempt detected from > > > > 8809 (dma-kmalloc-256) (4096 bytes) > > > > > > This seems like a real exposure: the copy is attempting to read 4096 > > > bytes from a 256 byte object. > > > > The code[1] is doing a 4k read from /dev/mem in the range 0x9 -> 0xa > > According to arch/x86/mm/init.c:devmem_is_allowed, that's still valid.. > > > > Note that the printk is using the direct mapping address. Is that what's > > being passed down to devmem_is_allowed now ? If so, that's probably what broke. > > So this is attempting to read physical memory 0x9 -> 0xa, but > that's somehow resolving to a virtual address that is claimed by > dma-kmalloc?? I'm confused how that's happening... The only thing that I can think of would be a rogue ptr in the bios table, but that seems unlikely. Tommi, can you put strace of x86info -mp somewhere? That will confirm/deny whether we're at least asking the kernel to do sane things. Indeed the bug happens when reading from /dev/mem: https://pastebin.com/raw/ZEJGQP1X # strace -f -y x86info -mp [...] 
open("/dev/mem", O_RDONLY) = 3 lseek(3, 1038, SEEK_SET) = 1038 read(3, "\300\235", 2)= 2 lseek(3, 646144, SEEK_SET)= 646144 read(3, "\1\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1024) = 1024 lseek(3, 1043, SEEK_SET) = 1043 read(3, "w\2", 2) = 2 lseek(3, 645120, SEEK_SET)= 645120 read(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1024) = 1024 lseek(3, 654336, SEEK_SET)= 654336 read(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1024) = 1024 lseek(3, 983040, SEEK_SET)= 983040 read(3, "IFE$\245S\0\0\1\0\0\0\0\360y\0\0\360\220\260\30\237{=\23\10\17\\276\17\0"..., 65536) = 65536 lseek(3, 917504, SEEK_SET)= 917504 read(3, "\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377"..., 65536) = 65536 lseek(3, 524288, SEEK_SET)= 524288 read(3, )= ? +++ killed by SIGSEGV +++
sudo x86info -a => kernel BUG at mm/usercopy.c:78!
Hi, Running: $ sudo x86info -a On this HP ZBook 15 G3 laptop kills the x86info process with segfault and produces the following kernel BUG. $ git describe v4.11-rc4-40-gfe82203 It is also reproducible with the fedora kernel: 4.9.14-200.fc25.x86_64 Full dmesg output here: https://pastebin.com/raw/Kur2mpZq [ 51.418954] usercopy: kernel memory exposure attempt detected from 8809 (dma-kmalloc-256) (4096 bytes) [ 51.418959] [ cut here ] [ 51.418968] kernel BUG at /home/tomranta/git/linux/mm/usercopy.c:78! [ 51.418970] invalid opcode: [#1] SMP [ 51.418972] Modules linked in: fuse ccm ipt_REJECT nf_reject_ipv4 xt_tcpudp tun af_packet xt_conntrack nf_conntrack libcrc32c ebtable_nat ebtable_broute bridge ip6table_mangle ip6table_raw iptable_mangle iptable_raw ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter ip_tables x_tables nls_iso8859_1 nls_cp437 vfat fat dm_mirror dm_region_hash dm_log arc4 hp_wmi sparse_keymap coretemp kvm_intel snd_hda_codec_hdmi kvm irqbypass pcbc aesni_intel aes_x86_64 crypto_simd cryptd glue_helper intel_cstate intel_uncore intel_rapl_perf iwlmvm mac80211 snd_usb_audio mousedev snd_usbmidi_lib snd_rawmidi input_leds snd_hda_codec_conexant snd_hda_codec_generic efivars iwlwifi uvcvideo videobuf2_vmalloc videobuf2_memops snd_hda_intel videobuf2_v4l2 cfg80211 videobuf2_core snd_hda_codec snd_seq snd_hwdep [ 51.419010] snd_seq_device snd_hda_core snd_pcm thermal hp_accel lis3lv02d input_polldev ac acpi_pad battery led_class evdev hp_wireless nfsd lockd grace sunrpc tg3 libphy crc32_pclmul crc32c_intel e1000e sd_mod 8021q garp stp llc mrp unix autofs4 [ 51.419025] CPU: 7 PID: 2406 Comm: x86info Not tainted 4.11.0-rc4-tommi+ #14 [ 51.419027] Hardware name: HP HP ZBook 15 G3/80D5, BIOS N81 Ver. 
01.12 11/01/2016 [ 51.419030] task: 88026ce84100 task.stack: c90003b94000 [ 51.419035] RIP: 0010:__check_object_size+0xfd/0x195 [ 51.419037] RSP: 0018:c90003b97de0 EFLAGS: 00010282 [ 51.419039] RAX: 0066 RBX: 8809 RCX: [ 51.419042] RDX: 8802bddd33e8 RSI: 8802bddcc9e8 RDI: 8802bddcc9e8 [ 51.419044] RBP: c90003b97e00 R08: 0006648a R09: 048b [ 51.419046] R10: 0100 R11: 81e9a86d R12: 1000 [ 51.419049] R13: 0001 R14: 88091000 R15: 8809 [ 51.419051] FS: 7f8323436b40() GS:8802bddc() knlGS: [ 51.419054] CS: 0010 DS: ES: CR0: 80050033 [ 51.419056] CR2: 7ffcbec21000 CR3: 00026c8e8000 CR4: 003406a0 [ 51.419058] DR0: DR1: DR2: [ 51.419061] DR3: DR6: fffe0ff0 DR7: 0400 [ 51.419063] Call Trace: [ 51.419066] read_mem+0x70/0x120 [ 51.419069] __vfs_read+0x28/0x130 [ 51.419072] ? security_file_permission+0x9b/0xb0 [ 51.419075] ? rw_verify_area+0x4e/0xb0 [ 51.419077] vfs_read+0x96/0x130 [ 51.419079] SyS_read+0x46/0xb0 [ 51.419082] ? SyS_lseek+0x87/0xb0 [ 51.419085] entry_SYSCALL_64_fastpath+0x1a/0xa9 [ 51.419087] RIP: 0033:0x7f8322d56bd0 [ 51.419089] RSP: 002b:7ffcbec11c68 EFLAGS: 0246 ORIG_RAX: [ 51.419091] RAX: ffda RBX: 0006 RCX: 7f8322d56bd0 [ 51.419094] RDX: 0001 RSI: 7ffcbec11ca0 RDI: 0003 [ 51.419096] RBP: 0008 R08: 0005 R09: 0050 [ 51.419098] R10: R11: 0246 R12: 02231c00 [ 51.419100] R13: 7ffcbec11c9e R14: 7ffcbec51cf8 R15: [ 51.419103] Code: a8 81 48 c7 c2 29 69 a4 81 48 c7 c6 82 89 a5 81 48 0f 45 d0 48 c7 c0 1a 1e a6 81 48 c7 c7 d0 ed a5 81 48 0f 45 f0 e8 7f 74 f8 ff <0f> 0b 48 89 df e8 29 98 e8 ff 84 c0 0f 84 3a ff ff ff b8 00 00 [ 51.419123] RIP: __check_object_size+0xfd/0x195 RSP: c90003b97de0 [ 51.421565] ---[ end trace 441f7992ca25e39d ]---
[tip:perf/core] perf utils: Readlink /proc/self/exe to find the perf binary
Commit-ID: 55f77128e7652e537d6c226d5b56821cdb5c22de Gitweb: http://git.kernel.org/tip/55f77128e7652e537d6c226d5b56821cdb5c22de Author: Tommi Rantala AuthorDate: Wed, 22 Mar 2017 15:06:24 +0200 Committer: Arnaldo Carvalho de Melo CommitDate: Mon, 27 Mar 2017 15:37:54 -0300 perf utils: Readlink /proc/self/exe to find the perf binary Simplification: it is easier to open /proc/self/exe than /proc/$pid/exe. Signed-off-by: Tommi Rantala Cc: Alexander Shishkin Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170322130624.21881-7-tommi.t.rant...@nokia.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 8 ++-- 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index cf22962..ef09f26 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -370,15 +370,11 @@ static int write_cmdline(int fd, struct perf_header *h __maybe_unused, struct perf_evlist *evlist __maybe_unused) { char buf[MAXPATHLEN]; - char proc[32]; u32 n; int i, ret; - /* -* actual atual path to perf binary -*/ - sprintf(proc, "/proc/%d/exe", getpid()); - ret = readlink(proc, buf, sizeof(buf) - 1); + /* actual path to perf binary */ + ret = readlink("/proc/self/exe", buf, sizeof(buf) - 1); if (ret <= 0) return -1;
[tip:perf/core] perf buildid: Do not assume that readlink() returns a null terminated string
Commit-ID: 5a2342111c68e623e27ee7ea3d0492d8dad6bda0 Gitweb: http://git.kernel.org/tip/5a2342111c68e623e27ee7ea3d0492d8dad6bda0 Author: Tommi Rantala AuthorDate: Wed, 22 Mar 2017 15:06:20 +0200 Committer: Arnaldo Carvalho de Melo CommitDate: Mon, 27 Mar 2017 15:35:06 -0300 perf buildid: Do not assume that readlink() returns a null terminated string Valgrind was complaining: $ valgrind ./perf list >/dev/null ==11643== Memcheck, a memory error detector ==11643== Copyright (C) 2002-2015, and GNU GPL'd, by Julian Seward et al. ==11643== Using Valgrind-3.12.0 and LibVEX; rerun with -h for copyright info ==11643== Command: ./perf list ==11643== ==11643== Conditional jump or move depends on uninitialised value(s) ==11643==at 0x4C30620: rindex (vg_replace_strmem.c:199) ==11643==by 0x49DAA9: build_id_cache__origname (build-id.c:198) ==11643==by 0x49E1C7: build_id_cache__valid_id (build-id.c:222) ==11643==by 0x49E1C7: build_id_cache__list_all (build-id.c:507) ==11643==by 0x4B9C8F: print_sdt_events (parse-events.c:2067) ==11643==by 0x4BB0B3: print_events (parse-events.c:2313) ==11643==by 0x439501: cmd_list (builtin-list.c:53) ==11643==by 0x497150: run_builtin (perf.c:359) ==11643==by 0x428CE0: handle_internal_command (perf.c:421) ==11643==by 0x428CE0: run_argv (perf.c:467) ==11643==by 0x428CE0: main (perf.c:614) [...] Additionally, a zero length result from readlink() is not very interesting. 
Signed-off-by: Tommi Rantala Cc: Alexander Shishkin Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170322130624.21881-3-tommi.t.rant...@nokia.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/build-id.c | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 234859f..33af675 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -182,13 +182,17 @@ char *build_id_cache__origname(const char *sbuild_id) char buf[PATH_MAX]; char *ret = NULL, *p; size_t offs = 5;/* == strlen("../..") */ + ssize_t len; linkname = build_id_cache__linkname(sbuild_id, NULL, 0); if (!linkname) return NULL; - if (readlink(linkname, buf, PATH_MAX) < 0) + len = readlink(linkname, buf, sizeof(buf) - 1); + if (len <= 0) goto out; + buf[len] = '\0'; + /* The link should be "../../" */ p = strrchr(buf, '/'); /* Cut off the "/" */ if (p && (p > buf + offs)) {
[tip:perf/core] perf utils: Null terminate buf in read_ftrace_printk()
Commit-ID: d4b364df5f6540e8d6a38008ce2693ba73a8508a Gitweb: http://git.kernel.org/tip/d4b364df5f6540e8d6a38008ce2693ba73a8508a Author: Tommi Rantala AuthorDate: Wed, 22 Mar 2017 15:06:23 +0200 Committer: Arnaldo Carvalho de Melo CommitDate: Mon, 27 Mar 2017 15:37:35 -0300 perf utils: Null terminate buf in read_ftrace_printk() Ensure that the string that we read from the data file is null terminated. Valgrind was complaining: ==31357== Invalid read of size 1 ==31357==at 0x4EC8C1: __strtok_r_1c (string2.h:200) ==31357==by 0x4EC8C1: parse_ftrace_printk (trace-event-parse.c:161) ==31357==by 0x4F82A8: read_ftrace_printk (trace-event-read.c:204) ==31357==by 0x4F82A8: trace_report (trace-event-read.c:468) ==31357==by 0x4CD552: process_tracing_data (header.c:1576) ==31357==by 0x4D3397: perf_file_section__process (header.c:2705) ==31357==by 0x4D3397: perf_header__process_sections (header.c:2488) ==31357==by 0x4D3397: perf_session__read_header (header.c:2925) ==31357==by 0x4E71E2: perf_session__open (session.c:32) ==31357==by 0x4E71E2: perf_session__new (session.c:139) ==31357==by 0x429F5D: cmd_annotate (builtin-annotate.c:472) ==31357==by 0x497150: run_builtin (perf.c:359) ==31357==by 0x428CE0: handle_internal_command (perf.c:421) ==31357==by 0x428CE0: run_argv (perf.c:467) ==31357==by 0x428CE0: main (perf.c:614) ==31357== Address 0x8ac0efb is 0 bytes after a block of size 1,963 alloc'd ==31357==at 0x4C2DB9D: malloc (vg_replace_malloc.c:299) ==31357==by 0x4F827B: read_ftrace_printk (trace-event-read.c:195) ==31357==by 0x4F827B: trace_report (trace-event-read.c:468) ==31357==by 0x4CD552: process_tracing_data (header.c:1576) ==31357==by 0x4D3397: perf_file_section__process (header.c:2705) ==31357==by 0x4D3397: perf_header__process_sections (header.c:2488) ==31357==by 0x4D3397: perf_session__read_header (header.c:2925) ==31357==by 0x4E71E2: perf_session__open (session.c:32) ==31357==by 0x4E71E2: perf_session__new (session.c:139) ==31357==by 0x429F5D: cmd_annotate 
(builtin-annotate.c:472) ==31357==by 0x497150: run_builtin (perf.c:359) ==31357==by 0x428CE0: handle_internal_command (perf.c:421) ==31357==by 0x428CE0: run_argv (perf.c:467) ==31357==by 0x428CE0: main (perf.c:614) Signed-off-by: Tommi Rantala Cc: Alexander Shishkin Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170322130624.21881-6-tommi.t.rant...@nokia.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/trace-event-read.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c index 2742015..8a9a677 100644 --- a/tools/perf/util/trace-event-read.c +++ b/tools/perf/util/trace-event-read.c @@ -192,7 +192,7 @@ static int read_ftrace_printk(struct pevent *pevent) if (!size) return 0; - buf = malloc(size); + buf = malloc(size + 1); if (buf == NULL) return -1; @@ -201,6 +201,8 @@ static int read_ftrace_printk(struct pevent *pevent) return -1; } + buf[size] = '\0'; + parse_ftrace_printk(pevent, buf, size); free(buf);
[tip:perf/core] perf tests: Do not assume that readlink() returns a null terminated string
Commit-ID: 0e6ba11511aef91ba8e2528ddc681d88922d7b0b Gitweb: http://git.kernel.org/tip/0e6ba11511aef91ba8e2528ddc681d88922d7b0b Author: Tommi Rantala AuthorDate: Wed, 22 Mar 2017 15:06:21 +0200 Committer: Arnaldo Carvalho de Melo CommitDate: Mon, 27 Mar 2017 15:35:56 -0300 perf tests: Do not assume that readlink() returns a null terminated string Ensure that the string in buf is null terminated. Signed-off-by: Tommi Rantala Cc: Alexander Shishkin Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170322130624.21881-4-tommi.t.rant...@nokia.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/sdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/sdt.c b/tools/perf/tests/sdt.c index f59d210..26e5b7a 100644 --- a/tools/perf/tests/sdt.c +++ b/tools/perf/tests/sdt.c @@ -43,7 +43,7 @@ static char *get_self_path(void) { char *buf = calloc(PATH_MAX, sizeof(char)); - if (buf && readlink("/proc/self/exe", buf, PATH_MAX) < 0) { + if (buf && readlink("/proc/self/exe", buf, PATH_MAX - 1) < 0) { pr_debug("Failed to get correct path of perf\n"); free(buf); return NULL;
[tip:perf/core] perf utils: use sizeof(buf) - 1 in readlink() call
Commit-ID: b7126ef78612a3d4a37aadf39125cff048cebb9b Gitweb: http://git.kernel.org/tip/b7126ef78612a3d4a37aadf39125cff048cebb9b Author: Tommi Rantala AuthorDate: Wed, 22 Mar 2017 15:06:22 +0200 Committer: Arnaldo Carvalho de Melo CommitDate: Mon, 27 Mar 2017 15:36:27 -0300 perf utils: use sizeof(buf) - 1 in readlink() call Ensure that we have space for the null byte in buf. Signed-off-by: Tommi Rantala Cc: Alexander Shishkin Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170322130624.21881-5-tommi.t.rant...@nokia.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 05714d5..cf22962 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -378,7 +378,7 @@ static int write_cmdline(int fd, struct perf_header *h __maybe_unused, * actual atual path to perf binary */ sprintf(proc, "/proc/%d/exe", getpid()); - ret = readlink(proc, buf, sizeof(buf)); + ret = readlink(proc, buf, sizeof(buf) - 1); if (ret <= 0) return -1;
[tip:perf/core] perf buildid: Do not update SDT cache with null filename
Commit-ID: 2ccc220238680642be87a2d010ce07f1c40edafb Gitweb: http://git.kernel.org/tip/2ccc220238680642be87a2d010ce07f1c40edafb Author: Tommi Rantala AuthorDate: Wed, 22 Mar 2017 15:06:19 +0200 Committer: Arnaldo Carvalho de Melo CommitDate: Mon, 27 Mar 2017 15:33:36 -0300 perf buildid: Do not update SDT cache with null filename Valgrind was complaining: ==2633== Syscall param open(filename) points to unaddressable byte(s) ==2633==at 0x5281CC0: __open_nocancel (syscall-template.S:84) ==2633==by 0x537D38: open (fcntl2.h:53) ==2633==by 0x537D38: get_sdt_note_list (symbol-elf.c:2017) ==2633==by 0x5396FD: probe_cache__scan_sdt (probe-file.c:700) ==2633==by 0x49EA2C: build_id_cache__add_sdt_cache (build-id.c:625) ==2633==by 0x49EA2C: build_id_cache__add_s (build-id.c:697) ==2633==by 0x49EE72: build_id_cache__add_b (build-id.c:717) ==2633==by 0x49EE72: dso__cache_build_id (build-id.c:782) ==2633==by 0x49F190: __dsos__cache_build_ids (build-id.c:793) ==2633==by 0x49F190: machine__cache_build_ids (build-id.c:801) ==2633==by 0x49F190: perf_session__cache_build_ids (build-id.c:815) ==2633==by 0x4CD4F2: write_build_id (header.c:165) ==2633==by 0x4D26F7: do_write_feat (header.c:2296) ==2633==by 0x4D26F7: perf_header__adds_write (header.c:2335) ==2633==by 0x4D26F7: perf_session__write_header (header.c:2414) ==2633==by 0x43B324: __cmd_record (builtin-record.c:1154) ==2633==by 0x43B324: cmd_record (builtin-record.c:1839) ==2633==by 0x455A07: __cmd_record (builtin-kmem.c:1868) ==2633==by 0x455A07: cmd_kmem (builtin-kmem.c:1944) ==2633==by 0x497150: run_builtin (perf.c:359) ==2633==by 0x428CE0: handle_internal_command (perf.c:421) ==2633==by 0x428CE0: run_argv (perf.c:467) ==2633==by 0x428CE0: main (perf.c:614) ==2633== Address 0x0 is not stack'd, malloc'd or (recently) free'd Signed-off-by: Tommi Rantala Cc: Alexander Shishkin Cc: Peter Zijlstra Cc: Tommi Rantala Link: http://lkml.kernel.org/r/20170322130624.21881-2-tommi.t.rant...@nokia.com Signed-off-by: Arnaldo Carvalho de Melo 
--- tools/perf/util/build-id.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index e528c40..234859f 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -690,7 +690,7 @@ int build_id_cache__add_s(const char *sbuild_id, const char *name, err = 0; /* Update SDT cache : error is just warned */ - if (build_id_cache__add_sdt_cache(sbuild_id, realname) < 0) + if (realname && build_id_cache__add_sdt_cache(sbuild_id, realname) < 0) pr_debug4("Failed to update/scan SDT cache for %s\n", realname); out_free:
[PATCH 1/6] perf buildid: do not update SDT cache with null filename
Valgrind was complaining: ==2633== Syscall param open(filename) points to unaddressable byte(s) ==2633==at 0x5281CC0: __open_nocancel (syscall-template.S:84) ==2633==by 0x537D38: open (fcntl2.h:53) ==2633==by 0x537D38: get_sdt_note_list (symbol-elf.c:2017) ==2633==by 0x5396FD: probe_cache__scan_sdt (probe-file.c:700) ==2633==by 0x49EA2C: build_id_cache__add_sdt_cache (build-id.c:625) ==2633==by 0x49EA2C: build_id_cache__add_s (build-id.c:697) ==2633==by 0x49EE72: build_id_cache__add_b (build-id.c:717) ==2633==by 0x49EE72: dso__cache_build_id (build-id.c:782) ==2633==by 0x49F190: __dsos__cache_build_ids (build-id.c:793) ==2633==by 0x49F190: machine__cache_build_ids (build-id.c:801) ==2633==by 0x49F190: perf_session__cache_build_ids (build-id.c:815) ==2633==by 0x4CD4F2: write_build_id (header.c:165) ==2633==by 0x4D26F7: do_write_feat (header.c:2296) ==2633==by 0x4D26F7: perf_header__adds_write (header.c:2335) ==2633==by 0x4D26F7: perf_session__write_header (header.c:2414) ==2633==by 0x43B324: __cmd_record (builtin-record.c:1154) ==2633==by 0x43B324: cmd_record (builtin-record.c:1839) ==2633==by 0x455A07: __cmd_record (builtin-kmem.c:1868) ==2633==by 0x455A07: cmd_kmem (builtin-kmem.c:1944) ==2633==by 0x497150: run_builtin (perf.c:359) ==2633==by 0x428CE0: handle_internal_command (perf.c:421) ==2633==by 0x428CE0: run_argv (perf.c:467) ==2633==by 0x428CE0: main (perf.c:614) ==2633== Address 0x0 is not stack'd, malloc'd or (recently) free'd Signed-off-by: Tommi Rantala --- tools/perf/util/build-id.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index e528c40..234859f 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -690,7 +690,7 @@ int build_id_cache__add_s(const char *sbuild_id, const char *name, err = 0; /* Update SDT cache : error is just warned */ - if (build_id_cache__add_sdt_cache(sbuild_id, realname) < 0) + if (realname && 
build_id_cache__add_sdt_cache(sbuild_id, realname) < 0) pr_debug4("Failed to update/scan SDT cache for %s\n", realname); out_free: -- 2.9.3
[PATCH 2/6] perf buildid: do not assume that readlink() returns a null terminated string
Valgrind was complaining: $ valgrind ./perf list >/dev/null ==11643== Memcheck, a memory error detector ==11643== Copyright (C) 2002-2015, and GNU GPL'd, by Julian Seward et al. ==11643== Using Valgrind-3.12.0 and LibVEX; rerun with -h for copyright info ==11643== Command: ./perf list ==11643== ==11643== Conditional jump or move depends on uninitialised value(s) ==11643==at 0x4C30620: rindex (vg_replace_strmem.c:199) ==11643==by 0x49DAA9: build_id_cache__origname (build-id.c:198) ==11643==by 0x49E1C7: build_id_cache__valid_id (build-id.c:222) ==11643==by 0x49E1C7: build_id_cache__list_all (build-id.c:507) ==11643==by 0x4B9C8F: print_sdt_events (parse-events.c:2067) ==11643==by 0x4BB0B3: print_events (parse-events.c:2313) ==11643==by 0x439501: cmd_list (builtin-list.c:53) ==11643==by 0x497150: run_builtin (perf.c:359) ==11643==by 0x428CE0: handle_internal_command (perf.c:421) ==11643==by 0x428CE0: run_argv (perf.c:467) ==11643==by 0x428CE0: main (perf.c:614) [...] Additionally, a zero length result from readlink() is not very interesting. Signed-off-by: Tommi Rantala --- tools/perf/util/build-id.c | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 234859f..9ad77b0 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -182,13 +182,17 @@ char *build_id_cache__origname(const char *sbuild_id) char buf[PATH_MAX]; char *ret = NULL, *p; size_t offs = 5;/* == strlen("../..") */ + ssize_t len; linkname = build_id_cache__linkname(sbuild_id, NULL, 0); if (!linkname) return NULL; - if (readlink(linkname, buf, PATH_MAX) < 0) + len = readlink(linkname, buf, sizeof(buf)-1); + if (len <= 0) goto out; + buf[len] = '\0'; + /* The link should be "../../" */ p = strrchr(buf, '/'); /* Cut off the "/" */ if (p && (p > buf + offs)) { -- 2.9.3
[PATCH 6/6] perf utils: readlink /proc/self/exe to find the perf binary
Simplification: it is easier to readlink /proc/self/exe directly than to build the /proc/$pid/exe path with sprintf() and getpid() first. Signed-off-by: Tommi Rantala --- tools/perf/util/header.c | 8 ++-- 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index ab10e9d..c6243af 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -370,15 +370,11 @@ static int write_cmdline(int fd, struct perf_header *h __maybe_unused, struct perf_evlist *evlist __maybe_unused) { char buf[MAXPATHLEN]; - char proc[32]; u32 n; int i, ret; - /* -* actual atual path to perf binary -*/ - sprintf(proc, "/proc/%d/exe", getpid()); - ret = readlink(proc, buf, sizeof(buf)-1); + /* actual path to perf binary */ + ret = readlink("/proc/self/exe", buf, sizeof(buf)-1); if (ret <= 0) return -1; -- 2.9.3
[PATCH 4/6] perf utils: use sizeof(buf)-1 in readlink() call
readlink() does not null terminate the string it stores and may fill the whole buffer, so pass sizeof(buf)-1 as the size to ensure that we always have space for the terminating null byte in buf. Signed-off-by: Tommi Rantala --- tools/perf/util/header.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 05714d5..ab10e9d 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -378,7 +378,7 @@ static int write_cmdline(int fd, struct perf_header *h __maybe_unused, * actual atual path to perf binary */ sprintf(proc, "/proc/%d/exe", getpid()); - ret = readlink(proc, buf, sizeof(buf)); + ret = readlink(proc, buf, sizeof(buf)-1); if (ret <= 0) return -1; -- 2.9.3
[PATCH 5/6] perf utils: null terminate buf in read_ftrace_printk()
Ensure that the string that we read from the data file is null terminated. Valgrind was complaining: ==31357== Invalid read of size 1 ==31357==at 0x4EC8C1: __strtok_r_1c (string2.h:200) ==31357==by 0x4EC8C1: parse_ftrace_printk (trace-event-parse.c:161) ==31357==by 0x4F82A8: read_ftrace_printk (trace-event-read.c:204) ==31357==by 0x4F82A8: trace_report (trace-event-read.c:468) ==31357==by 0x4CD552: process_tracing_data (header.c:1576) ==31357==by 0x4D3397: perf_file_section__process (header.c:2705) ==31357==by 0x4D3397: perf_header__process_sections (header.c:2488) ==31357==by 0x4D3397: perf_session__read_header (header.c:2925) ==31357==by 0x4E71E2: perf_session__open (session.c:32) ==31357==by 0x4E71E2: perf_session__new (session.c:139) ==31357==by 0x429F5D: cmd_annotate (builtin-annotate.c:472) ==31357==by 0x497150: run_builtin (perf.c:359) ==31357==by 0x428CE0: handle_internal_command (perf.c:421) ==31357==by 0x428CE0: run_argv (perf.c:467) ==31357==by 0x428CE0: main (perf.c:614) ==31357== Address 0x8ac0efb is 0 bytes after a block of size 1,963 alloc'd ==31357==at 0x4C2DB9D: malloc (vg_replace_malloc.c:299) ==31357==by 0x4F827B: read_ftrace_printk (trace-event-read.c:195) ==31357==by 0x4F827B: trace_report (trace-event-read.c:468) ==31357==by 0x4CD552: process_tracing_data (header.c:1576) ==31357==by 0x4D3397: perf_file_section__process (header.c:2705) ==31357==by 0x4D3397: perf_header__process_sections (header.c:2488) ==31357==by 0x4D3397: perf_session__read_header (header.c:2925) ==31357==by 0x4E71E2: perf_session__open (session.c:32) ==31357==by 0x4E71E2: perf_session__new (session.c:139) ==31357==by 0x429F5D: cmd_annotate (builtin-annotate.c:472) ==31357==by 0x497150: run_builtin (perf.c:359) ==31357==by 0x428CE0: handle_internal_command (perf.c:421) ==31357==by 0x428CE0: run_argv (perf.c:467) ==31357==by 0x428CE0: main (perf.c:614) Signed-off-by: Tommi Rantala --- tools/perf/util/trace-event-read.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) 
diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c index 2742015..04605c0 100644 --- a/tools/perf/util/trace-event-read.c +++ b/tools/perf/util/trace-event-read.c @@ -192,7 +192,7 @@ static int read_ftrace_printk(struct pevent *pevent) if (!size) return 0; - buf = malloc(size); + buf = malloc(size+1); if (buf == NULL) return -1; @@ -201,6 +201,8 @@ static int read_ftrace_printk(struct pevent *pevent) return -1; } + buf[size] = '\0'; + parse_ftrace_printk(pevent, buf, size); free(buf); -- 2.9.3
[PATCH 3/6] perf tests: do not assume that readlink() returns a null terminated string
Ensure that the string in buf is null terminated: buf is zero-filled by calloc(), so limiting readlink() to PATH_MAX-1 bytes guarantees that the last byte of the buffer remains a null byte. Signed-off-by: Tommi Rantala --- tools/perf/tests/sdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/sdt.c b/tools/perf/tests/sdt.c index f59d210..121949a 100644 --- a/tools/perf/tests/sdt.c +++ b/tools/perf/tests/sdt.c @@ -43,7 +43,7 @@ static char *get_self_path(void) { char *buf = calloc(PATH_MAX, sizeof(char)); - if (buf && readlink("/proc/self/exe", buf, PATH_MAX) < 0) { + if (buf && readlink("/proc/self/exe", buf, PATH_MAX-1) < 0) { pr_debug("Failed to get correct path of perf\n"); free(buf); return NULL; -- 2.9.3
[PATCH 0/6] perf string handling fixes
Hi, Some small perf fixes, mostly caught with valgrind. The last patch is a simplification: it is easier to open /proc/self/exe than /proc/$pid/exe. Tommi Rantala (6): perf buildid: do not update SDT cache with null filename perf buildid: do not assume that readlink() returns a null terminated string perf tests: do not assume that readlink() returns a null terminated string perf utils: use sizeof(buf)-1 in readlink() call perf utils: null terminate buf in read_ftrace_printk() perf utils: readlink /proc/self/exe to find the perf binary tools/perf/tests/sdt.c | 2 +- tools/perf/util/build-id.c | 8 ++-- tools/perf/util/header.c | 8 ++-- tools/perf/util/trace-event-read.c | 4 +++- 4 files changed, 12 insertions(+), 10 deletions(-) -- 2.9.3
Re: nouveau: iowrite32 oops & warning at drivers/gpu/drm/nouveau/nouveau_fence.c:198
2015-11-22 22:49 GMT+02:00 Ilia Mirkin : > Not sure if these apply here but there are a couple of outstanding > locking fixes available in > http://cgit.freedesktop.org/~darktama/nouveau/ -- specifically these > two: > > http://cgit.freedesktop.org/~darktama/nouveau/commit/?id=2f3a56ad019e378a352e9cb7a559f478826f1a87 > http://cgit.freedesktop.org/~darktama/nouveau/commit/?id=4179b15c6e9fcfb253e811e5477debe46c84c395 > > Not sure if they affect this particular issue, but thought I'd point > it out. Are you fuzzing with multiple threads, or just one at a time? > Do you have a branch somewhere public with the changes to add nouveau > ioctl support to trinity? Hi! I applied those two on top of v4.4-rc2, but the same warning and oops are still easily reproducible. I can test with older kernels and/or try to bisect when I have more time, unless anyone has better ideas. I'm actually running unmodified trinity, and for this purpose only fuzzing the ioctl() syscall from multiple processes, and opening only the files from /dev/dri/: $ ./trinity -q -loff -C20 -c ioctl -V /dev/dri/ Trinity knows about a bunch of DRM ioctl commands, but the rest of the ioctl arguments will be garbage: https://github.com/kernelslacker/trinity/blob/master/ioctls/drm.c Tommi -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
nouveau: iowrite32 oops & warning at drivers/gpu/drm/nouveau/nouveau_fence.c:198
Hello, I'm seeing this warning and oops when trying to fuzz linus v4.4-rc1-290-g3ad5d7e with trinity. [ cut here ] WARNING: CPU: 1 PID: 4308 at drivers/gpu/drm/nouveau/nouveau_fence.c:198 nouveau_fence_context_new+0x22e/0x270() CPU: 1 PID: 4308 Comm: trinity-c19 Tainted: GB 4.4.0-rc1+ #1 Hardware name: Dell Inc. OptiPlex 990/0D6H9T, BIOS A06 07/25/2011 0009 8800ac4bf8b0 818a802e 8800ac4bf8f0 8111bc75 820170be 8800ac9c1050 88015b0e8cc0 8800ca703070 88016f432130 8800afb675f0 Call Trace: [] dump_stack+0x4b/0x6d [] warn_slowpath_common+0x95/0xd0 [] ? nouveau_fence_context_new+0x22e/0x270 [] warn_slowpath_null+0x15/0x20 [] nouveau_fence_context_new+0x22e/0x270 [] ? nouveau_fence_context_free+0x20/0x20 [] ? kasan_kmalloc+0x5e/0x70 [] ? kmem_cache_alloc_trace+0x123/0x290 [] ? nv84_fence_context_new+0x73/0x2d0 [] nv84_fence_context_new+0x9d/0x2d0 [] nvc0_fence_context_new+0x11/0x70 [] nouveau_channel_new+0x7e1/0xca0 [] ? create_object+0x28c/0x4d0 [] ? nouveau_channel_prep+0x4b0/0x4b0 [] ? create_object+0x406/0x4d0 [] ? kmemleak_disable+0x70/0x70 [] ? nouveau_abi16_get+0x37/0x80 [] ? trace_hardirqs_on_caller+0x16/0x280 [] ? kasan_unpoison_shadow+0x36/0x50 [] ? kasan_unpoison_shadow+0x36/0x50 [] ? kmem_cache_alloc_trace+0x123/0x290 [] ? nouveau_abi16_ioctl_channel_alloc+0xec/0x4d0 [] nouveau_abi16_ioctl_channel_alloc+0x1ae/0x4d0 [] ? nouveau_abi16_ioctl_setparam+0x10/0x10 [] drm_ioctl+0x20b/0x6b0 [] ? debug_lockdep_rcu_enabled+0x26/0x40 [] ? drm_ioctl_permit+0x120/0x120 [] ? trace_hardirqs_on+0xd/0x10 [] nouveau_drm_ioctl+0x99/0x110 [] do_vfs_ioctl+0x4ae/0x800 [] ? selinux_file_ioctl+0x1f9/0x2d0 [] ? ioctl_preallocate+0x140/0x140 [] ? selinux_parse_skb.constprop.42+0x7c0/0x7c0 [] ? do_setitimer+0x28b/0x420 [] ? alarm_setitimer+0x85/0xe0 [] ? 
do_setitimer+0x420/0x420 [] SyS_ioctl+0x74/0x80 [] entry_SYSCALL_64_fastpath+0x12/0x6f ---[ end trace 780046761495600a ]--- BUG: unable to handle kernel paging request at c9000188 IP: [] iowrite32+0x2e/0x40 PGD 174f36067 PUD 174f37067 PMD 16a13b067 PTE 0 Oops: 0002 [#1] SMP KASAN CPU: 3 PID: 4638 Comm: trinity-c19 Tainted: GB W 4.4.0-rc1+ #1 Hardware name: Dell Inc. OptiPlex 990/0D6H9T, BIOS A06 07/25/2011 task: 8800ab8e1a00 ti: 8800ab6c task.ti: 8800ab6c RIP: 0010:[] [] iowrite32+0x2e/0x40 RSP: 0018:8800ab6c75f8 EFLAGS: 00010292 RAX: ed001940c2b8 RBX: c9000188 RCX: 8800ca0615c7 RDX: RSI: c9000188 RDI: RBP: 8800ab6c7618 R08: R09: R10: R11: ed001443979e R12: 8800ca061590 R13: R14: 880152553390 R15: 833e0220 FS: 7f48be30a700() GS:88017580() knlGS: CS: 0010 DS: ES: CR0: 80050033 CR2: c9000188 CR3: ab455000 CR4: 000406e0 Stack: 81f58f8b 00080004 8800ca061590 880169d3a4c0 8800ab6c76b8 81fa3b68 0141c180 0001 88006000 1000 8800ab6c7820 Call Trace: [] ? nvkm_instobj_wr32+0x2b/0x40 [] gf100_fifo_gpfifo_new+0x528/0xa50 [] nvkm_fifo_class_new+0x58/0x70 [] ? nvkm_fifo_uevent_ctor+0x50/0x50 [] nvkm_udevice_child_new+0x63/0x80 [] nvkm_ioctl_new+0x2aa/0x5e0 [] ? create_object+0x8c/0x4d0 [] ? save_stack_address+0x6a/0x70 [] ? nvkm_ioctl_del+0x110/0x110 [] ? nvkm_udevice_map+0x90/0x90 [] ? create_object+0x406/0x4d0 [] nvkm_ioctl+0x1fb/0x510 [] ? trace_hardirqs_on_caller+0x16/0x280 [] ? nvkm_client_map+0x10/0x10 [] nvkm_client_ioctl+0xd/0x10 [] nvif_object_ioctl+0xad/0xe0 [] nvif_object_init+0x160/0x1e0 [] nouveau_channel_new+0x18d/0xca0 [] ? create_object+0x28c/0x4d0 [] ? nouveau_channel_prep+0x4b0/0x4b0 [] ? create_object+0x406/0x4d0 [] ? kmemleak_disable+0x70/0x70 [] ? nouveau_abi16_get+0x37/0x80 [] ? trace_hardirqs_on_caller+0x16/0x280 [] ? kasan_unpoison_shadow+0x36/0x50 [] ? kasan_unpoison_shadow+0x36/0x50 [] ? kmem_cache_alloc_trace+0x123/0x290 [] ? nouveau_abi16_ioctl_channel_alloc+0xec/0x4d0 [] nouveau_abi16_ioctl_channel_alloc+0x1ae/0x4d0 [] ? 
nouveau_abi16_ioctl_setparam+0x10/0x10 [] drm_ioctl+0x20b/0x6b0 [] ? debug_lockdep_rcu_enabled+0x26/0x40 [] ? drm_ioctl_permit+0x120/0x120 [] ? trace_hardirqs_on+0xd/0x10 [] nouveau_drm_ioctl+0x99/0x110 [] do_vfs_ioctl+0x4ae/0x800 [] ? selinux_file_ioctl+0x1f9/0x2d0 [] ? ioctl_preallocate+0x140/0x140 [] ? selinux_parse_skb.constprop.42+0x7c0/0x7c0 [] ? do_setitimer+0x28b/0x420 [] ? alarm_setitimer+0x85/0xe0 [] ? do_setitimer+0x420/0x420 [] SyS_ioctl+0x74/0x80 [] entry_SYSCALL_64_fastpath+0x12/0x6f Code: ff ff 03 00 77 25 48 81 fe 00 00 01 00 76 07 0f b7 d6 89 f8 ef c3 55 48 89 f7 48 c7 c6 e0 39 1e 83 48 89 e5 e8 84 fd ff ff 5d c3 <89> 3e c3 66 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00 48 81 fe RIP [] iowrite32+0x2e/0x40 RSP CR2: c9000188 ---[ end tr
nouveau: BUG: KASAN: slab-out-of-bounds in memcpy+0x1d/0x40
Hello, I'm seeing this kasan report after booting with linus v4.4-rc1-290-g3ad5d7e. BUG: KASAN: slab-out-of-bounds in memcpy+0x1d/0x40 at addr 880169e21fd0 Read of size 64 by task kworker/1:0/14 = BUG kmalloc-8192 (Not tainted): kasan: bad access detected - Disabling lock debugging due to kernel taint INFO: Allocated in register_framebuffer+0x466/0x550 age=30792 cpu=1 pid=1 ___slab_alloc+0x53b/0x560 __slab_alloc+0x3e/0x70 kmem_cache_alloc_trace+0x20f/0x290 register_framebuffer+0x466/0x550 drm_fb_helper_initial_config+0x5a1/0x800 nouveau_fbcon_init+0x148/0x180 nouveau_drm_load+0x583/0xf30 drm_dev_register+0xb9/0xd0 drm_get_pci_dev+0x176/0x370 nouveau_drm_probe+0x2f2/0x3c0 local_pci_probe+0x75/0xd0 pci_device_probe+0x19f/0x1f0 driver_probe_device+0x208/0x6c0 __driver_attach+0xb8/0xc0 bus_for_each_dev+0xe6/0x150 driver_attach+0x26/0x30 INFO: Slab 0xea0005a78800 objects=3 used=3 fp=0x (null) flags=0x2004080 INFO: Object 0x880169e2 @offset=0 fp=0x (null) Object 880169e2: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 Object 880169e20010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ... Object 880169e20fc0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ff ff Object 880169e20fd0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 Object 880169e20fe0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ff ff Object 880169e20ff0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 CPU: 1 PID: 14 Comm: kworker/1:0 Tainted: GB 4.4.0-rc1+ #1 Hardware name: Dell Inc. OptiPlex 990/0D6H9T, BIOS A06 07/25/2011 Workqueue: events_power_efficient fb_flashcursor ea0005a78800 8801740ef7f0 818a802e 880174c04e00 8801740ef820 813030e4 880174c04e00 ea0005a78800 880169e2 880169e21fd0 8801740ef848 813063ef Call Trace: [] dump_stack+0x4b/0x6d [] print_trailer+0xf4/0x150 [] object_err+0x2f/0x40 [] kasan_report_error+0x20d/0x510 [] ? native_sched_clock+0x67/0x140 [] kasan_report+0x34/0x40 [] ? 
memcpy+0x1d/0x40 [] __asan_loadN+0x12a/0x180 [] memcpy+0x1d/0x40 [] OUT_RINGp+0x75/0x90 [] nvc0_fbcon_imageblit+0x462/0x6c0 [] nouveau_fbcon_imageblit+0xfd/0x110 [] soft_cursor+0x2f6/0x400 [] bit_cursor+0xb14/0xb60 [] ? update_attr.isra.0+0xc0/0xc0 [] ? fb_flashcursor+0x33/0x1b0 [] ? fb_get_color_depth+0x7f/0xb0 [] ? get_color+0xd6/0x1d0 [] ? update_attr.isra.0+0xc0/0xc0 [] fb_flashcursor+0x19f/0x1b0 [] process_one_work+0x3fe/0xae0 [] ? process_one_work+0x32e/0xae0 [] ? try_to_grab_pending+0x200/0x200 [] ? debug_lockdep_rcu_enabled+0x35/0x40 [] worker_thread+0x8a/0x7f0 [] ? process_one_work+0xae0/0xae0 [] kthread+0x185/0x1b0 [] ? __kthread_parkme+0xe0/0xe0 [] ? acpi_ps_parse_loop+0x41c/0xab8 [] ? trace_hardirqs_on_caller+0x186/0x280 [] ? ddebug_add_module+0x38/0x130 [] ? __kthread_parkme+0xe0/0xe0 [] ret_from_fork+0x3f/0x70 [] ? __kthread_parkme+0xe0/0xe0 Memory state around the buggy address: 880169e21f00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 880169e21f80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 >880169e22000: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ^ 880169e22080: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc 880169e22100: fc fc fc fc fc fc 00 00 00 00 00 00 00 00 00 00 == Some nouveau messages from the boot, if this helps: nouveau :01:00.0: NVIDIA GF119 (0d90a0a1) nouveau :01:00.0: bios: version 75.19.55.00.02 nouveau :01:00.0: fb: 1024 MiB DDR3 [TTM] Zone kernel: Available graphics memory: 2590256 kiB [TTM] Zone dma32: Available graphics memory: 2097152 kiB [TTM] Initializing pool allocator [TTM] Initializing DMA pool allocator nouveau :01:00.0: DRM: VRAM: 1024 MiB nouveau :01:00.0: DRM: GART: 1048576 MiB nouveau :01:00.0: DRM: TMDS table version 2.0 nouveau :01:00.0: DRM: DCB version 4.0 nouveau :01:00.0: DRM: DCB outp 00: 02000300 nouveau :01:00.0: DRM: DCB outp 01: 01000302 00020030 nouveau :01:00.0: DRM: DCB outp 02: 02011362 00020010 nouveau :01:00.0: DRM: DCB outp 03: 04022310 nouveau :01:00.0: DRM: DCB conn 00: 1030 nouveau 
:01:00.0: DRM: DCB conn 01: 2161 nouveau :01:00.0: DRM: DCB conn 02: 0200 [drm] Supports vblank timestamp caching Rev 2 (21.10.2013). [drm] Driver supports precise vblank timestamp query. nouveau :01:00.0: DRM: MM: using COPY0 for buffer copies nouveau :01:00.0: No connectors reported connected with modes [drm] Cannot find any crtc or sizes - going 1024x768 nouveau :01:00.0: DRM: allocated
[PATCH] [media] cx231xx: Add support for Terratec Grabby
Add support for the Terratec Grabby with USB ID 0ccd:00a6. Signed-off-by: Tommi Rantala --- drivers/media/usb/cx231xx/cx231xx-cards.c | 28 drivers/media/usb/cx231xx/cx231xx.h | 1 + 2 files changed, 29 insertions(+) diff --git a/drivers/media/usb/cx231xx/cx231xx-cards.c b/drivers/media/usb/cx231xx/cx231xx-cards.c index fe00da1..404e17c 100644 --- a/drivers/media/usb/cx231xx/cx231xx-cards.c +++ b/drivers/media/usb/cx231xx/cx231xx-cards.c @@ -815,6 +815,32 @@ struct cx231xx_board cx231xx_boards[] = { .gpio = NULL, } }, }, + [CX231XX_BOARD_TERRATEC_GRABBY] = { + .name = "Terratec Grabby", + .tuner_type = TUNER_ABSENT, + .decoder = CX231XX_AVDECODER, + .output_mode = OUT_MODE_VIP11, + .demod_xfer_mode = 0, + .ctl_pin_status_mask = 0xFFC4, + .agc_analog_digital_select_gpio = 0x0c, + .gpio_pin_status_mask = 0x4001000, + .norm = V4L2_STD_PAL, + .no_alt_vanc = 1, + .external_av = 1, + .input = {{ + .type = CX231XX_VMUX_COMPOSITE1, + .vmux = CX231XX_VIN_2_1, + .amux = CX231XX_AMUX_LINE_IN, + .gpio = NULL, + }, { + .type = CX231XX_VMUX_SVIDEO, + .vmux = CX231XX_VIN_1_1 | + (CX231XX_VIN_1_2 << 8) | + CX25840_SVIDEO_ON, + .amux = CX231XX_AMUX_LINE_IN, + .gpio = NULL, + } }, + }, }; const unsigned int cx231xx_bcount = ARRAY_SIZE(cx231xx_boards); @@ -880,6 +906,8 @@ struct usb_device_id cx231xx_id_table[] = { .driver_info = CX231XX_BOARD_ELGATO_VIDEO_CAPTURE_V2}, {USB_DEVICE(0x1f4d, 0x0102), .driver_info = CX231XX_BOARD_OTG102}, + {USB_DEVICE(USB_VID_TERRATEC, 0x00a6), +.driver_info = CX231XX_BOARD_TERRATEC_GRABBY}, {}, }; diff --git a/drivers/media/usb/cx231xx/cx231xx.h b/drivers/media/usb/cx231xx/cx231xx.h index 00d3bce..54790fb 100644 --- a/drivers/media/usb/cx231xx/cx231xx.h +++ b/drivers/media/usb/cx231xx/cx231xx.h @@ -77,6 +77,7 @@ #define CX231XX_BOARD_HAUPPAUGE_930C_HD_1113xx 19 #define CX231XX_BOARD_HAUPPAUGE_930C_HD_1114xx 20 #define CX231XX_BOARD_HAUPPAUGE_955Q 21 +#define CX231XX_BOARD_TERRATEC_GRABBY 22 /* Limits minimum and default number of buffers */ #define 
CX231XX_MIN_BUF 4 -- 1.9.3 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] drm/radeon: fix DRM_IOCTL_RADEON_CS oops
Passing zeroed drm_radeon_cs struct to DRM_IOCTL_RADEON_CS produces the following oops. Fix by always calling INIT_LIST_HEAD() to avoid the crash in list_sort(). -- #include #include #include #include #include static const struct drm_radeon_cs cs; int main(int argc, char **argv) { return ioctl(open(argv[1], O_RDWR), DRM_IOCTL_RADEON_CS, &cs); } -- [ttrantal@test2 ~]$ ./main /dev/dri/card0 [ 46.904650] BUG: unable to handle kernel NULL pointer dereference at (null) [ 46.905022] IP: [] list_sort+0x42/0x240 [ 46.905022] PGD 68f29067 PUD 688b5067 PMD 0 [ 46.905022] Oops: 0002 [#1] SMP [ 46.905022] CPU: 0 PID: 2413 Comm: main Not tainted 4.0.0-rc1+ #58 [ 46.905022] Hardware name: Hewlett-Packard HP Compaq dc5750 Small Form Factor/0A64h, BIOS 786E3 v02.10 01/25/2007 [ 46.905022] task: 880058e2bcc0 ti: 880058e64000 task.ti: 880058e64000 [ 46.905022] RIP: 0010:[] [] list_sort+0x42/0x240 [ 46.905022] RSP: 0018:880058e67998 EFLAGS: 00010246 [ 46.905022] RAX: RBX: RCX: [ 46.905022] RDX: 81644410 RSI: 880058e67b40 RDI: 880058e67a58 [ 46.905022] RBP: 880058e67a88 R08: R09: [ 46.905022] R10: 880058e2bcc0 R11: 828e6ca0 R12: 81644410 [ 46.905022] R13: 8800694b8018 R14: R15: 880058e679b0 [ 46.905022] FS: 7fdc65a65700() GS:88006d60() knlGS: [ 46.905022] CS: 0010 DS: ES: CR0: 80050033 [ 46.905022] CR2: CR3: 58dd9000 CR4: 06f0 [ 46.905022] DR0: DR1: DR2: [ 46.905022] DR3: DR6: 4ff0 DR7: 0400 [ 46.905022] Stack: [ 46.905022] 880058e67b40 880058e2bcc0 880058e67a78 [ 46.905022] [ 46.905022] [ 46.905022] Call Trace: [ 46.905022] [] radeon_cs_parser_fini+0x195/0x220 [ 46.905022] [] radeon_cs_ioctl+0xa9/0x960 [ 46.905022] [] drm_ioctl+0x19c/0x640 [ 46.905022] [] ? trace_hardirqs_on_caller+0xfd/0x1c0 [ 46.905022] [] ? trace_hardirqs_on+0xd/0x10 [ 46.905022] [] radeon_drm_ioctl+0x46/0x80 [ 46.905022] [] do_vfs_ioctl+0x318/0x570 [ 46.905022] [] ? 
selinux_file_ioctl+0x56/0x110 [ 46.905022] [] SyS_ioctl+0x81/0xa0 [ 46.905022] [] system_call_fastpath+0x12/0x17 [ 46.905022] Code: 48 89 b5 10 ff ff ff 0f 84 03 01 00 00 4c 8d bd 28 ff ff ff 31 c0 48 89 fb b9 15 00 00 00 49 89 d4 4c 89 ff f3 48 ab 48 8b 46 08 <48> c7 00 00 00 00 00 48 8b 0e 48 85 c9 0f 84 7d 00 00 00 c7 85 [ 46.905022] RIP [] list_sort+0x42/0x240 [ 46.905022] RSP [ 46.905022] CR2: [ 47.149253] ---[ end trace 09576b4e8b2c20b8 ]--- Signed-off-by: Tommi Rantala --- drivers/gpu/drm/radeon/radeon_cs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index a579ed3..4d0f96c 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -256,11 +256,13 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data) u32 ring = RADEON_CS_RING_GFX; s32 priority = 0; + INIT_LIST_HEAD(&p->validated); + if (!cs->num_chunks) { return 0; } + /* get chunks */ - INIT_LIST_HEAD(&p->validated); p->idx = 0; p->ib.sa_bo = NULL; p->const_ib.sa_bo = NULL; -- 1.9.3 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 2/2] Input: xpad - add Thrustmaster as Xbox 360 controller vendor
Add Thrustmaster as Xbox 360 controller vendor. This is required for example to make the GP XID (044f:b326) gamepad work. Signed-off-by: Tommi Rantala --- drivers/input/joystick/xpad.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index cee4fe3..2ed7905 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -293,6 +293,7 @@ static const signed short xpad_abs_triggers[] = { static struct usb_device_id xpad_table[] = { { USB_INTERFACE_INFO('X', 'B', 0) },/* X-Box USB-IF not approved class */ + XPAD_XBOX360_VENDOR(0x044f),/* Thrustmaster X-Box 360 controllers */ XPAD_XBOX360_VENDOR(0x045e),/* Microsoft X-Box 360 controllers */ XPAD_XBOXONE_VENDOR(0x045e),/* Microsoft X-Box One controllers */ XPAD_XBOX360_VENDOR(0x046d),/* Logitech X-Box 360 style controllers */ -- 1.9.3 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 1/2] Input: xpad - add USB ID for Thrustmaster Ferrari 458 Racing Wheel
Add the USB ID for the Xbox 360 Thrustmaster Ferrari 458 Racing Wheel. Signed-off-by: Tommi Rantala --- drivers/input/joystick/xpad.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index cd13c82..cee4fe3 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -209,6 +209,7 @@ static const struct xpad_device { { 0x24c6, 0x5501, "Hori Real Arcade Pro VX-SA", 0, XTYPE_XBOX360 }, { 0x24c6, 0x5506, "Hori SOULCALIBUR V Stick", 0, XTYPE_XBOX360 }, { 0x24c6, 0x5b02, "Thrustmaster, Inc. GPX Controller", 0, XTYPE_XBOX360 }, + { 0x24c6, 0x5b03, "Thrustmaster Ferrari 458 Racing Wheel", 0, XTYPE_XBOX360 }, { 0xffff, 0xffff, "Chinese-made Xbox Controller", 0, XTYPE_XBOX }, { 0x0000, 0x0000, "Generic X-Box pad", 0, XTYPE_UNKNOWN } }; -- 1.9.3 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: f2fs get_dnode_of_data oops
2014-09-09 7:41 GMT+03:00 Jaegeuk Kim : > Hi Tommi, > > This patch should resolve this bug. > Thanks a lot. :) > > From ee24677b9917583f50f16b6f59771439f91b890c Mon Sep 17 00:00:00 2001 > From: Jaegeuk Kim > Date: Mon, 8 Sep 2014 10:59:43 -0700 > Subject: [PATCH] f2fs: fix negative value for lseek offset > Thanks, with this patch applied, I could not reproduce the lseek oops, but now I hit the following: [ 720.673788] [ cut here ] [ 720.674011] kernel BUG at fs/f2fs/node.c:1229! [ 720.674011] invalid opcode: [#1] SMP DEBUG_PAGEALLOC [ 720.674011] CPU: 0 PID: 5298 Comm: trinity-c15 Not tainted 3.17.0-rc4+ #38 [ 720.674011] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 [ 720.674011] task: 88002c468000 ti: 88002c47 task.ti: 88002c47 [ 720.674011] RIP: 0010:[] [] f2fs_write_node_page+0x171/0x290 [ 720.674011] RSP: 0018:88002c473cb0 EFLAGS: 00010206 [ 720.674011] RAX: RBX: RCX: ea6173c0 [ 720.674011] RDX: 0001 RSI: 0001 RDI: ea6173c0 [ 720.674011] RBP: 88002c473cf8 R08: R09: [ 720.674011] R10: 0001 R11: 8800185cf000 R12: ea6173c0 [ 720.674011] R13: 8800399d4520 R14: 88002c473e68 R15: 8800185cf000 [ 720.674011] FS: 7fb4b61d4700() GS:88003fa0() knlGS: [ 720.674011] CS: 0010 DS: ES: CR0: 8005003b [ 720.674011] CR2: 0008 CR3: 2c45 CR4: 06f0 [ 720.674011] DR0: 01ee3000 DR1: 019d3000 DR2: [ 720.674011] DR3: DR6: 0ff0 DR7: 0600 [ 720.674011] Stack: [ 720.674011] 81189e75 04110001 880037cb10a8 88002c473cd8 [ 720.674011] 8800 1600 88002c473d58 [ 720.674011] 0001 88002c473df0 81778745 [ 720.674011] Call Trace: [ 720.674011] [] ? trace_hardirqs_on_caller+0x185/0x220 [ 720.674011] [] sync_node_pages+0x415/0x5f0 [ 720.674011] [] ? SyS_tee+0x390/0x390 [ 720.674011] [] write_checkpoint+0x21d/0xeb0 [ 720.674011] [] ? mark_held_locks+0x90/0xa0 [ 720.674011] [] ? mutex_lock_nested+0x435/0x4b0 [ 720.674011] [] ? trace_hardirqs_on_caller+0x185/0x220 [ 720.674011] [] ? 
SyS_tee+0x390/0x390 [ 720.674011] [] f2fs_sync_fs+0x100/0x180 [ 720.674011] [] sync_fs_one_sb+0x1b/0x20 [ 720.674011] [] iterate_supers+0x7f/0xe0 [ 720.674011] [] sys_sync+0x50/0x90 [ 720.674011] [] system_call_fastpath+0x16/0x1b [ 720.674011] Code: 63 00 00 48 b8 00 00 00 00 00 16 00 00 4c 01 e0 48 c1 f8 06 48 c1 e0 0c 49 01 c7 41 8b 9f e8 0f 00 00 89 d8 49 39 44 24 10 74 07 <0f> 0b 0f 1f 44 00 00 48 8d 55 c8 89 de 4c 89 ef e8 3a fb ff ff [ 720.674011] RIP [] f2fs_write_node_page+0x171/0x290 [ 720.674011] RSP [ 720.674011] [ cut here ] [ 720.674011] kernel BUG at arch/x86/mm/pageattr.c:216! [ 720.674011] invalid opcode: [#2] SMP DEBUG_PAGEALLOC [ 720.674011] CPU: 0 PID: 5298 Comm: trinity-c15 Not tainted 3.17.0-rc4+ #38 [ 720.674011] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 [ 720.674011] task: 88002c468000 ti: 88002c47 task.ti: 88002c47 [ 720.674011] RIP: 0010:[] [] change_page_attr_set_clr+0x250/0x430 [ 720.674011] RSP: 0018:88002c4730b8 EFLAGS: 00010046 [ 720.674011] RAX: 0046 RBX: RCX: 0010 [ 720.674011] RDX: 4600 RSI: RDI: 8000 [ 720.674011] RBP: 88002c473148 R08: 0001 R09: 8800 [ 720.674011] R10: 880034780738 R11: 88000e526610 R12: [ 720.674011] R13: 0010 R14: 0004 R15: 0005 [ 720.674011] FS: 7fb4b61d4700() GS:88003fa0() knlGS: [ 720.674011] CS: 0010 DS: ES: CR0: 8005003b [ 720.674011] CR2: 0008 CR3: 2c45 CR4: 06f0 [ 720.674011] DR0: 01ee3000 DR1: 019d3000 DR2: [ 720.674011] DR3: DR6: 0ff0 DR7: 0600 [ 720.674011] Stack: [ 720.674011] 0200 [ 720.674011] 8801 0010 [ 720.674011] 00050001 5d4d 0200 [ 720.674011] Call Trace: [ 720.674011] [] _set_pages_array+0x86/0x130 [ 720.674011] [] set_pages_array_wc+0xe/0x10 [ 720.674011] [] ttm_set_pages_caching+0x47/0x70 [ 720.674011] [] ttm_alloc_new_pages.isra.4+0xf3/0x190 [ 720.674011] [] ttm_pool_populate+0x1b5/0x490 [ 720.674011] [] cirrus_ttm_tt_populate+0x9/0x10 [ 720.674011] [] ttm_bo_move_memcpy+0x183/0x640 [ 720.674011] [] cirrus_bo_move+0x13/0x20 [ 720.674011] [] ttm_bo_handle_
Re: f2fs get_dnode_of_data oops
2014-09-08 7:20 GMT+03:00 Jaegeuk Kim : > Hi, > > Thank you for the report. > Could you share a little bit more information about the file accessing > f2fs_llseek? > E.g., file size, file offset, file allocation information, or dump of that > file. Hi, I can reproduce the bug with the following. -17595150933902 is just something I saw trinity passing to lseek(). #define _GNU_SOURCE #include #include #include #include #include int main(int argc, char **argv) { int fd; if (argc < 2) { printf("give filename\n"); return 1; } fd = open(argv[1], O_RDONLY); if (fd < 0) { perror("open"); return 1; } lseek(fd, -17595150933902LL, SEEK_DATA); return 0; } {ttrantal@arkki ~}> touch /f2fs/x ; ./a.out /f2fs/x [ 73.437182] BUG: unable to handle kernel paging request at 88043368e340 [ 73.438035] IP: [] get_dnode_of_data+0x3a9/0x440 [ 73.438035] PGD 4595067 PUD 0 [ 73.438035] Oops: [#1] SMP DEBUG_PAGEALLOC [ 73.438035] CPU: 0 PID: 2933 Comm: a.out Not tainted 3.17.0-rc4+ #37 [ 73.438035] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 [ 73.438035] task: 88003755cac0 ti: 880022734000 task.ti: 880022734000 [ 73.438035] RIP: 0010:[] [] get_dnode_of_data+0x3a9/0x440 [ 73.438035] RSP: 0018:880022737e08 EFLAGS: 00010246 [ 73.438035] RAX: 880033951000 RBX: 010b RCX: fff4f476 [ 73.438035] RDX: 880033951168 RSI: 00111932488f RDI: 880022737ef0 [ 73.438035] RBP: 880022737eb8 R08: 0148 R09: [ 73.438035] R10: 8b86 R11: 0001 R12: fffefff4f476 [ 73.438035] R13: R14: eace5440 R15: 880021c28000 [ 73.438035] FS: 7fefc2f08700() GS:88003fa0() knlGS: [ 73.438035] CS: 0010 DS: ES: CR0: 8005003b [ 73.438035] CR2: 88043368e340 CR3: 32d6b000 CR4: 06f0 [ 73.438035] Stack: [ 73.438035] 880022737ef0 81228d7c 88003d9fe7b0 880022737eb8 [ 73.438035] 81763164 0002 fff4f476 [ 73.438035] 0246 8259bd47 010b [ 73.438035] Call Trace: [ 73.438035] [] ? pagevec_lookup_tag+0x1c/0x30 [ 73.438035] [] ? __get_first_dirty_index+0x44/0x90 [ 73.438035] [] ? _raw_spin_unlock_irq+0x27/0x40 [ 73.438035] [] ? 
trace_hardirqs_on_caller+0x185/0x220 [ 73.438035] [] f2fs_llseek+0xf7/0x420 [ 73.438035] [] SyS_lseek+0x65/0xa0 [ 73.438035] [] system_call_fastpath+0x16/0x1b [ 73.438035] Code: ba 00 00 00 00 00 88 ff ff 48 c1 f8 06 48 c1 e0 0c 48 01 d0 8b 98 ec 0f 00 00 39 98 e8 0f 00 00 48 8d 90 68 01 00 00 48 0f 45 d0 <8b> 04 8a 89 47 24 31 c0 eb 75 41 bc e4 ff ff ff 4d 85 f6 74 19 [ 73.438035] RIP [] get_dnode_of_data+0x3a9/0x440 [ 73.438035] RSP [ 73.438035] CR2: 88043368e340 [ 73.438035] ---[ end trace e94f7065a7961f54 ]--- -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: f2fs get_dnode_of_data oops
2014-09-07 22:14 GMT+03:00 Tommi Rantala : > Hello, > > Hit this oops while fuzzing v3.17-rc3-176-g2b12164 with Trinity. > > Tommi > > > BUG: unable to handle kernel paging request at 8804338717a8 > IP: [] get_dnode_of_data+0x3a9/0x440 > PGD 4594067 PUD 0 > Oops: [#1] SMP DEBUG_PAGEALLOC > CPU: 0 PID: 4719 Comm: trinity-c3 Not tainted 3.17.0-rc3+ #33 > Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 > task: 88001563 ti: 88000724c000 task.ti: 88000724c000 > RIP: 0010:[] [] > get_dnode_of_data+0x3a9/0x440 > RSP: 0018:88000724fe08 EFLAGS: 00010246 > RAX: 880033874000 RBX: 00f8 RCX: f590 > RDX: 880033874168 RSI: 88000724fd98 RDI: 88000724fef0 > RBP: 88000724feb8 R08: R09: > R10: 0001 R11: 83b33f90 R12: f590 > R13: R14: eace1d00 R15: 8800209f8000 > FS: 7f2bd22dc700() GS:88003fa0() knlGS: > CS: 0010 DS: ES: CR0: 8005003b > CR2: 8804338717a8 CR3: 346c CR4: 06f0 > DR0: 0185d000 DR1: 0185d000 DR2: > DR3: DR6: 0ff0 DR7: 000b0602 > Stack: > 88000724fef0 88000724fe30 880036c18000 0004 > 8800209f80f0 0002f590 81189f1d 8800f590 > 0246 81189ce0 00f8 > Call Trace: > [] ? trace_hardirqs_on+0xd/0x10 > [] ? mark_held_locks+0x90/0xa0 > [] ? trace_hardirqs_on_caller+0x185/0x220 > [] f2fs_llseek+0xf7/0x420 > [] SyS_lseek+0x65/0xa0 > [] system_call_fastpath+0x16/0x1b > Code: ba 00 00 00 00 00 88 ff ff 48 c1 f8 06 48 c1 e0 0c 48 01 d0 8b > 98 ec 0f 00 00 39 98 e8 0f 00 00 48 8d 90 68 01 00 00 48 0f 45 d0 <8b> > 04 8a 89 47 24 31 c0 eb 75 41 bc e4 ff ff ff 4d 85 f6 74 19 > RIP [] get_dnode_of_data+0x3a9/0x440 > RSP > CR2: 8804338717a8 > ---[ end trace bed7b35d1c48e9c3 ]--- If it helps, here is the location of the crash: (gdb) list *0x81779039 0x81779039 is in get_dnode_of_data (fs/f2fs/f2fs.h:950). 
945 { 946 struct f2fs_node *raw_node; 947 __le32 *addr_array; 948 raw_node = F2FS_NODE(node_page); 949 addr_array = blkaddr_in_node(raw_node); 950 return le32_to_cpu(addr_array[offset]); 951 } 952 953 static inline int f2fs_test_bit(unsigned int nr, char *addr) 954 { (gdb) Tommi -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
f2fs get_dnode_of_data oops
Hello, Hit this oops while fuzzing v3.17-rc3-176-g2b12164 with Trinity. Tommi BUG: unable to handle kernel paging request at 8804338717a8 IP: [] get_dnode_of_data+0x3a9/0x440 PGD 4594067 PUD 0 Oops: [#1] SMP DEBUG_PAGEALLOC CPU: 0 PID: 4719 Comm: trinity-c3 Not tainted 3.17.0-rc3+ #33 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 task: 88001563 ti: 88000724c000 task.ti: 88000724c000 RIP: 0010:[] [] get_dnode_of_data+0x3a9/0x440 RSP: 0018:88000724fe08 EFLAGS: 00010246 RAX: 880033874000 RBX: 00f8 RCX: f590 RDX: 880033874168 RSI: 88000724fd98 RDI: 88000724fef0 RBP: 88000724feb8 R08: R09: R10: 0001 R11: 83b33f90 R12: f590 R13: R14: eace1d00 R15: 8800209f8000 FS: 7f2bd22dc700() GS:88003fa0() knlGS: CS: 0010 DS: ES: CR0: 8005003b CR2: 8804338717a8 CR3: 346c CR4: 06f0 DR0: 0185d000 DR1: 0185d000 DR2: DR3: DR6: 0ff0 DR7: 000b0602 Stack: 88000724fef0 88000724fe30 880036c18000 0004 8800209f80f0 0002f590 81189f1d 8800f590 0246 81189ce0 00f8 Call Trace: [] ? trace_hardirqs_on+0xd/0x10 [] ? mark_held_locks+0x90/0xa0 [] ? trace_hardirqs_on_caller+0x185/0x220 [] f2fs_llseek+0xf7/0x420 [] SyS_lseek+0x65/0xa0 [] system_call_fastpath+0x16/0x1b Code: ba 00 00 00 00 00 88 ff ff 48 c1 f8 06 48 c1 e0 0c 48 01 d0 8b 98 ec 0f 00 00 39 98 e8 0f 00 00 48 8d 90 68 01 00 00 48 0f 45 d0 <8b> 04 8a 89 47 24 31 c0 eb 75 41 bc e4 ff ff ff 4d 85 f6 74 19 RIP [] get_dnode_of_data+0x3a9/0x440 RSP CR2: 8804338717a8 ---[ end trace bed7b35d1c48e9c3 ]--- -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
RTNL: assertion failed at net/ipv6/addrconf.c (1699)
Hi, Was fuzzing Linus v3.17-rc2-89-g59753a8 with Trinity as the root user in qemu, when I hit the following assertion failures. Tommi [init] Started watchdog process, PID is 4841 [main] Main thread is alive. [ 77.229699] sctp: [Deprecated]: trinity-main (pid 4842) Use of int in max_burst socket option deprecated. [ 77.229699] Use struct sctp_assoc_value instead [ 77.297196] RTNL: assertion failed at net/ipv6/addrconf.c (1699) [ 77.298080] CPU: 0 PID: 4842 Comm: trinity-main Not tainted 3.17.0-rc2+ #30 [ 77.299039] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 [ 77.299789] 88003d76a618 880026133c50 8238ba79 880037c84520 [ 77.300829] 880026133c90 820bd52b 82d86c40 [ 77.301869] f76fd1e1 8800382d8000 8800382d8220 [ 77.302906] Call Trace: [ 77.303246] [] dump_stack+0x4d/0x66 [ 77.303928] [] addrconf_join_solict+0x4b/0xb0 [ 77.304731] [] ipv6_dev_ac_inc+0x2bb/0x330 [ 77.305498] [] ? ac6_seq_start+0x260/0x260 [ 77.306257] [] ipv6_sock_ac_join+0x26e/0x360 [ 77.307046] [] ? ipv6_sock_ac_join+0x99/0x360 [ 77.307798] [] do_ipv6_setsockopt.isra.5+0xa70/0xf20 [ 77.308570] [] ? sched_clock_local+0x1d/0x80 [ 77.309260] [] ? kvm_clock_read+0x27/0x40 [ 77.309915] [] ? sched_clock+0x9/0x10 [ 77.310537] [] ? sock_has_perm+0x168/0x1e0 [ 77.311204] [] ? sched_clock_cpu+0xa8/0xf0 [ 77.311866] [] ? local_clock+0x1b/0x30 [ 77.312501] [] ? lock_release_holdtime+0x1d/0x170 [ 77.313241] [] ? sock_has_perm+0x180/0x1e0 [ 77.313905] [] ? selinux_msg_queue_alloc_security+0xa0/0xa0 [ 77.314746] [] ipv6_setsockopt+0x53/0xb0 [ 77.315397] [] udpv6_setsockopt+0x25/0x30 [ 77.316058] [] sock_common_setsockopt+0xf/0x20 [ 77.316764] [] SyS_setsockopt+0x8e/0xd0 [ 77.317406] [] system_call_fastpath+0x16/0x1b [main] 375 sockets created based on info from socket cachefile. [main] Generating file descriptors [main] Added 129 filenames from /dev [main] Added 44048 filenames from /proc [main] Added 18192 filenames from /sys [main] Enabled 9 fd providers. [watchdog] Watchdog is alive. 
(pid:4841) [child3:4846] finit_module (313) returned ENOSYS, marking as inactive. [child1:4844] kcmp (312) returned ENOSYS, marking as inactive. [child2:4845] uselib (134) returned ENOSYS, marking as inactive. [child1:4844] nfsservctl (180) returned ENOSYS, marking as inactive. [child2:4845] delete_module (129:[32BIT]) returned ENOSYS, marking as inactive. [child2:4845] init_module (175) returned ENOSYS, marking as inactive. [ 84.126609] trinity-c7: vm86 mode not supported on 64 bit kernel [child7:4850] vm86 (166:[32BIT]) returned ENOSYS, marking as inactive. [main] Bailing main loop because ctrl-c. [ 84.345840] RTNL: assertion failed at net/ipv6/addrconf.c (1712) [ 84.346615] CPU: 0 PID: 4842 Comm: trinity-main Not tainted 3.17.0-rc2+ #30 [ 84.347426] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 [ 84.348102] 88003d76a618 880026133d10 8238ba79 8800382d8000 [ 84.349018] 880026133d50 820bd5db 81141555 8800382d8220 [ 84.349935] 8800382d8000 f76fd1e1 88003d76a618 8800382d8000 [ 84.350848] Call Trace: [ 84.351149] [] dump_stack+0x4d/0x66 [ 84.351751] [] addrconf_leave_solict+0x4b/0xb0 [ 84.352574] [] ? __local_bh_enable_ip+0xa5/0xf0 [ 84.353315] [] __ipv6_dev_ac_dec+0xc3/0x140 [ 84.354019] [] ipv6_dev_ac_dec+0x98/0xb0 [ 84.354687] [] ipv6_sock_ac_close+0x10d/0x1a0 [ 84.355410] [] ? ipv6_sock_ac_close+0x2e/0x1a0 [ 84.356147] [] inet6_release+0x23/0x40 [ 84.356789] [] sock_release+0x14/0x80 [ 84.357410] [] sock_close+0xd/0x20 [ 84.358042] [] __fput+0x111/0x1e0 [ 84.358622] [] fput+0x9/0x10 [ 84.359196] [] task_work_run+0x9e/0xd0 [ 84.359825] [] do_exit+0x456/0xb30 [ 84.360419] [] ? retint_swapgs+0x13/0x1b [ 84.361075] [] do_group_exit+0x84/0xd0 [ 84.361705] [] SyS_exit_group+0xf/0x10 [ 84.362338] [] system_call_fastpath+0x16/0x1b [watchdog] [4841] Watchdog exiting because ctrl-c. [init] Ran 775 syscalls. 
Successes: 179 Failures: 596 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
drm_ioctl & WARNING at arch/x86/mm/ioremap.c:98
Hello, Got this warning while fuzzing v3.17-rc2-40-gff0c57a with Trinity. Was running as root in qemu. Tommi ioremap: invalid physical address 40004000 [ cut here ] WARNING: CPU: 0 PID: 2887 at arch/x86/mm/ioremap.c:98 __ioremap_caller+0x7a/0x2e0() CPU: 0 PID: 2887 Comm: trinity-c6 Not tainted 3.17.0-rc2+ #29 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 0009 880036ee7c90 8238ba09 880036ee7cc8 8113c603 40004000 40008000 81747c7d 0010 880036ee7cd8 Call Trace: [] dump_stack+0x4d/0x66 [] warn_slowpath_common+0x73/0x90 [] ? drm_addmap_core+0x1dd/0x600 [] warn_slowpath_null+0x15/0x20 [] __ioremap_caller+0x7a/0x2e0 [] ? kmemleak_alloc+0x23/0x50 [] ? kmem_cache_alloc_trace+0x119/0x290 [] ? drm_addmap_core+0x3b/0x600 [] ioremap_nocache+0x12/0x20 [] drm_addmap_core+0x1dd/0x600 [] drm_addmap_ioctl+0x45/0x70 [] drm_ioctl+0x3fe/0x640 [] ? drm_addmap+0x30/0x30 [] ? avc_has_perm+0x20/0x2f0 [] ? sched_clock_cpu+0xa8/0xf0 [] do_vfs_ioctl+0x4d0/0x510 [] ? selinux_file_ioctl+0xf5/0x100 [] SyS_ioctl+0x4e/0x80 [] system_call_fastpath+0x16/0x1b ---[ end trace c988df0287baa491 ]--- -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: drm_mode_create_dumb_ioctl: divide error
22.8.2014 13.38 kirjoitti "David Herrmann" : > > Hi > > On Thu, Aug 21, 2014 at 8:18 PM, Tommi Rantala wrote: > > Hello, > > > > Triggered this while fuzzing v3.17-rc1-51-g372b1db with Trinity. > > > > Tommi > > > > > > [drm:drm_mode_legacy_fb_format] *ERROR* bad bpp, assuming x8r8g8b8 pixel > > format > > divide error: [#1] SMP DEBUG_PAGEALLOC > > CPU: 0 PID: 2854 Comm: trinity-c7 Not tainted 3.17.0-rc1+ #14 > > Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 > > task: 88003926cac0 ti: 8800356b4000 task.ti: 8800356b4000 > > RIP: 0010:[] [] > > drm_mode_create_dumb_ioctl+0x53/0xa0 > > RSP: 0018:8800356b7dc0 EFLAGS: 00010246 > > RAX: RBX: 88003545da68 RCX: > > RDX: RSI: 8800356b7e18 RDI: 88003d5c67b0 > > RBP: 8800356b7dc8 R08: R09: > > R10: R11: 817f6d30 R12: 00b2 > > R13: fff2 R14: 88003d5c67b0 R15: 88003545da68 > > FS: 7f06208fa700() GS:88003fa0() knlGS: > > CS: 0010 DS: ES: CR0: 80050033 > > CR2: 01903108 CR3: 36efa000 CR4: 06f0 > > Stack: > > 8800356b7e18 8800356b7ec8 8165ac60 8800356b7df8 > > 8800356b7e18 8800356b7e18 824e1440 00d52000 > > 0020 c02064b2 fff2 > > Call Trace: > > [] drm_ioctl+0x3b0/0x640 > > [] ? avc_has_perm+0x218/0x2f0 > > [] ? avc_has_perm+0x20/0x2f0 > > [] ? sched_clock_cpu+0xa8/0xf0 > > [] do_vfs_ioctl+0x4d0/0x510 > > [] ? selinux_file_ioctl+0xf5/0x100 > > [] SyS_ioctl+0x4e/0x80 > > [] system_call_fastpath+0x16/0x1b > > Code: 55 41 b9 ff ff ff ff 41 83 c0 07 44 89 c8 41 c1 e8 03 48 89 e5 > > 53 48 89 d3 31 d2 f7 f1 41 39 c0 77 46 41 0f af c8 31 d2 44 89 c8 > > f1 41 39 c2 77 36 41 0f af ca b8 ea ff ff ff 81 c1 ff 0f 00 > > RIP [] drm_mode_create_dumb_ioctl+0x53/0xa0 > > RSP > > ---[ end trace 6919129b71d9bf98 ]--- > > [drm:drm_mode_legacy_fb_format] *ERROR* bad bpp, assuming x8r8g8b8 pixel > > format > > > > > > > > (gdb) list *0x816688e3 > > 0x816688e3 is in drm_mode_create_dumb_ioctl > > (drivers/gpu/drm/drm_crtc.c:4703). 
> > 4698/* overflow checks for 32bit size calculations */ > > 4699cpp = DIV_ROUND_UP(args->bpp, 8); > > 4700if (cpp > 0xffffffffU / args->width) > > 4701return -EINVAL; > > 4702stride = cpp * args->width; > > 4703if (args->height > 0xffffffffU / stride) > > 4704return -EINVAL; > > Hm, this doesn't make sense to me. args->bpp/width/height are > guaranteed to be non-zero and 32bit. Therefore, DIV_ROUND_UP() cannot > return 0 and "cpp" is thus non-zero. The overflow check makes sure > "cpp * args->width" cannot overflow, both are non-zero so "stride" is > non-zero and valid. > > I cannot make much sense out of the x86 assembly below, so help welcome. Hi David, I put a BUG_ON(stride==0), and it is indeed crashing due to stride being zero. I tried to get the variables with gdb since I'm running the kernel in qemu, but for whatever reason the breakpoints do not seem to always have any effect, IOW the execution blasts right through the breakpoints. Finally I did get one instance where the breakpoint would trigger, and gdb told me this. Does it make sense? (gdb) bt #0 drm_mode_create_dumb_ioctl (dev=0x88003d634520, data=0x880034837e18, file_priv=0x8800361faa40) at drivers/gpu/drm/drm_crtc.c:4704 #1 0x8165ad20 in drm_ioctl (filp=, cmd=, arg=) at drivers/gpu/drm/drm_ioctl.c:727 #2 0x812922a0 in vfs_ioctl (arg=, cmd=, filp=) at fs/ioctl.c:43 #3 do_vfs_ioctl (filp=0x88003d634520, fd=36, cmd=, arg=) at fs/ioctl.c:598 #4 0x8129232e in SYSC_ioctl (arg=, cmd=, fd=) at fs/ioctl.c:613 #5 SyS_ioctl (fd=36, cmd=3223348402, arg=9592832) at fs/ioctl.c:604 #6 #7 0x7f69a2b0eb69 in ?? () #8 0xdead4ead6b6b0e0e in ?? () #9 0x6b6b6b6b in ?? () #10 0x in ?? () #11 0x840ba810 in ?? () #12 0x838a9e70 in lock_classes () #13 0x83924540 in lock_classes () #14 0x828ed4ef in kallsyms_token_index () #15 0x6b6b6b6b in ?? () #16 0x81279ec8 in kmemleak_scan () at mm/kmemleak.c:1410 Backtrace stopped: previous frame inner to this frame (corrupt stack?) 
(gdb) print *args value has been optimized out (gdb) info locals cpp = 0 stride =
drm_mode_create_dumb_ioctl: divide error
Hello, Triggered this while fuzzing v3.17-rc1-51-g372b1db with Trinity. Tommi [drm:drm_mode_legacy_fb_format] *ERROR* bad bpp, assuming x8r8g8b8 pixel format divide error: [#1] SMP DEBUG_PAGEALLOC CPU: 0 PID: 2854 Comm: trinity-c7 Not tainted 3.17.0-rc1+ #14 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 task: 88003926cac0 ti: 8800356b4000 task.ti: 8800356b4000 RIP: 0010:[] [] drm_mode_create_dumb_ioctl+0x53/0xa0 RSP: 0018:8800356b7dc0 EFLAGS: 00010246 RAX: RBX: 88003545da68 RCX: RDX: RSI: 8800356b7e18 RDI: 88003d5c67b0 RBP: 8800356b7dc8 R08: R09: R10: R11: 817f6d30 R12: 00b2 R13: fff2 R14: 88003d5c67b0 R15: 88003545da68 FS: 7f06208fa700() GS:88003fa0() knlGS: CS: 0010 DS: ES: CR0: 80050033 CR2: 01903108 CR3: 36efa000 CR4: 06f0 Stack: 8800356b7e18 8800356b7ec8 8165ac60 8800356b7df8 8800356b7e18 8800356b7e18 824e1440 00d52000 0020 c02064b2 fff2 Call Trace: [] drm_ioctl+0x3b0/0x640 [] ? avc_has_perm+0x218/0x2f0 [] ? avc_has_perm+0x20/0x2f0 [] ? sched_clock_cpu+0xa8/0xf0 [] do_vfs_ioctl+0x4d0/0x510 [] ? selinux_file_ioctl+0xf5/0x100 [] SyS_ioctl+0x4e/0x80 [] system_call_fastpath+0x16/0x1b Code: 55 41 b9 ff ff ff ff 41 83 c0 07 44 89 c8 41 c1 e8 03 48 89 e5 53 48 89 d3 31 d2 f7 f1 41 39 c0 77 46 41 0f af c8 31 d2 44 89 c8 f1 41 39 c2 77 36 41 0f af ca b8 ea ff ff ff 81 c1 ff 0f 00 RIP [] drm_mode_create_dumb_ioctl+0x53/0xa0 RSP ---[ end trace 6919129b71d9bf98 ]--- [drm:drm_mode_legacy_fb_format] *ERROR* bad bpp, assuming x8r8g8b8 pixel format (gdb) list *0x816688e3 0x816688e3 is in drm_mode_create_dumb_ioctl (drivers/gpu/drm/drm_crtc.c:4703). 
4698/* overflow checks for 32bit size calculations */ 4699cpp = DIV_ROUND_UP(args->bpp, 8); 4700if (cpp > 0xffffffffU / args->width) 4701return -EINVAL; 4702stride = cpp * args->width; 4703if (args->height > 0xffffffffU / stride) 4704return -EINVAL; 4705 4706/* test for wrap-around */ 4707size = args->height * stride; (gdb) disassemble drm_mode_create_dumb_ioctl Dump of assembler code for function drm_mode_create_dumb_ioctl: 0x81668890 <+0>: mov0x20(%rdi),%rax 0x81668894 <+4>: mov0x160(%rax),%r11 0x8166889b <+11>:test %r11,%r11 0x8166889e <+14>:je 0x8166890f 0x816688a0 <+16>:mov0x4(%rsi),%ecx 0x816688a3 <+19>:test %ecx,%ecx 0x816688a5 <+21>:je 0x81668918 0x816688a7 <+23>:mov(%rsi),%r10d 0x816688aa <+26>:test %r10d,%r10d 0x816688ad <+29>:je 0x81668918 0x816688af <+31>:mov0x8(%rsi),%r8d 0x816688b3 <+35>:test %r8d,%r8d 0x816688b6 <+38>:je 0x81668918 0x816688b8 <+40>:push %rbp 0x816688b9 <+41>:mov$0x,%r9d 0x816688bf <+47>:add$0x7,%r8d 0x816688c3 <+51>:mov%r9d,%eax 0x816688c6 <+54>:shr$0x3,%r8d 0x816688ca <+58>:mov%rsp,%rbp 0x816688cd <+61>:push %rbx 0x816688ce <+62>:mov%rdx,%rbx 0x816688d1 <+65>:xor%edx,%edx 0x816688d3 <+67>:div%ecx 0x816688d5 <+69>:cmp%eax,%r8d 0x816688d8 <+72>:ja 0x81668920 0x816688da <+74>:imul %r8d,%ecx 0x816688de <+78>:xor%edx,%edx 0x816688e0 <+80>:mov%r9d,%eax 0x816688e3 <+83>:div%ecx 0x816688e5 <+85>:cmp%eax,%r10d 0x816688e8 <+88>:ja 0x81668920 0x816688ea <+90>:imul %r10d,%ecx 0x816688ee <+94>:mov$0xffea,%eax 0x816688f3 <+99>:add$0xfff,%ecx 0x816688f9 <+105>: and$0xf000,%ecx 0x816688ff <+111>: je 0x81668928 0x81668901 <+113>: mov%rsi,%rdx 0x81668904 <+116>: mov%rdi,%rsi 0x81668907 <+119>: mov%rbx,%rdi 0x8166890a <+122>: callq *%r11 0x8166890d <+125>: jmp0x81668928 0x8166890f <+127>: mov$0xffda,%eax 0x81668914 <+132>: retq 0x81668915 <+133>: nopl (%rax) 0x81668918 <+136>: mov$0xffea,%eax 0x8166891d <+141>: retq 0x8166891e <+142>: xchg %ax,%ax 0x81668920 <+144>: mov$0xffea,%eax 0x81668925 <+149>:
/proc/asound/card0/oss_mixer stack corruption
Hello, Trinity discovered that writing 128 bytes to /proc/asound/card0/oss_mixer triggers a stack corruption. Tommi # printf %128s > /proc/asound/card0/oss_mixer ALSA: mixer_oss: invalid OSS volume '' Kernel panic - not syncing: stack-protector: Kernel stack is corrupted in: 81e193ba CPU: 0 PID: 2778 Comm: bash Not tainted 3.17.0-rc1+ #13 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 880039fd4bf0 880034c87bd8 8229824a 828e2a40 880034c87c50 8229051d 8810 880034c87c60 880034c87c00 0020 81e193ba 0080 Call Trace: [] dump_stack+0x4d/0x66 [] panic+0xc8/0x201 [] ? snd_mixer_oss_proc_write+0x24a/0x270 [] __stack_chk_fail+0x16/0x20 [] snd_mixer_oss_proc_write+0x24a/0x270 [] ? kvm_clock_read+0x27/0x40 [] snd_info_entry_release+0x6c/0x110 [] close_pdeo+0x136/0x1a0 [] ? __lock_acquire+0x951/0xb40 [] ? kvm_clock_read+0x27/0x40 [] proc_reg_release+0x3e/0x60 [] __fput+0x111/0x1e0 [] fput+0x9/0x10 [] task_work_run+0x9e/0xd0 [] do_notify_resume+0x55/0x70 [] int_signal+0x12/0x17 Kernel Offset: 0x0 from 0x8100 (relocation range: 0x8000-0x9fff) -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
kernel BUG at security/keys/keyring.c:1003!
Hello, Hit the following BUG while fuzzing 3.14.0-rc3 with trinity. Tommi [708836.755392] [ cut here ] [708836.756044] kernel BUG at /build/linux/security/keys/keyring.c:1003! [708836.756044] invalid opcode: [#1] SMP DEBUG_PAGEALLOC [708836.756044] CPU: 0 PID: 5594 Comm: trinity-c26 Not tainted 3.14.0-rc3 #1 [708836.756044] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 [708836.756044] task: 880036a3ca40 ti: 880003e6e000 task.ti: 880003e6e000 [708836.756044] RIP: 0010:[] [] keyring_detect_cycle_iterator+0xe/0x20 [708836.756044] RSP: :880003e6fdb0 EFLAGS: 00010206 [708836.756044] RAX: 880056025b82 RBX: 003a RCX: 0003 [708836.756044] RDX: 0003 RSI: 880003e6fe98 RDI: 880056025b80 [708836.756044] RBP: 880003e6fdb0 R08: 0064 R09: [708836.756044] R10: 880036a3ca40 R11: R12: 880003e6fe98 [708836.756044] R13: R14: 880003e6fe98 R15: 88006c950780 [708836.756044] FS: 7f88ae6bd700() GS:8800bf60() knlGS: [708836.756044] CS: 0010 DS: ES: CR0: 8005003b [708836.756044] CR2: 0004 CR3: 3ba4d000 CR4: 06f0 [708836.756044] DR0: 00899000 DR1: 0115a000 DR2: 01b66000 [708836.756044] DR3: DR6: 0ff0 DR7: 0600 [708836.756044] Stack: [708836.756044] 880003e6fe80 814ac9e2 81078869 880003e6fdf0 [708836.756044] 81179f4d 8800bf7d5a40 001d5a40 [708836.756044] 88006c950780 0002 0001 [708836.756044] Call Trace: [708836.756044] [] search_nested_keyrings+0xf2/0x340 [708836.756044] [] ? sched_clock+0x9/0x10 [708836.756044] [] ? sched_clock_local+0x1d/0x90 [708836.756044] [] ? __key_link_check_live_key+0x26/0x160 [708836.756044] [] __key_link_check_live_key+0xe3/0x160 [708836.756044] [] ? __key_link_check_live_key+0x26/0x160 [708836.756044] [] ? 
keyring_instantiate+0xf0/0xf0 [708836.756044] [] key_link+0x5c/0xb0 [708836.756044] [] keyctl_keyring_link+0x7e/0xb0 [708836.756044] [] SyS_keyctl+0x98/0x1a0 [708836.756044] [] ia32_do_call+0x13/0x13 [708836.756044] Code: c0 eb 12 66 2e 0f 1f 84 00 00 00 00 00 31 c0 66 0f 1f 44 00 00 5b 41 5c 5d f3 c3 66 90 48 83 e7 fc 48 39 7e 28 55 48 89 e5 74 02 <0f> 0b b8 01 00 00 00 48 c7 46 48 dd ff ff ff 5d c3 90 55 48 89 [708836.756044] RIP [] keyring_detect_cycle_iterator+0xe/0x20 [708836.756044] RSP [708836.855231] ---[ end trace e2b699c76aca5cff ]--- -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
btrfs "possible irq lock inversion dependency detected"
Hello, Saw this while fuzzing the kernel with Trinity. Tommi [ 396.136048] = [ 396.136048] [ INFO: possible irq lock inversion dependency detected ] [ 396.136048] 3.14.0-rc3 #1 Not tainted [ 396.136048] - [ 396.136048] kswapd0/1482 just changed the state of lock: [ 396.136048] (&delayed_node->mutex){+.+.-.}, at: [] __btrfs_release_delayed_node+0x4b/0x1e0 [ 396.136048] but this lock took another, RECLAIM_FS-unsafe lock in the past: [ 396.136048] (&found->groups_sem){+.} and interrupts could create inverse lock ordering between them. [ 396.136048] [ 396.136048] other info that might help us debug this: [ 396.136048] Possible interrupt unsafe locking scenario: [ 396.136048] [ 396.136048]CPU0CPU1 [ 396.136048] [ 396.136048] lock(&found->groups_sem); [ 396.136048]local_irq_disable(); [ 396.136048]lock(&delayed_node->mutex); [ 396.136048]lock(&found->groups_sem); [ 396.136048] [ 396.136048] lock(&delayed_node->mutex); [ 396.136048] [ 396.136048] *** DEADLOCK *** [ 396.136048] [ 396.136048] 2 locks held by kswapd0/1482: [ 396.136048] #0: (shrinker_rwsem){..}, at: [] shrink_slab+0x3a/0x170 [ 396.136048] #1: (&type->s_umount_key#25){.+}, at: [] grab_super_passive+0x4f/0x80 [ 396.136048] [ 396.136048] the shortest dependencies between 2nd lock and 1st lock: [ 396.136048] -> (&found->groups_sem){+.} ops: 38935 { [ 396.136048] HARDIRQ-ON-W at: [ 396.136048] [] __lock_acquire+0x88e/0x1d90 [ 396.136048] [] lock_acquire+0x182/0x210 [ 396.136048] [] down_write+0x5c/0xc0 [ 396.136048] [] __link_block_group+0x3d/0xf0 [ 396.136048] [] btrfs_read_block_groups+0x392/0x690 [ 396.136048] [] open_ctree+0x1ad7/0x2140 [ 396.136048] [] btrfs_mount+0x44e/0x8e0 [ 396.136048] [] mount_fs+0x7a/0x1a0 [ 396.136048] [] vfs_kern_mount+0x71/0x150 [ 396.136048] [] btrfs_mount+0x831/0x8e0 [ 396.136048] [] mount_fs+0x7a/0x1a0 [ 396.136048] [] vfs_kern_mount+0x71/0x150 [ 396.136048] [] do_mount+0x954/0xb90 [ 396.136048] [] SyS_mount+0x94/0xe0 [ 396.136048] [] do_mount_root+0x1a/0x93 [ 396.136048] [] 
mount_block_root+0xe5/0x203 [ 396.136048] [] mount_root+0xe1/0xea [ 396.136048] [] prepare_namespace+0x13c/0x174 [ 396.136048] [] kernel_init_freeable+0x242/0x251 [ 396.136048] [] kernel_init+0x9/0xf0 [ 396.136048] [] ret_from_fork+0x7c/0xb0 [ 396.136048] HARDIRQ-ON-R at: [ 396.136048] [] __lock_acquire+0x847/0x1d90 [ 396.136048] [] lock_acquire+0x182/0x210 [ 396.136048] [] down_read+0x4c/0xa0 [ 396.136048] [] btrfs_calc_num_tolerated_disk_barrier_failures+0x24a/0x310 [ 396.136048] [] open_ctree+0x1b0f/0x2140 [ 396.136048] [] btrfs_mount+0x44e/0x8e0 [ 396.136048] [] mount_fs+0x7a/0x1a0 [ 396.136048] [] vfs_kern_mount+0x71/0x150 [ 396.136048] [] btrfs_mount+0x831/0x8e0 [ 396.136048] [] mount_fs+0x7a/0x1a0 [ 396.136048] [] vfs_kern_mount+0x71/0x150 [ 396.136048] [] do_mount+0x954/0xb90 [ 396.136048] [] SyS_mount+0x94/0xe0 [ 396.136048] [] do_mount_root+0x1a/0x93 [ 396.136048] [] mount_block_root+0xe5/0x203 [ 396.136048] [] mount_root+0xe1/0xea [ 396.136048] [] prepare_namespace+0x13c/0x174 [ 396.136048] [] kernel_init_freeable+0x242/0x251 [ 396.136048] [] kernel_init+0x9/0xf0 [ 396.136048] [] ret_from_fork+0x7c/0xb0 [ 396.136048] SOFTIRQ-ON-W at: [ 396.136048] [] __lock_acquire+0x8c3/0x1d90 [ 396.136048] [] lock_acquire+0x182/0x210 [ 396.136048] [] down_write+0x5c/0xc0 [ 396.136048] [] __link_block_group+0x3d/0xf0 [ 396.136048] [] btrfs_read_block_groups+0x392/0x690 [ 396.136048] [] open_ctree+0x1ad7/0x2140 [ 396.136048] [] btrfs_mount+0x44e/0x8e0 [ 396.136048] [
BUG: Bad rss-counter state mm:ffff88005f936c00 idx:0 val:1
Hello, Noticed the following kernel message while fuzzing 3.14.0-rc2-00488-gca03339 with trinity. Should I be worried? [40879.796336] BUG: Bad rss-counter state mm:ffff88005f936c00 idx:0 val:1 Tommi -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
BUG: Bad page state in process trinity-c19
Hello, Hit the following bug while fuzzing with trinity. I can see that Dave reported similar bad page state problems for 3.13-rc4, but this one does not seem to be AIO related. https://lkml.org/lkml/2013/12/18/932 Tommi BUG: Bad page state in process trinity-c19 pfn:2429e page:ea90a780 count:0 mapcount:0 mapping:88003a018758 index:0xed page flags: 0x108(uptodate) page dumped because: non-NULL mapping CPU: 1 PID: 28094 Comm: trinity-c19 Not tainted 3.14.0-rc2-00209-g45f7fdc #1 Hardware name: Hewlett-Packard HP Compaq dc5750 Small Form Factor/0A64h, BIOS 786E3 v02.10 01/25/2007 828f4590 880054591758 82363c9d ea90a780 880054591780 8235d165 ea90a780 ea90a780 8800545917d8 8121a010 828f457f Call Trace: [] dump_stack+0x4d/0x66 [] bad_page+0xd5/0xf2 [] free_pages_prepare+0x1f0/0x2b0 [] free_hot_cold_page+0x3b/0x150 [] free_hot_cold_page_list+0x10e/0x190 [] release_pages+0x1dc/0x210 [] pagevec_lru_move_fn+0xd3/0xf0 [] ? __put_single_page+0x20/0x20 [] __pagevec_lru_add+0x12/0x20 [] __lru_cache_add+0x66/0x90 [] lru_cache_add+0x35/0x40 [] putback_lru_page+0x4a/0xd0 [] migrate_pages+0x84b/0x880 [] ? isolate_freepages_block+0x440/0x440 [] compact_zone+0x249/0x770 [] compact_zone_order+0xb6/0xf0 [] ? native_send_call_func_single_ipi+0x31/0x40 [] try_to_compact_pages+0xb2/0x110 [] __alloc_pages_direct_compact+0xa5/0x1b5 [] __alloc_pages_slowpath+0x73a/0x79e [] ? sched_clock_local+0x1d/0x90 [] __alloc_pages_nodemask+0x226/0x3b0 [] alloc_pages_vma+0x16f/0x1e0 [] ? do_huge_pmd_anonymous_page+0x218/0x3f0 [] do_huge_pmd_anonymous_page+0x218/0x3f0 [] handle_mm_fault+0x1d7/0x320 [] __do_page_fault+0x4d0/0x540 [] ? trace_hardirqs_on_caller+0x185/0x220 [] ? trace_hardirqs_on+0xd/0x10 [] ? _raw_spin_unlock_irq+0x27/0x40 [] ? finish_task_switch+0x81/0x130 [] ? finish_task_switch+0x43/0x130 [] ? 
trace_hardirqs_off_thunk+0x3a/0x3c [] do_page_fault+0x9/0x10 [] page_fault+0x28/0x30 Disabling lock debugging due to kernel taint -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
lockdep: strange %s#5 lock name
Hello, Noticed a suspicious "%s#5" lock name in a lockdep splat while fuzzing with trinity. Tommi [249844.491141] INFO: task kworker/u2:2:32113 blocked for more than 120 seconds. [249844.493268] Not tainted v3.13-11268-g8a1f006 #3 [249844.494731] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [249844.496922] kworker/u2:2D 880074a92520 0 32113 2 0x [249844.498985] Workqueue: netns cleanup_net [249844.500188] 8800956d1c78 0046 880074a92520 001d4f80 [249844.502360] 8800956d1fd8 001d4f80 880074a9 880074a92520 [249844.504617] 8800b414dab8 82db4d20 82db4d28 0246 [249844.506647] Call Trace: [249844.507331] [] schedule+0x65/0x70 [249844.508576] [] schedule_preempt_disabled+0x11/0x20 [249844.510185] [] mutex_lock_nested+0x285/0x4a0 [249844.511777] [] ? cleanup_net+0x80/0x1e0 [249844.513422] [] ? cleanup_net+0x80/0x1e0 [249844.514998] [] cleanup_net+0x80/0x1e0 [249844.516523] [] process_one_work+0x366/0x690 [249844.518172] [] ? process_one_work+0x240/0x690 [249844.519840] [] worker_thread+0x21e/0x370 [249844.521422] [] ? rescuer_thread+0x2c0/0x2c0 [249844.523040] [] kthread+0xf0/0x100 [249844.524425] [] ? finish_task_switch+0x81/0x130 [249844.525987] [] ? insert_kthread_work+0x90/0x90 [249844.527487] [] ret_from_fork+0x7c/0xb0 [249844.528959] [] ? insert_kthread_work+0x90/0x90 [249844.530451] 3 locks held by kworker/u2:2/32113: [249844.531638] #0: (%s#5){.+.+.+}, at: [] process_one_work+0x240/0x690 [249844.533891] #1: (net_cleanup_work){+.+.+.}, at: [] process_one_work+0x240/0x690 [249844.536317] #2: (net_mutex){+.+.+.}, at: [] cleanup_net+0x80/0x1e0 [249844.538744] INFO: task trinity-c10:23911 blocked for more than 120 seconds. [249844.540723] Not tainted v3.13-11268-g8a1f006 #3 [249844.542192] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. 
[249844.544785] trinity-c10 D 8800b963a520 0 23911 22465 0x0004 [249844.546929] 880020d49df8 0046 8800b963a520 001d4f80 [249844.549183] 880020d49fd8 001d4f80 8800bb42a520 8800b963a520 [249844.551401] 880024e58000 82db4d20 82db4d28 0246 [249844.553376] Call Trace: [249844.553998] [] schedule+0x65/0x70 [249844.555237] [] schedule_preempt_disabled+0x11/0x20 [249844.556844] [] mutex_lock_nested+0x285/0x4a0 [249844.558349] [] ? copy_net_ns+0x97/0x150 [249844.559748] [] ? copy_net_ns+0x97/0x150 [249844.561305] [] copy_net_ns+0x97/0x150 [249844.562789] [] create_new_namespaces+0x126/0x1c0 [249844.564551] [] ? ip_build_and_send_pkt+0x1cd/0x260 [249844.566484] [] unshare_nsproxy_namespaces+0xa7/0xe0 [249844.568325] [] SyS_unshare+0x116/0x2c0 [249844.569838] [] ? ip_build_and_send_pkt+0x1cd/0x260 [249844.571664] [] system_call_fastpath+0x16/0x1b [249844.573328] [] ? ip_build_and_send_pkt+0x1cd/0x260 [249844.574886] 1 lock held by trinity-c10/23911: [249844.576036] #0: (net_mutex){+.+.+.}, at: [] copy_net_ns+0x97/0x150 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: BUG ip_dst_cache (Not tainted): Poison overwritten
2014-02-01 Tommi Rantala : > 2014-01-31 Eric Dumazet : >> On Fri, 2014-01-31 at 22:11 +0200, Tommi Rantala wrote: >>> Hello, >>> >>> Hit this while fuzzing v3.13-9218-g0e47c96 with trinity in a qemu >>> virtual machine. >>> >>> Tommi >> >> Hi Tommi >> >> Could you please try the following fix ? > > Thanks, giving this a spin. This does not reproduce very easily with > Trinity, I'll let you know if anything blows up. Looking good after two days of fuzzing in several virtual machines. The bug has not been reproduced, and no other ill effects visible. Thanks! Tommi -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: BUG ip_dst_cache (Not tainted): Poison overwritten
2014-01-31 Eric Dumazet : > On Fri, 2014-01-31 at 22:11 +0200, Tommi Rantala wrote: >> Hello, >> >> Hit this while fuzzing v3.13-9218-g0e47c96 with trinity in a qemu >> virtual machine. >> >> Tommi > > Hi Tommi > > Could you please try the following fix ? Thanks, giving this a spin. This does not reproduce very easily with Trinity, I'll let you know if anything blows up. Tommi > I'll send an official patch in a couple of hours > > There are two bugs : > One dst leak, and one plain bug, as rt initial NULL > value might be scratched. > > net/ipv4/ip_tunnel.c | 27 ++- > 1 file changed, 10 insertions(+), 17 deletions(-) > > diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c > index bd28f386bd02..bc6acdcb7625 100644 > --- a/net/ipv4/ip_tunnel.c > +++ b/net/ipv4/ip_tunnel.c > @@ -101,27 +101,21 @@ static void tunnel_dst_reset_all(struct ip_tunnel *t) > __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL); > } > > -static struct dst_entry *tunnel_dst_get(struct ip_tunnel *t) > +static struct dst_entry *tunnel_dst_check(struct ip_tunnel *t, u32 cookie) > { > struct dst_entry *dst; > > rcu_read_lock(); > dst = rcu_dereference(this_cpu_ptr(t->dst_cache)->dst); > - if (dst) > + if (dst) { > + if (dst->obsolete && dst->ops->check(dst, cookie) == NULL) { > + rcu_read_unlock(); > + tunnel_dst_reset(t); > + return NULL; > + } > dst_hold(dst); > - rcu_read_unlock(); > - return dst; > -} > - > -static struct dst_entry *tunnel_dst_check(struct ip_tunnel *t, u32 cookie) > -{ > - struct dst_entry *dst = tunnel_dst_get(t); > - > - if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) { > - tunnel_dst_reset(t); > - return NULL; > } > - > + rcu_read_unlock(); > return dst; > } > > @@ -584,7 +578,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct > net_device *dev, > struct flowi4 fl4; > u8 tos, ttl; > __be16 df; > - struct rtable *rt = NULL; /* Route to the other host */ > + struct rtable *rt; /* Route to the other host */ > unsigned int max_headroom; /* The extra 
header space needed */ > __be32 dst; > int err; > @@ -657,8 +651,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct > net_device *dev, > init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr, > tunnel->parms.o_key, RT_TOS(tos), > tunnel->parms.link); > > - if (connected) > - rt = (struct rtable *)tunnel_dst_check(tunnel, 0); > + rt = (connected) ? (struct rtable *)tunnel_dst_check(tunnel, 0) : > NULL; > > if (!rt) { > rt = ip_route_output_key(tunnel->net, &fl4); > > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
BUG ip_dst_cache (Not tainted): Poison overwritten
Hello, Hit this while fuzzing v3.13-9218-g0e47c96 with trinity in a qemu virtual machine. Tommi [ 6329.061605] = [ 6329.062014] BUG ip_dst_cache (Not tainted): Poison overwritten [ 6329.062014] - [ 6329.062014] Disabling lock debugging due to kernel taint [ 6329.062014] INFO: 0x8800b4809940-0x8800b4809940. First byte 0x6a instead of 0x6b [ 6329.062014] INFO: Allocated in dst_alloc+0x46/0x180 age=33 cpu=0 pid=6108 [ 6329.062014] __slab_alloc+0x4f8/0x58c [ 6329.062014] kmem_cache_alloc+0x94/0x290 [ 6329.062014] dst_alloc+0x46/0x180 [ 6329.062014] rt_dst_alloc+0x47/0x50 [ 6329.062014] __ip_route_output_key+0x882/0xa80 [ 6329.062014] ip_route_output_flow+0x22/0x60 [ 6329.062014] igmpv3_newpack+0xe2/0x210 [ 6329.062014] add_grhead.isra.17+0x37/0xa0 [ 6329.062014] add_grec+0x3b2/0x470 [ 6329.062014] igmp_ifc_timer_expire+0x28e/0x400 [ 6329.062014] call_timer_fn+0x146/0x320 [ 6329.062014] run_timer_softirq+0x2d4/0x360 [ 6329.062014] __do_softirq+0x217/0x4a0 [ 6329.062014] irq_exit+0x45/0xb0 [ 6329.062014] smp_apic_timer_interrupt+0x3f/0x50 [ 6329.062014] apic_timer_interrupt+0x72/0x80 [ 6329.062014] INFO: Freed in dst_destroy+0x8a/0xe0 age=33 cpu=0 pid=6108 [ 6329.062014] __slab_free+0x32/0x380 [ 6329.062014] kmem_cache_free+0x186/0x2c0 [ 6329.062014] dst_destroy+0x8a/0xe0 [ 6329.062014] dst_release+0x53/0x70 [ 6329.062014] ip_tunnel_xmit+0x50e/0xfb0 [ 6329.062014] ipip_tunnel_xmit+0x41/0x60 [ 6329.062014] dev_hard_start_xmit+0x3ed/0x950 [ 6329.062014] __dev_queue_xmit+0x621/0x890 [ 6329.062014] dev_queue_xmit+0xb/0x10 [ 6329.062014] neigh_direct_output+0xc/0x10 [ 6329.062014] ip_finish_output2+0x494/0x5d0 [ 6329.062014] ip_finish_output+0x238/0x2d0 [ 6329.062014] ip_output+0x9f/0x110 [ 6329.062014] ip_local_out+0x6e/0xa0 [ 6329.062014] igmpv3_sendpack+0x43/0x50 [ 6329.062014] igmp_ifc_timer_expire+0x395/0x400 [ 6329.062014] INFO: Slab 0xea0002d20200 objects=14 used=14 fp=0x (null) flags=0x1004080 [ 6329.062014] INFO: Object 0x8800b48098c0 @offset=6336 fp=0x8800b4809680 [ 
6329.062014] Bytes b4 8800b48098b0: 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a [ 6329.062014] Object 8800b48098c0: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b [ 6329.062014] Object 8800b48098d0: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b [ 6329.062014] Object 8800b48098e0: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b [ 6329.062014] Object 8800b48098f0: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b [ 6329.062014] Object 8800b4809900: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b [ 6329.062014] Object 8800b4809910: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b [ 6329.062014] Object 8800b4809920: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b [ 6329.062014] Object 8800b4809930: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b [ 6329.062014] Object 8800b4809940: 6a 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b jkkk [ 6329.062014] Object 8800b4809950: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b [ 6329.062014] Object 8800b4809960: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b [ 6329.062014] Object 8800b4809970: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b a5 kkk. 
[ 6329.062014] Redzone 8800b4809980: bb bb bb bb bb bb bb bb [ 6329.062014] Padding 8800b4809ac0: 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a [ 6329.062014] Padding 8800b4809ad0: 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a [ 6329.062014] Padding 8800b4809ae0: 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a [ 6329.062014] Padding 8800b4809af0: 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a [ 6329.062014] CPU: 0 PID: 6108 Comm: trinity-main Tainted: GB 3.13.0+ #1 [ 6329.062014] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 [ 6329.062014] 8800b48098c0 8800ab253b38 82366c34 8800baacd8c0 [ 6329.062014] 8800ab253b68 81262e41 8800b4809941 8800baacd8c0 [ 6329.062014] 006b 8800b48098c0 8800ab253bb0 81263284 [ 6329.062014] Call Trace: [ 6329.062014] [] dump_stack+0x4d/0x66 [ 6329.062014] [] print_trailer+0x131/0x140 [ 6329.062014] [] check_bytes_and_report+0xc4/0x120 [ 6329.062014] [] check_object+0x11e/0x240 [ 6329.062014] [] ? dst_alloc+0x46/0x180 [ 6329.062014] [] alloc_debug_processing+0x62/0x104 [ 6329.062014] [] __slab_alloc+0x4f8/0x58c [ 6329.062014] [] ? sched_clock_cpu+0xb8/0xe0 [ 6329.062014] [] ? kvm_clock_read+0x27/0x40 [ 6329.062014] [] ? sched_clock+0x9
sched_rr_get_interval NULL pointer OOPS
Hello, Trinity triggered the following bug in two separate qemu virtual machines after fuzzing v3.13-3995-g0dc3fd0 for a day or two. I have not been running Trinity in a while, so no idea if this is a regression or not. If I'm reading this right, it's oopsing in kernel/sched/core.c: SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, struct timespec __user *, interval) { ... rq = task_rq_lock(p, &flags); time_slice = p->sched_class->get_rr_interval(rq, p); <== task_rq_unlock(rq, p, &flags); ... The first trace: [21451.975552] trinity-c9: vm86 mode not supported on 64 bit kernel [21452.242792] trinity-c23: vm86 mode not supported on 64 bit kernel [21452.309518] trinity-c30: vm86 mode not supported on 64 bit kernel [21456.862415] type=1401 audit(1390484421.888:396): SELinux: unrecognized netlink message type=0 for sclass=34 [21456.862415] [21472.032599] BUG: unable to handle kernel NULL pointer dereference at (null) [21472.034764] IP: [< (null)>] (null) [21472.036117] PGD a6243067 PUD a712a067 PMD 0 [21472.037345] Oops: 0010 [#1] SMP DEBUG_PAGEALLOC [21472.038616] CPU: 0 PID: 15522 Comm: trinity-c8 Not tainted 3.13.0+ #1 [21472.040309] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 [21472.041823] task: 88006f8f ti: 8800a101e000 task.ti: 8800a101e000 [21472.043814] RIP: 0010:[<>] [< (null)>] (null) [21472.045823] RSP: 0018:8800a101ff30 EFLAGS: 00010046 [21472.047225] RAX: 82434ae0 RBX: 8800b926ca40 RCX: 02c0 [21472.049143] RDX: 8800bf60e460 RSI: 8800b926ca40 RDI: 8800bf7d4fc0 [21472.050900] RBP: 8800a101ff78 R08: fffe8fd25bb38016 R09: 0001 [21472.052621] R10: 88006f8f R11: R12: 0004 [21472.054469] R13: 8800bf7d4fc0 R14: 0094 R15: 20008465485f [21472.056303] FS: 7f904f260700() GS:8800bf60() knlGS: [21472.058211] CS: 0010 DS: ES: CR0: 8005003b [21472.059516] CR2: CR3: 44ec3000 CR4: 06f0 [21472.061143] DR0: 0276a000 DR1: 0276aff8 DR2: [21472.062762] DR3: DR6: 0ff0 DR7: 0600 [21472.064445] Stack: [21472.064975] 81160cdf 81160c23 0282 0001 [21472.067017] 04ae 0008 0008 
7f904f233de0 [21472.069053] 0094 0094 8235ba79 0246 [21472.071089] Call Trace: [21472.071761] [] ? SyS_sched_rr_get_interval+0xdf/0x230 [21472.073570] [] ? SyS_sched_rr_get_interval+0x23/0x230 [21472.075401] [] system_call_fastpath+0x16/0x1b [21472.076987] Code: Bad RIP value. [21472.077929] RIP [< (null)>] (null) [21472.079302] RSP [21472.080247] CR2: [21472.117066] ---[ end trace cc44b07941fc4905 ]--- The second trace looks more or less identical: [106143.588795] RDS: rds_bind() could not find a transport, load rds_tcp or rds_rdma? [106146.597725] trinity-c1: vm86 mode not supported on 64 bit kernel [106146.865957] trinity-c36: vm86 mode not supported on 64 bit kernel [106156.562726] BUG: unable to handle kernel NULL pointer dereference at (null) [106156.565411] IP: [< (null)>] (null) [106156.567021] PGD a61e6067 PUD a03a4067 PMD 0 [106156.568451] Oops: 0010 [#1] SMP DEBUG_PAGEALLOC [106156.569929] CPU: 0 PID: 19875 Comm: trinity-c23 Not tainted 3.13.0+ #1 [106156.571987] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 [106156.573758] task: 8800b65d8000 ti: 880009ac8000 task.ti: 880009ac8000 [106156.576051] RIP: 0010:[<>] [< (null)>] (null) [106156.578322] RSP: 0018:880009ac9f30 EFLAGS: 00010046 [106156.579920] RAX: 82434ae0 RBX: 8800b4cb2520 RCX: 02c0 [106156.582122] RDX: 8800bf60e460 RSI: 8800b4cb2520 RDI: 8800bf7d4fc0 [106156.584225] RBP: 880009ac9f78 R08: fffe8fd25bb38016 R09: 0001 [106156.586340] R10: 8800b65d8000 R11: R12: 008c8000 [106156.588513] R13: 8800bf7d4fc0 R14: 0094 R15: 40004a1b [106156.590684] FS: 7f75c3e23700() GS:8800bf60() knlGS: [106156.593171] CS: 0010 DS: ES: CR0: 8005003b [106156.594922] CR2: CR3: a69c1000 CR4: 06f0 [106156.597114] DR0: 008c8000 DR1: 00ca5000 DR2: 024dc000 [106156.599295] DR3: 026df000 DR6: 0ff0 DR7: 00030602 [106156.601449] Stack: [106156.602085] 81160cdf 81160c23 0282 0001 [106156.604423] 0003d7dc 0017 0017 7f75c3df6de0 [106156.606758] 0094 0094 8235ba79 0246 [106156.609117] Call Trace: [106156.609913] [] ? SyS_s
kernel BUG at net/core/skbuff.c:1065!
Hello, Hit this bug while fuzzing in a qemu virtual machine as the root user. Kernel is v3.10-rc5-0-g317ddd2. Tommi [575180.874750] type=1401 audit(1371378748.322:7750): SELinux: unrecognized netlink message type=0 for sclass=36 [575180.874750] [575191.358143] [ cut here ] [575191.358498] kernel BUG at /build/linux/net/core/skbuff.c:1065! [575191.358498] invalid opcode: [#1] SMP DEBUG_PAGEALLOC [575191.358498] CPU: 0 PID: 28554 Comm: trinity-child33 Not tainted 3.10.0-rc5 #1 [575191.358498] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 [575191.358498] task: 880005f0c7c0 ti: 88002cec6000 task.ti: 88002cec6000 [575191.358498] RIP: 0010:[] [] pskb_expand_head+0x3b/0x290 [575191.358498] RSP: 0018:88002cec79f0 EFLAGS: 00010202 [575191.358498] RAX: 0002 RBX: 880010e7cd80 RCX: 0020 [575191.358498] RDX: 003c RSI: RDI: 880010e7cd80 [575191.358498] RBP: 88002cec7a28 R08: 0001 R09: [575191.358498] R10: R11: R12: 002b [575191.358498] R13: R14: 0011 R15: 40014b89 [575191.358498] FS: 7f3b21cd6700() GS:8800bf60() knlGS: [575191.358498] CS: 0010 DS: ES: CR0: 80050033 [575191.358498] CR2: 009fb000 CR3: 1a873000 CR4: 06f0 [575191.358498] DR0: 02592d30 DR1: DR2: [575191.358498] DR3: DR6: 0ff0 DR7: 0600 [575191.358498] Stack: [575191.358498] 880090cbd668 880010e7cd80 002b [575191.358498] 0011 40014b89 88002cec7a50 [575191.358498] 81eb6fc3 88002cec7a70 0011 8800b952d498 [575191.358498] Call Trace: [575191.358498] [] skb_pad+0xa3/0x150 [575191.358498] [] e1000_xmit_frame+0x78/0xfc0 [575191.358498] [] ? dev_queue_xmit_nit+0x360/0x390 [575191.358498] [] ? get_rps_cpu+0x4a0/0x4a0 [575191.358498] [] dev_hard_start_xmit+0x2ec/0x720 [575191.358498] [] sch_direct_xmit+0x80/0x290 [575191.358498] [] dev_queue_xmit+0x4b4/0x8e0 [575191.358498] [] ? 
dev_hard_start_xmit+0x720/0x720 [575191.358498] [] llc_sap_action_send_test_c+0x7f/0x90 [575191.358498] [] llc_sap_state_process+0xd0/0x160 [575191.358498] [] llc_build_and_send_test_pkt+0x44/0x50 [575191.358498] [] llc_ui_sendmsg+0x1e7/0x490 [575191.358498] [] sock_sendmsg+0xa1/0xd0 [575191.358498] [] ? __do_page_fault+0x288/0x530 [575191.358498] [] SYSC_sendto+0x11c/0x160 [575191.358498] [] ? _raw_spin_unlock_irq+0x27/0x50 [575191.358498] [] ? do_setitimer+0x27c/0x330 [575191.358498] [] ? trace_hardirqs_on_caller+0x16/0x220 [575191.358498] [] ? trace_hardirqs_on_thunk+0x3a/0x3f [575191.358498] [] SyS_sendto+0x9/0x10 [575191.358498] [] system_call_fastpath+0x16/0x1b [575191.358498] Code: 48 89 fb 48 83 ec 10 8b 87 d4 00 00 00 01 f0 01 c2 85 f6 79 0b 0f 0b 66 0f 1f 84 00 00 00 00 00 8b 87 ec 00 00 00 83 f8 01 74 05 <0f> 0b 0f 1f 00 83 c2 3f 41 89 cf 83 e2 c0 f6 87 aa 00 00 00 04 [575191.358498] RIP [] pskb_expand_head+0x3b/0x290 [575191.358498] RSP [575191.518696] ---[ end trace 866084dcc0c2aa3e ]--- [575191.522588] Kernel panic - not syncing: Fatal exception in interrupt [575191.523574] drm_kms_helper: panic occurred, switching back to text console -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/