[PATCH v2] selftests: intel_pstate: ftime() is deprecated

2020-11-01 Thread Tommi Rantala
Use clock_gettime() instead of deprecated ftime().

  aperf.c: In function ‘main’:
  aperf.c:58:2: warning: ‘ftime’ is deprecated [-Wdeprecated-declarations]
 58 |  ftime(&before);
|  ^
  In file included from aperf.c:9:
  /usr/include/sys/timeb.h:39:12: note: declared here
 39 | extern int ftime (struct timeb *__timebuf)
|^

Signed-off-by: Tommi Rantala 
---
 tools/testing/selftests/intel_pstate/aperf.c | 22 ++--
 1 file changed, 16 insertions(+), 6 deletions(-)

v2: define and use NSEC_PER_MSEC and MSEC_PER_SEC

diff --git a/tools/testing/selftests/intel_pstate/aperf.c 
b/tools/testing/selftests/intel_pstate/aperf.c
index f6cd03a87493..a8acf3996973 100644
--- a/tools/testing/selftests/intel_pstate/aperf.c
+++ b/tools/testing/selftests/intel_pstate/aperf.c
@@ -10,8 +10,12 @@
 #include 
 #include 
 #include 
+#include <time.h>
 #include "../kselftest.h"
 
+#define MSEC_PER_SEC   1000L
+#define NSEC_PER_MSEC  1000000L
+
 void usage(char *name) {
printf ("Usage: %s cpunum\n", name);
 }
@@ -22,7 +26,7 @@ int main(int argc, char **argv) {
long long tsc, old_tsc, new_tsc;
long long aperf, old_aperf, new_aperf;
long long mperf, old_mperf, new_mperf;
-   struct timeb before, after;
+   struct timespec before, after;
long long int start, finish, total;
cpu_set_t cpuset;
 
@@ -55,7 +59,10 @@ int main(int argc, char **argv) {
return 1;
}
 
-   ftime(&before);
+   if (clock_gettime(CLOCK_MONOTONIC, &before) < 0) {
+   perror("clock_gettime");
+   return 1;
+   }
pread(fd, &old_tsc,  sizeof(old_tsc), 0x10);
pread(fd, &old_aperf,  sizeof(old_mperf), 0xe7);
pread(fd, &old_mperf,  sizeof(old_aperf), 0xe8);
@@ -64,7 +71,10 @@ int main(int argc, char **argv) {
sqrt(i);
}
 
-   ftime(&after);
+   if (clock_gettime(CLOCK_MONOTONIC, &after) < 0) {
+   perror("clock_gettime");
+   return 1;
+   }
pread(fd, &new_tsc,  sizeof(new_tsc), 0x10);
pread(fd, &new_aperf,  sizeof(new_mperf), 0xe7);
pread(fd, &new_mperf,  sizeof(new_aperf), 0xe8);
@@ -73,11 +83,11 @@ int main(int argc, char **argv) {
aperf = new_aperf-old_aperf;
mperf = new_mperf-old_mperf;
 
-   start = before.time*1000 + before.millitm;
-   finish = after.time*1000 + after.millitm;
+   start = before.tv_sec*MSEC_PER_SEC + before.tv_nsec/NSEC_PER_MSEC;
+   finish = after.tv_sec*MSEC_PER_SEC + after.tv_nsec/NSEC_PER_MSEC;
total = finish - start;
 
-   printf("runTime: %4.2f\n", 1.0*total/1000);
+   printf("runTime: %4.2f\n", 1.0*total/MSEC_PER_SEC);
printf("freq: %7.0f\n", tsc / (1.0*aperf / (1.0 * mperf)) / total);
return 0;
 }
-- 
2.26.2



[PATCH] selftests: intel_pstate: ftime() is deprecated

2020-10-16 Thread Tommi Rantala
Use clock_gettime() instead of deprecated ftime().

  aperf.c: In function ‘main’:
  aperf.c:58:2: warning: ‘ftime’ is deprecated [-Wdeprecated-declarations]
 58 |  ftime(&before);
|  ^
  In file included from aperf.c:9:
  /usr/include/sys/timeb.h:39:12: note: declared here
 39 | extern int ftime (struct timeb *__timebuf)
|^

Signed-off-by: Tommi Rantala 
---
 tools/testing/selftests/intel_pstate/aperf.c | 17 -
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/tools/testing/selftests/intel_pstate/aperf.c 
b/tools/testing/selftests/intel_pstate/aperf.c
index f6cd03a87493..eea9dbab459b 100644
--- a/tools/testing/selftests/intel_pstate/aperf.c
+++ b/tools/testing/selftests/intel_pstate/aperf.c
@@ -10,6 +10,7 @@
 #include 
 #include 
 #include 
+#include 
 #include "../kselftest.h"
 
 void usage(char *name) {
@@ -22,7 +23,7 @@ int main(int argc, char **argv) {
long long tsc, old_tsc, new_tsc;
long long aperf, old_aperf, new_aperf;
long long mperf, old_mperf, new_mperf;
-   struct timeb before, after;
+   struct timespec before, after;
long long int start, finish, total;
cpu_set_t cpuset;
 
@@ -55,7 +56,10 @@ int main(int argc, char **argv) {
return 1;
}
 
-   ftime(&before);
+   if (clock_gettime(CLOCK_MONOTONIC, &before) < 0) {
+   perror("clock_gettime");
+   return 1;
+   }
pread(fd, &old_tsc,  sizeof(old_tsc), 0x10);
pread(fd, &old_aperf,  sizeof(old_mperf), 0xe7);
pread(fd, &old_mperf,  sizeof(old_aperf), 0xe8);
@@ -64,7 +68,10 @@ int main(int argc, char **argv) {
sqrt(i);
}
 
-   ftime(&after);
+   if (clock_gettime(CLOCK_MONOTONIC, &after) < 0) {
+   perror("clock_gettime");
+   return 1;
+   }
pread(fd, &new_tsc,  sizeof(new_tsc), 0x10);
pread(fd, &new_aperf,  sizeof(new_mperf), 0xe7);
pread(fd, &new_mperf,  sizeof(new_aperf), 0xe8);
@@ -73,8 +80,8 @@ int main(int argc, char **argv) {
aperf = new_aperf-old_aperf;
mperf = new_mperf-old_mperf;
 
-   start = before.time*1000 + before.millitm;
-   finish = after.time*1000 + after.millitm;
+   start = before.tv_sec*1000 + before.tv_nsec/1000000L;
+   finish = after.tv_sec*1000 + after.tv_nsec/1000000L;
total = finish - start;
 
printf("runTime: %4.2f\n", 1.0*total/1000);
-- 
2.26.2



[PATCH] perf test: Implement skip_reason callback for watchpoint tests

2020-10-16 Thread Tommi Rantala
Currently the reason for skipping the read-only watchpoint test is only
seen when running in verbose mode:

  $ perf test watchpoint
  23: Watchpoint:
  23.1: Read Only Watchpoint: Skip
  23.2: Write Only Watchpoint   : Ok
  23.3: Read / Write Watchpoint : Ok
  23.4: Modify Watchpoint   : Ok

  $ perf test -v watchpoint
  23: Watchpoint:
  23.1: Read Only Watchpoint:
  --- start ---
  test child forked, pid 60204
  Hardware does not support read only watchpoints.
  test child finished with -2

Implement the skip_reason callback for the watchpoint tests, so that it is
easy to see the reason why the test is skipped:

  $ perf test watchpoint
  23: Watchpoint:
  23.1: Read Only Watchpoint: Skip (missing 
hardware support)
  23.2: Write Only Watchpoint   : Ok
  23.3: Read / Write Watchpoint : Ok
  23.4: Modify Watchpoint   : Ok

Signed-off-by: Tommi Rantala 
---
 tools/perf/tests/builtin-test.c |  1 +
 tools/perf/tests/tests.h|  1 +
 tools/perf/tests/wp.c   | 21 +++--
 3 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index d328caaba45d..3bfad4ee31ae 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -142,6 +142,7 @@ static struct test generic_tests[] = {
.skip_if_fail   = false,
.get_nr = test__wp_subtest_get_nr,
.get_desc   = test__wp_subtest_get_desc,
+   .skip_reason= test__wp_subtest_skip_reason,
},
},
{
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index 4447a516c689..0630301087a6 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -66,6 +66,7 @@ int test__bp_signal_overflow(struct test *test, int subtest);
 int test__bp_accounting(struct test *test, int subtest);
 int test__wp(struct test *test, int subtest);
 const char *test__wp_subtest_get_desc(int subtest);
+const char *test__wp_subtest_skip_reason(int subtest);
 int test__wp_subtest_get_nr(void);
 int test__task_exit(struct test *test, int subtest);
 int test__mem(struct test *test, int subtest);
diff --git a/tools/perf/tests/wp.c b/tools/perf/tests/wp.c
index d262d6639829..9387fa76faa5 100644
--- a/tools/perf/tests/wp.c
+++ b/tools/perf/tests/wp.c
@@ -174,10 +174,12 @@ static bool wp_ro_supported(void)
 #endif
 }
 
-static void wp_ro_skip_msg(void)
+static const char *wp_ro_skip_msg(void)
 {
 #if defined (__x86_64__) || defined (__i386__)
-   pr_debug("Hardware does not support read only watchpoints.\n");
+   return "missing hardware support";
+#else
+   return NULL;
 #endif
 }
 
@@ -185,7 +187,7 @@ static struct {
const char *desc;
int (*target_func)(void);
bool (*is_supported)(void);
-   void (*skip_msg)(void);
+   const char *(*skip_msg)(void);
 } wp_testcase_table[] = {
{
.desc = "Read Only Watchpoint",
@@ -219,16 +221,23 @@ const char *test__wp_subtest_get_desc(int i)
return wp_testcase_table[i].desc;
 }
 
+const char *test__wp_subtest_skip_reason(int i)
+{
+   if (i < 0 || i >= (int)ARRAY_SIZE(wp_testcase_table))
+   return NULL;
+   if (!wp_testcase_table[i].skip_msg)
+   return NULL;
+   return wp_testcase_table[i].skip_msg();
+}
+
 int test__wp(struct test *test __maybe_unused, int i)
 {
if (i < 0 || i >= (int)ARRAY_SIZE(wp_testcase_table))
return TEST_FAIL;
 
if (wp_testcase_table[i].is_supported &&
-   !wp_testcase_table[i].is_supported()) {
-   wp_testcase_table[i].skip_msg();
+   !wp_testcase_table[i].is_supported())
return TEST_SKIP;
-   }
 
return !wp_testcase_table[i].target_func() ? TEST_OK : TEST_FAIL;
 }
-- 
2.26.2



[PATCH] perf tools: Fix crash with non-jited bpf progs

2020-10-16 Thread Tommi Rantala
The addr in PERF_RECORD_KSYMBOL events for non-jited bpf progs points to
the bpf interpreter, i.e. within the kernel text section. When processing
the unregister event, this causes unexpected removal of vmlinux_map,
crashing perf later in cleanup:

  # perf record -- timeout --signal=INT 2s /usr/share/bcc/tools/execsnoop
  PCOMMPIDPPID   RET ARGS
  [ perf record: Woken up 1 times to write data ]
  [ perf record: Captured and wrote 0.208 MB perf.data (5155 samples) ]
  perf: tools/include/linux/refcount.h:131: refcount_sub_and_test: Assertion 
`!(new > val)' failed.
  Aborted (core dumped)

  # perf script -D|grep KSYM
  0 0xa40 [0x48]: PERF_RECORD_KSYMBOL addr a9b6b530 len 0 type 1 flags 
0x0 name bpf_prog_f958f6eb72ef5af6
  0 0xab0 [0x48]: PERF_RECORD_KSYMBOL addr a9b6b530 len 0 type 1 flags 
0x0 name bpf_prog_8c42dee26e8cd4c2
  0 0xb20 [0x48]: PERF_RECORD_KSYMBOL addr a9b6b530 len 0 type 1 flags 
0x0 name bpf_prog_f958f6eb72ef5af6
  108563691893 0x33d98 [0x58]: PERF_RECORD_KSYMBOL addr a9b6b3b0 len 0 
type 1 flags 0x0 name bpf_prog_bc5697a410556fc2_syscall__execve
  108568518458 0x34098 [0x58]: PERF_RECORD_KSYMBOL addr a9b6b3f0 len 0 
type 1 flags 0x0 name bpf_prog_45e2203c2928704d_do_ret_sys_execve
  109301967895 0x34830 [0x58]: PERF_RECORD_KSYMBOL addr a9b6b3b0 len 0 
type 1 flags 0x1 name bpf_prog_bc5697a410556fc2_syscall__execve
  109302007356 0x348b0 [0x58]: PERF_RECORD_KSYMBOL addr a9b6b3f0 len 0 
type 1 flags 0x1 name bpf_prog_45e2203c2928704d_do_ret_sys_execve
  perf: tools/include/linux/refcount.h:131: refcount_sub_and_test: Assertion 
`!(new > val)' failed.

Here the addresses match the bpf interpreter:

  # grep -e a9b6b530 -e a9b6b3b0 -e a9b6b3f0 
/proc/kallsyms
  a9b6b3b0 t __bpf_prog_run224
  a9b6b3f0 t __bpf_prog_run192
  a9b6b530 t __bpf_prog_run32

Fix by not allowing vmlinux_map to be removed by a PERF_RECORD_KSYMBOL
unregister event.

Signed-off-by: Tommi Rantala 
---
 tools/perf/util/machine.c | 11 ++-
 tools/perf/util/symbol.c  |  7 +++
 tools/perf/util/symbol.h  |  2 ++
 3 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 85587de027a5..d93d35463c61 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -786,11 +786,20 @@ static int machine__process_ksymbol_unregister(struct 
machine *machine,
   union perf_event *event,
   struct perf_sample *sample 
__maybe_unused)
 {
+   struct symbol *sym;
struct map *map;
 
map = maps__find(&machine->kmaps, event->ksymbol.addr);
-   if (map)
+   if (!map)
+   return 0;
+
+   if (map != machine->vmlinux_map)
maps__remove(&machine->kmaps, map);
+   else {
+   sym = dso__find_symbol(map->dso, map->map_ip(map, map->start));
+   if (sym)
+   dso__delete_symbol(map->dso, sym);
+   }
 
return 0;
 }
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 5151a8c0b791..6bf8e74ea1d1 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -515,6 +515,13 @@ void dso__insert_symbol(struct dso *dso, struct symbol 
*sym)
}
 }
 
+void dso__delete_symbol(struct dso *dso, struct symbol *sym)
+{
+   rb_erase_cached(&sym->rb_node, &dso->symbols);
+   symbol__delete(sym);
+   dso__reset_find_symbol_cache(dso);
+}
+
 struct symbol *dso__find_symbol(struct dso *dso, u64 addr)
 {
if (dso->last_find_result.addr != addr || dso->last_find_result.symbol 
== NULL) {
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 03e264a27cd3..60345691db09 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -130,6 +130,8 @@ int dso__load_kallsyms(struct dso *dso, const char 
*filename, struct map *map);
 
 void dso__insert_symbol(struct dso *dso,
struct symbol *sym);
+void dso__delete_symbol(struct dso *dso,
+   struct symbol *sym);
 
 struct symbol *dso__find_symbol(struct dso *dso, u64 addr);
 struct symbol *dso__find_symbol_by_name(struct dso *dso, const char *name);
-- 
2.26.2



[PATCH 12/13] selftests: clone3: use SKIP instead of XFAIL

2020-10-08 Thread Tommi Rantala
XFAIL is gone since 9847d24af95c ("selftests/harness: Refactor XFAIL
into SKIP"), use SKIP instead.

Fixes: 9847d24af95c ("selftests/harness: Refactor XFAIL into SKIP")
Signed-off-by: Tommi Rantala 
---
 tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c 
b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c
index 9562425aa0a9..614091de4c54 100644
--- a/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c
+++ b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c
@@ -145,7 +145,7 @@ TEST(clone3_cap_checkpoint_restore)
test_clone3_supported();
 
EXPECT_EQ(getuid(), 0)
-   XFAIL(return, "Skipping all tests as non-root\n");
+   SKIP(return, "Skipping all tests as non-root");
 
memset(&set_tid, 0, sizeof(set_tid));
 
-- 
2.26.2



[PATCH 06/13] selftests: pidfd: skip test on kcmp() ENOSYS

2020-10-08 Thread Tommi Rantala
Skip test if kcmp() is not available, for example if kernel is compiled
without CONFIG_CHECKPOINT_RESTORE=y.

Signed-off-by: Tommi Rantala 
---
 tools/testing/selftests/pidfd/pidfd_getfd_test.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/pidfd/pidfd_getfd_test.c 
b/tools/testing/selftests/pidfd/pidfd_getfd_test.c
index 7758c98be015..0930e2411dfb 100644
--- a/tools/testing/selftests/pidfd/pidfd_getfd_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_getfd_test.c
@@ -204,7 +204,10 @@ TEST_F(child, fetch_fd)
fd = sys_pidfd_getfd(self->pidfd, self->remote_fd, 0);
ASSERT_GE(fd, 0);
 
-   EXPECT_EQ(0, sys_kcmp(getpid(), self->pid, KCMP_FILE, fd, 
self->remote_fd));
+   ret = sys_kcmp(getpid(), self->pid, KCMP_FILE, fd, self->remote_fd);
+   if (ret < 0 && errno == ENOSYS)
+   SKIP(return, "kcmp() syscall not supported");
+   EXPECT_EQ(ret, 0);
 
ret = fcntl(fd, F_GETFD);
ASSERT_GE(ret, 0);
-- 
2.26.2



[PATCH 10/13] selftests: proc: fix warning: _GNU_SOURCE redefined

2020-10-08 Thread Tommi Rantala
Makefile already contains -D_GNU_SOURCE, so we can remove it from the
*.c files.

Signed-off-by: Tommi Rantala 
---
 tools/testing/selftests/proc/proc-loadavg-001.c  | 1 -
 tools/testing/selftests/proc/proc-self-syscall.c | 1 -
 tools/testing/selftests/proc/proc-uptime-002.c   | 1 -
 3 files changed, 3 deletions(-)

diff --git a/tools/testing/selftests/proc/proc-loadavg-001.c 
b/tools/testing/selftests/proc/proc-loadavg-001.c
index 471e2aa28077..fb4fe9188806 100644
--- a/tools/testing/selftests/proc/proc-loadavg-001.c
+++ b/tools/testing/selftests/proc/proc-loadavg-001.c
@@ -14,7 +14,6 @@
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  */
 /* Test that /proc/loadavg correctly reports last pid in pid namespace. */
-#define _GNU_SOURCE
 #include 
 #include 
 #include 
diff --git a/tools/testing/selftests/proc/proc-self-syscall.c 
b/tools/testing/selftests/proc/proc-self-syscall.c
index 9f6d000c0245..8511dcfe67c7 100644
--- a/tools/testing/selftests/proc/proc-self-syscall.c
+++ b/tools/testing/selftests/proc/proc-self-syscall.c
@@ -13,7 +13,6 @@
  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  */
-#define _GNU_SOURCE
 #include 
 #include 
 #include 
diff --git a/tools/testing/selftests/proc/proc-uptime-002.c 
b/tools/testing/selftests/proc/proc-uptime-002.c
index 30e2b7849089..e7ceabed7f51 100644
--- a/tools/testing/selftests/proc/proc-uptime-002.c
+++ b/tools/testing/selftests/proc/proc-uptime-002.c
@@ -15,7 +15,6 @@
  */
 // Test that values in /proc/uptime increment monotonically
 // while shifting across CPUs.
-#define _GNU_SOURCE
 #undef NDEBUG
 #include 
 #include 
-- 
2.26.2



[PATCH 08/13] selftests: pidfd: drop needless linux/kcmp.h inclusion in pidfd_setns_test.c

2020-10-08 Thread Tommi Rantala
kcmp is not used in pidfd_setns_test.c, so do not include <linux/kcmp.h>.

Signed-off-by: Tommi Rantala 
---
 tools/testing/selftests/pidfd/pidfd_setns_test.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tools/testing/selftests/pidfd/pidfd_setns_test.c 
b/tools/testing/selftests/pidfd/pidfd_setns_test.c
index 7dca1aa4672d..3f3dc7a02a01 100644
--- a/tools/testing/selftests/pidfd/pidfd_setns_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_setns_test.c
@@ -16,7 +16,6 @@
 #include 
 #include 
 #include 
-#include <linux/kcmp.h>
 
 #include "pidfd.h"
 #include "../clone3/clone3_selftests.h"
-- 
2.26.2



[PATCH 07/13] selftests: pidfd: add CONFIG_CHECKPOINT_RESTORE=y to config

2020-10-08 Thread Tommi Rantala
kcmp syscall is used in pidfd_getfd_test.c, so add
CONFIG_CHECKPOINT_RESTORE=y to config to ensure kcmp is available.

Signed-off-by: Tommi Rantala 
---
 tools/testing/selftests/pidfd/config | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/testing/selftests/pidfd/config 
b/tools/testing/selftests/pidfd/config
index bb11de90c0c9..f6f2965e17af 100644
--- a/tools/testing/selftests/pidfd/config
+++ b/tools/testing/selftests/pidfd/config
@@ -4,3 +4,4 @@ CONFIG_USER_NS=y
 CONFIG_PID_NS=y
 CONFIG_NET_NS=y
 CONFIG_CGROUPS=y
+CONFIG_CHECKPOINT_RESTORE=y
-- 
2.26.2



[PATCH 11/13] selftests: core: use SKIP instead of XFAIL in close_range_test.c

2020-10-08 Thread Tommi Rantala
XFAIL is gone since 9847d24af95c ("selftests/harness: Refactor XFAIL
into SKIP"), use SKIP instead.

Fixes: 9847d24af95c ("selftests/harness: Refactor XFAIL into SKIP")
Signed-off-by: Tommi Rantala 
---
 tools/testing/selftests/core/close_range_test.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/core/close_range_test.c 
b/tools/testing/selftests/core/close_range_test.c
index c99b98b0d461..575b391ddc78 100644
--- a/tools/testing/selftests/core/close_range_test.c
+++ b/tools/testing/selftests/core/close_range_test.c
@@ -44,7 +44,7 @@ TEST(close_range)
fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
ASSERT_GE(fd, 0) {
if (errno == ENOENT)
-   XFAIL(return, "Skipping test since /dev/null 
does not exist");
+   SKIP(return, "Skipping test since /dev/null 
does not exist");
}
 
open_fds[i] = fd;
@@ -52,7 +52,7 @@ TEST(close_range)
 
EXPECT_EQ(-1, sys_close_range(open_fds[0], open_fds[100], -1)) {
if (errno == ENOSYS)
-   XFAIL(return, "close_range() syscall not supported");
+   SKIP(return, "close_range() syscall not supported");
}
 
EXPECT_EQ(0, sys_close_range(open_fds[0], open_fds[50], 0));
@@ -108,7 +108,7 @@ TEST(close_range_unshare)
fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
ASSERT_GE(fd, 0) {
if (errno == ENOENT)
-   XFAIL(return, "Skipping test since /dev/null 
does not exist");
+   SKIP(return, "Skipping test since /dev/null 
does not exist");
}
 
open_fds[i] = fd;
@@ -197,7 +197,7 @@ TEST(close_range_unshare_capped)
fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
ASSERT_GE(fd, 0) {
if (errno == ENOENT)
-   XFAIL(return, "Skipping test since /dev/null 
does not exist");
+   SKIP(return, "Skipping test since /dev/null 
does not exist");
}
 
open_fds[i] = fd;
-- 
2.26.2



[PATCH 09/13] selftests: android: fix multiple definition of sock_name

2020-10-08 Thread Tommi Rantala
Fix multiple definition of sock_name compilation error:

  tools/testing/selftests/android/ion/ipcsocket.h:8: multiple definition of 
`sock_name'

Signed-off-by: Tommi Rantala 
---
 tools/testing/selftests/android/ion/ipcsocket.c | 1 +
 tools/testing/selftests/android/ion/ipcsocket.h | 2 --
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/tools/testing/selftests/android/ion/ipcsocket.c 
b/tools/testing/selftests/android/ion/ipcsocket.c
index 7dc521002095..67ec69410d2e 100644
--- a/tools/testing/selftests/android/ion/ipcsocket.c
+++ b/tools/testing/selftests/android/ion/ipcsocket.c
@@ -10,6 +10,7 @@
 
 #include "ipcsocket.h"
 
+static char sock_name[MAX_SOCK_NAME_LEN];
 
 int opensocket(int *sockfd, const char *name, int connecttype)
 {
diff --git a/tools/testing/selftests/android/ion/ipcsocket.h 
b/tools/testing/selftests/android/ion/ipcsocket.h
index b3e84498a8a1..ec5efb23e7b0 100644
--- a/tools/testing/selftests/android/ion/ipcsocket.h
+++ b/tools/testing/selftests/android/ion/ipcsocket.h
@@ -5,8 +5,6 @@
 
 #define MAX_SOCK_NAME_LEN  64
 
-char sock_name[MAX_SOCK_NAME_LEN];
-
 /* This structure is responsible for holding the IPC data
  * data: hold the buffer fd
  * len: just the length of 32-bit integer fd
-- 
2.26.2



[PATCH 04/13] selftests/harness: prettify SKIP message whitespace again

2020-10-08 Thread Tommi Rantala
Commit 9847d24af95c ("selftests/harness: Refactor XFAIL into SKIP")
replaced XFAIL with SKIP in the output. Add one more space to make the
output aligned and pretty again.

Fixes: 9847d24af95c ("selftests/harness: Refactor XFAIL into SKIP")
Signed-off-by: Tommi Rantala 
---
 tools/testing/selftests/kselftest_harness.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/kselftest_harness.h 
b/tools/testing/selftests/kselftest_harness.h
index 4f78e4805633..d8f44f4bdb3f 100644
--- a/tools/testing/selftests/kselftest_harness.h
+++ b/tools/testing/selftests/kselftest_harness.h
@@ -126,7 +126,7 @@
snprintf(_metadata->results->reason, \
 sizeof(_metadata->results->reason), fmt, ##__VA_ARGS__); \
if (TH_LOG_ENABLED) { \
-   fprintf(TH_LOG_STREAM, "#  SKIP %s\n", \
+   fprintf(TH_LOG_STREAM, "#  SKIP  %s\n", \
_metadata->results->reason); \
} \
_metadata->passed = 1; \
-- 
2.26.2



[PATCH 00/13] selftests fixes

2020-10-08 Thread Tommi Rantala
Hi, small fixes to issues I hit with selftests.

Tommi Rantala (13):
  selftests: filter kselftest headers from command in lib.mk
  selftests: pidfd: fix compilation errors due to wait.h
  selftests: add vmaccess to .gitignore
  selftests/harness: prettify SKIP message whitespace again
  selftests: pidfd: use ksft_test_result_skip() when skipping test
  selftests: pidfd: skip test on kcmp() ENOSYS
  selftests: pidfd: add CONFIG_CHECKPOINT_RESTORE=y to config
  selftests: pidfd: drop needless linux/kcmp.h inclusion in
pidfd_setns_test.c
  selftests: android: fix multiple definition of sock_name
  selftests: proc: fix warning: _GNU_SOURCE redefined
  selftests: core: use SKIP instead of XFAIL in close_range_test.c
  selftests: clone3: use SKIP instead of XFAIL
  selftests: binderfs: use SKIP instead of XFAIL

 tools/testing/selftests/android/ion/ipcsocket.c   | 1 +
 tools/testing/selftests/android/ion/ipcsocket.h   | 2 --
 .../selftests/clone3/clone3_cap_checkpoint_restore.c  | 2 +-
 tools/testing/selftests/core/close_range_test.c   | 8 
 .../selftests/filesystems/binderfs/binderfs_test.c| 8 
 tools/testing/selftests/kselftest_harness.h   | 2 +-
 tools/testing/selftests/lib.mk| 2 +-
 tools/testing/selftests/pidfd/config  | 1 +
 tools/testing/selftests/pidfd/pidfd_getfd_test.c  | 5 -
 tools/testing/selftests/pidfd/pidfd_open_test.c   | 1 -
 tools/testing/selftests/pidfd/pidfd_poll_test.c   | 1 -
 tools/testing/selftests/pidfd/pidfd_setns_test.c  | 1 -
 tools/testing/selftests/pidfd/pidfd_test.c| 2 +-
 tools/testing/selftests/proc/proc-loadavg-001.c   | 1 -
 tools/testing/selftests/proc/proc-self-syscall.c  | 1 -
 tools/testing/selftests/proc/proc-uptime-002.c| 1 -
 tools/testing/selftests/ptrace/.gitignore | 1 +
 17 files changed, 19 insertions(+), 21 deletions(-)

-- 
2.26.2



[PATCH 13/13] selftests: binderfs: use SKIP instead of XFAIL

2020-10-08 Thread Tommi Rantala
XFAIL is gone since 9847d24af95c ("selftests/harness: Refactor XFAIL
into SKIP"), use SKIP instead.

Fixes: 9847d24af95c ("selftests/harness: Refactor XFAIL into SKIP")
Signed-off-by: Tommi Rantala 
---
 .../selftests/filesystems/binderfs/binderfs_test.c| 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c 
b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c
index 1d27f52c61e6..477cbb042f5b 100644
--- a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c
+++ b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c
@@ -74,7 +74,7 @@ static int __do_binderfs_test(struct __test_metadata 
*_metadata)
ret = mount(NULL, binderfs_mntpt, "binder", 0, 0);
EXPECT_EQ(ret, 0) {
if (errno == ENODEV)
-   XFAIL(goto out, "binderfs missing");
+   SKIP(goto out, "binderfs missing");
TH_LOG("%s - Failed to mount binderfs", strerror(errno));
goto rmdir;
}
@@ -475,10 +475,10 @@ TEST(binderfs_stress)
 TEST(binderfs_test_privileged)
 {
if (geteuid() != 0)
-   XFAIL(return, "Tests are not run as root. Skipping privileged 
tests");
+   SKIP(return, "Tests are not run as root. Skipping privileged 
tests");
 
if (__do_binderfs_test(_metadata))
-   XFAIL(return, "The Android binderfs filesystem is not 
available");
+   SKIP(return, "The Android binderfs filesystem is not 
available");
 }
 
 TEST(binderfs_test_unprivileged)
@@ -511,7 +511,7 @@ TEST(binderfs_test_unprivileged)
ret = wait_for_pid(pid);
if (ret) {
if (ret == 2)
-   XFAIL(return, "The Android binderfs filesystem is not 
available");
+   SKIP(return, "The Android binderfs filesystem is not 
available");
ASSERT_EQ(ret, 0) {
TH_LOG("wait_for_pid() failed");
}
-- 
2.26.2



[PATCH 05/13] selftests: pidfd: use ksft_test_result_skip() when skipping test

2020-10-08 Thread Tommi Rantala
pidfd_test reports "Planned tests != run tests" when a test is
skipped:

  $ ./pidfd_test
  TAP version 13
  1..8
  [...]
  # pidfd_send_signal signal recycled pid test: Skipping test
  # Planned tests != run tests (8 != 7)
  # Totals: pass:7 fail:0 xfail:0 xpass:0 skip:0 error:0

Fix by using ksft_test_result_skip():

  $ ./pidfd_test
  TAP version 13
  1..8
  [...]
  ok 8 # SKIP pidfd_send_signal signal recycled pid test: Unsharing pid 
namespace not permitted
  # Totals: pass:7 fail:0 xfail:0 xpass:0 skip:1 error:0

Signed-off-by: Tommi Rantala 
---
 tools/testing/selftests/pidfd/pidfd_test.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/pidfd/pidfd_test.c 
b/tools/testing/selftests/pidfd/pidfd_test.c
index c585aaa2acd8..529eb700ac26 100644
--- a/tools/testing/selftests/pidfd/pidfd_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_test.c
@@ -330,7 +330,7 @@ static int test_pidfd_send_signal_recycled_pid_fail(void)
ksft_exit_fail_msg("%s test: Failed to recycle pid %d\n",
   test_name, PID_RECYCLE);
case PIDFD_SKIP:
-   ksft_print_msg("%s test: Skipping test\n", test_name);
+   ksft_test_result_skip("%s test: Skipping test\n", test_name);
ret = 0;
break;
case PIDFD_XFAIL:
-- 
2.26.2



[PATCH 02/13] selftests: pidfd: fix compilation errors due to wait.h

2020-10-08 Thread Tommi Rantala
Drop unneeded <linux/wait.h> header inclusion to fix pidfd compilation
errors seen in Fedora 32:

In file included from pidfd_open_test.c:9:
../../../../usr/include/linux/wait.h:17:16: error: expected identifier before 
numeric constant
   17 | #define P_ALL  0
  |^

Signed-off-by: Tommi Rantala 
---
 tools/testing/selftests/pidfd/pidfd_open_test.c | 1 -
 tools/testing/selftests/pidfd/pidfd_poll_test.c | 1 -
 2 files changed, 2 deletions(-)

diff --git a/tools/testing/selftests/pidfd/pidfd_open_test.c 
b/tools/testing/selftests/pidfd/pidfd_open_test.c
index b9fe75fc3e51..8a59438ccc78 100644
--- a/tools/testing/selftests/pidfd/pidfd_open_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_open_test.c
@@ -6,7 +6,6 @@
 #include 
 #include 
 #include 
-#include <linux/wait.h>
 #include 
 #include 
 #include 
diff --git a/tools/testing/selftests/pidfd/pidfd_poll_test.c 
b/tools/testing/selftests/pidfd/pidfd_poll_test.c
index 4b115444dfe9..610811275357 100644
--- a/tools/testing/selftests/pidfd/pidfd_poll_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_poll_test.c
@@ -3,7 +3,6 @@
 #define _GNU_SOURCE
 #include 
 #include 
-#include <linux/wait.h>
 #include 
 #include 
 #include 
-- 
2.26.2



[PATCH 01/13] selftests: filter kselftest headers from command in lib.mk

2020-10-08 Thread Tommi Rantala
Commit 1056d3d2c97e ("selftests: enforce local header dependency in
lib.mk") added header dependency to the rule, but as the rule uses $^,
the headers are added to the compiler command line.

This can cause unexpected precompiled header files to be generated when
compilation fails:

  $ echo { >> openat2_test.c

  $ make
  gcc -Wall -O2 -g -fsanitize=address -fsanitize=undefined  openat2_test.c
tools/testing/selftests/kselftest_harness.h 
tools/testing/selftests/kselftest.h helpers.c
-o tools/testing/selftests/openat2/openat2_test
  openat2_test.c:313:1: error: expected identifier or ‘(’ before ‘{’ token
313 | {
| ^
  make: *** [../lib.mk:140: tools/testing/selftests/openat2/openat2_test] Error 
1

  $ file openat2_test*
  openat2_test:   GCC precompiled header (version 014) for C
  openat2_test.c: C source, ASCII text

Fix it by filtering out the headers, so that we'll only pass the actual
*.c files in the compiler command line.

Fixes: 1056d3d2c97e ("selftests: enforce local header dependency in lib.mk")
Signed-off-by: Tommi Rantala 
---
 tools/testing/selftests/lib.mk | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index 7a17ea815736..66f3317dc365 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -137,7 +137,7 @@ endif
 ifeq ($(OVERRIDE_TARGETS),)
 LOCAL_HDRS := $(selfdir)/kselftest_harness.h $(selfdir)/kselftest.h
 $(OUTPUT)/%:%.c $(LOCAL_HDRS)
-   $(LINK.c) $^ $(LDLIBS) -o $@
+   $(LINK.c) $(filter-out $(LOCAL_HDRS),$^) $(LDLIBS) -o $@
 
 $(OUTPUT)/%.o:%.S
$(COMPILE.S) $^ -o $@
-- 
2.26.2



[PATCH 03/13] selftests: add vmaccess to .gitignore

2020-10-08 Thread Tommi Rantala
Commit 2de4e82318c7 ("selftests/ptrace: add test cases for dead-locks")
added the vmaccess testcase; add the binary to .gitignore.

Fixes: 2de4e82318c7 ("selftests/ptrace: add test cases for dead-locks")
Signed-off-by: Tommi Rantala 
---
 tools/testing/selftests/ptrace/.gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/testing/selftests/ptrace/.gitignore 
b/tools/testing/selftests/ptrace/.gitignore
index 7bebf9534a86..792318aaa30c 100644
--- a/tools/testing/selftests/ptrace/.gitignore
+++ b/tools/testing/selftests/ptrace/.gitignore
@@ -1,3 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0-only
 get_syscall_info
 peeksiginfo
+vmaccess
-- 
2.26.2



[tip: perf/core] perf bench: Fix div-by-zero if runtime is zero

2020-05-08 Thread tip-bot2 for Tommi Rantala
The following commit has been merged into the perf/core branch of tip:

Commit-ID: 41e7c32b978974adaadd4808ba42f9026634dca3
Gitweb:
https://git.kernel.org/tip/41e7c32b978974adaadd4808ba42f9026634dca3
Author:Tommi Rantala 
AuthorDate:Fri, 17 Apr 2020 16:23:29 +03:00
Committer: Arnaldo Carvalho de Melo 
CommitterDate: Wed, 22 Apr 2020 10:01:33 -03:00

perf bench: Fix div-by-zero if runtime is zero

Fix div-by-zero if runtime is zero:

  $ perf bench futex hash --runtime=0
  # Running 'futex/hash' benchmark:
  Run summary [PID 12090]: 4 threads, each operating on 1024 [private] futexes 
for 0 secs.
  Floating point exception (core dumped)

Signed-off-by: Tommi Rantala 
Acked-by: Jiri Olsa 
Cc: Alexander Shishkin 
Cc: Darren Hart 
Cc: Mark Rutland 
Cc: Namhyung Kim 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Link: 
http://lore.kernel.org/lkml/20200417132330.119407-4-tommi.t.rant...@nokia.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/bench/epoll-wait.c| 3 ++-
 tools/perf/bench/futex-hash.c| 3 ++-
 tools/perf/bench/futex-lock-pi.c | 3 ++-
 3 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/tools/perf/bench/epoll-wait.c b/tools/perf/bench/epoll-wait.c
index f938c58..cf79736 100644
--- a/tools/perf/bench/epoll-wait.c
+++ b/tools/perf/bench/epoll-wait.c
@@ -519,7 +519,8 @@ int bench_epoll_wait(int argc, const char **argv)
qsort(worker, nthreads, sizeof(struct worker), cmpworker);
 
for (i = 0; i < nthreads; i++) {
-   unsigned long t = worker[i].ops / bench__runtime.tv_sec;
+   unsigned long t = bench__runtime.tv_sec > 0 ?
+   worker[i].ops / bench__runtime.tv_sec : 0;
 
update_stats(&throughput_stats, t);
 
diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c
index 65eebe0..915bf3d 100644
--- a/tools/perf/bench/futex-hash.c
+++ b/tools/perf/bench/futex-hash.c
@@ -205,7 +205,8 @@ int bench_futex_hash(int argc, const char **argv)
pthread_mutex_destroy(&thread_lock);
 
for (i = 0; i < nthreads; i++) {
-   unsigned long t = worker[i].ops / bench__runtime.tv_sec;
+   unsigned long t = bench__runtime.tv_sec > 0 ?
+   worker[i].ops / bench__runtime.tv_sec : 0;
update_stats(&throughput_stats, t);
if (!silent) {
if (nfutexes == 1)
diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c
index 89fd8f3..bb25d8b 100644
--- a/tools/perf/bench/futex-lock-pi.c
+++ b/tools/perf/bench/futex-lock-pi.c
@@ -211,7 +211,8 @@ int bench_futex_lock_pi(int argc, const char **argv)
pthread_mutex_destroy(&thread_lock);
 
for (i = 0; i < nthreads; i++) {
-   unsigned long t = worker[i].ops / bench__runtime.tv_sec;
+   unsigned long t = bench__runtime.tv_sec > 0 ?
+   worker[i].ops / bench__runtime.tv_sec : 0;
 
update_stats(&throughput_stats, t);
if (!silent)


[tip: perf/core] perf cgroup: Avoid needless closing of unopened fd

2020-05-08 Thread tip-bot2 for Tommi Rantala
The following commit has been merged into the perf/core branch of tip:

Commit-ID: d2e7d8636fb7d3e30aa8f894003f9e293ea62eea
Gitweb:
https://git.kernel.org/tip/d2e7d8636fb7d3e30aa8f894003f9e293ea62eea
Author:Tommi Rantala 
AuthorDate:Fri, 17 Apr 2020 16:23:26 +03:00
Committer: Arnaldo Carvalho de Melo 
CommitterDate: Wed, 22 Apr 2020 10:01:33 -03:00

perf cgroup: Avoid needless closing of unopened fd

Do not bother with close() if fd is not valid, just to silence valgrind:

$ valgrind ./perf script
==59169== Memcheck, a memory error detector
==59169== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
==59169== Using Valgrind-3.14.0 and LibVEX; rerun with -h for copyright info
==59169== Command: ./perf script
==59169==
==59169== Warning: invalid file descriptor -1 in syscall close()
==59169== Warning: invalid file descriptor -1 in syscall close()
==59169== Warning: invalid file descriptor -1 in syscall close()
==59169== Warning: invalid file descriptor -1 in syscall close()
==59169== Warning: invalid file descriptor -1 in syscall close()
==59169== Warning: invalid file descriptor -1 in syscall close()
==59169== Warning: invalid file descriptor -1 in syscall close()
==59169== Warning: invalid file descriptor -1 in syscall close()

Signed-off-by: Tommi Rantala 
Acked-by: Jiri Olsa 
Cc: Alexander Shishkin 
Cc: Mark Rutland 
Cc: Namhyung Kim 
Cc: Peter Zijlstra 
Link: 
http://lore.kernel.org/lkml/20200417132330.119407-1-tommi.t.rant...@nokia.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/util/cgroup.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c
index b73fb78..050dea9 100644
--- a/tools/perf/util/cgroup.c
+++ b/tools/perf/util/cgroup.c
@@ -107,7 +107,8 @@ found:
 
 static void cgroup__delete(struct cgroup *cgroup)
 {
-   close(cgroup->fd);
+   if (cgroup->fd >= 0)
+   close(cgroup->fd);
zfree(&cgroup->name);
free(cgroup);
 }


[tip: perf/core] perf test session topology: Fix data path

2020-05-08 Thread tip-bot2 for Tommi Rantala
The following commit has been merged into the perf/core branch of tip:

Commit-ID: dbd660e6b2884b864d2642d930a163d3bcebe4be
Gitweb:
https://git.kernel.org/tip/dbd660e6b2884b864d2642d930a163d3bcebe4be
Author:Tommi Rantala 
AuthorDate:Thu, 23 Apr 2020 14:53:40 +03:00
Committer: Arnaldo Carvalho de Melo 
CommitterDate: Thu, 23 Apr 2020 11:08:24 -03:00

perf test session topology: Fix data path

Commit 2d4f27999b88 ("perf data: Add global path holder") missed path
conversion in tests/topology.c, causing the "Session topology" testcase
to "hang" (waits forever for input from stdin) when doing "ssh $VM perf
test".

Can be reproduced by running "cat | perf test topo", and crashed by
replacing cat with true:

  $ true | perf test -v topo
  40: Session topology  :
  --- start ---
  test child forked, pid 3638
  templ file: /tmp/perf-test-QPvAch
  incompatible file format
  incompatible file format (rerun with -v to learn more)
  free(): invalid pointer
  test child interrupted
  ---- end ----
  Session topology: FAILED!

Committer testing:

Reproduced the above result before the patch and after it is back
working:

  # true | perf test -v topo
  41: Session topology  :
  --- start ---
  test child forked, pid 19374
  templ file: /tmp/perf-test-YOTEQg
  CPU 0, core 0, socket 0
  CPU 1, core 1, socket 0
  CPU 2, core 2, socket 0
  CPU 3, core 3, socket 0
  CPU 4, core 0, socket 0
  CPU 5, core 1, socket 0
  CPU 6, core 2, socket 0
  CPU 7, core 3, socket 0
  test child finished with 0
  ---- end ----
  Session topology: Ok
  #

Fixes: 2d4f27999b88 ("perf data: Add global path holder")
Signed-off-by: Tommi Rantala 
Tested-by: Arnaldo Carvalho de Melo 
Acked-by: Jiri Olsa 
Cc: Alexander Shishkin 
Cc: Mamatha Inamdar 
Cc: Mark Rutland 
Cc: Namhyung Kim 
Cc: Peter Zijlstra 
Cc: Ravi Bangoria 
Link: 
http://lore.kernel.org/lkml/20200423115341.562782-1-tommi.t.rant...@nokia.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/tests/topology.c | 12 
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c
index 4a80049..22daf2b 100644
--- a/tools/perf/tests/topology.c
+++ b/tools/perf/tests/topology.c
@@ -33,10 +33,8 @@ static int session_write_header(char *path)
 {
struct perf_session *session;
struct perf_data data = {
-   .file  = {
-   .path = path,
-   },
-   .mode  = PERF_DATA_MODE_WRITE,
+   .path = path,
+   .mode = PERF_DATA_MODE_WRITE,
};
 
session = perf_session__new(&data, false, NULL);
@@ -63,10 +61,8 @@ static int check_cpu_topology(char *path, struct 
perf_cpu_map *map)
 {
struct perf_session *session;
struct perf_data data = {
-   .file  = {
-   .path = path,
-   },
-   .mode  = PERF_DATA_MODE_READ,
+   .path = path,
+   .mode = PERF_DATA_MODE_READ,
};
int i;
 


[tip:perf/core] perf tests shell: Skip trace+probe_vfs_getname.sh if built without trace support

2019-02-27 Thread tip-bot for Tommi Rantala
Commit-ID:  83244772a4cf9490a54182be2f65f45d6b1a1ee8
Gitweb: https://git.kernel.org/tip/83244772a4cf9490a54182be2f65f45d6b1a1ee8
Author: Tommi Rantala 
AuthorDate: Fri, 15 Feb 2019 15:42:46 +0200
Committer:  Arnaldo Carvalho de Melo 
CommitDate: Fri, 15 Feb 2019 13:42:26 -0300

perf tests shell: Skip trace+probe_vfs_getname.sh if built without trace support

If perf was built without trace support, the trace+probe_vfs_getname.sh
'perf test' entry fails:

  # perf trace -h
  perf: 'trace' is not a perf-command. See 'perf --help'

  # perf test 64
  64: Check open filename arg using perf trace + vfs_getname: FAILED!

Check trace support, so that we'll skip the test in that case:

  # perf test 64
  64: Check open filename arg using perf trace + vfs_getname: Skip

Signed-off-by: Tommi Rantala 
Cc: Alexander Shishkin 
Cc: Hendrik Brueckner 
Cc: Jiri Olsa 
Cc: Kim Phillips 
Cc: Michael Petlan 
Cc: Namhyung Kim 
Cc: Peter Zijlstra 
Link: http://lkml.kernel.org/r/20190215134253.11454-1-tt.rant...@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/tests/shell/lib/probe.sh   | 5 +
 tools/perf/tests/shell/trace+probe_vfs_getname.sh | 1 +
 2 files changed, 6 insertions(+)

diff --git a/tools/perf/tests/shell/lib/probe.sh 
b/tools/perf/tests/shell/lib/probe.sh
index 6293cc660947..e37787be672b 100644
--- a/tools/perf/tests/shell/lib/probe.sh
+++ b/tools/perf/tests/shell/lib/probe.sh
@@ -4,3 +4,8 @@ skip_if_no_perf_probe() {
perf probe 2>&1 | grep -q 'is not a perf-command' && return 2
return 0
 }
+
+skip_if_no_perf_trace() {
+   perf trace -h 2>&1 | grep -q -e 'is not a perf-command' -e 'trace 
command not available' && return 2
+   return 0
+}
diff --git a/tools/perf/tests/shell/trace+probe_vfs_getname.sh 
b/tools/perf/tests/shell/trace+probe_vfs_getname.sh
index 50109f27ca07..147efeb6b195 100755
--- a/tools/perf/tests/shell/trace+probe_vfs_getname.sh
+++ b/tools/perf/tests/shell/trace+probe_vfs_getname.sh
@@ -12,6 +12,7 @@
 . $(dirname $0)/lib/probe.sh
 
 skip_if_no_perf_probe || exit 2
+skip_if_no_perf_trace || exit 2
 
 . $(dirname $0)/lib/probe_vfs_getname.sh
 


[PATCH] perf tests shell: Skip trace+probe_vfs_getname.sh if built without trace support

2019-02-15 Thread Tommi Rantala
From: Tommi Rantala 

If perf was built without trace support, trace+probe_vfs_getname.sh
fails:

  # perf trace -h
  perf: 'trace' is not a perf-command. See 'perf --help'

  # perf test 64
  64: Check open filename arg using perf trace + vfs_getname: FAILED!

Check trace support, so that we'll skip the test:

  # perf test 64
  64: Check open filename arg using perf trace + vfs_getname: Skip

Signed-off-by: Tommi Rantala 
---
 tools/perf/tests/shell/lib/probe.sh   | 5 +
 tools/perf/tests/shell/trace+probe_vfs_getname.sh | 1 +
 2 files changed, 6 insertions(+)

diff --git a/tools/perf/tests/shell/lib/probe.sh 
b/tools/perf/tests/shell/lib/probe.sh
index 6293cc660947..e37787be672b 100644
--- a/tools/perf/tests/shell/lib/probe.sh
+++ b/tools/perf/tests/shell/lib/probe.sh
@@ -4,3 +4,8 @@ skip_if_no_perf_probe() {
perf probe 2>&1 | grep -q 'is not a perf-command' && return 2
return 0
 }
+
+skip_if_no_perf_trace() {
+   perf trace -h 2>&1 | grep -q -e 'is not a perf-command' -e 'trace 
command not available' && return 2
+   return 0
+}
diff --git a/tools/perf/tests/shell/trace+probe_vfs_getname.sh 
b/tools/perf/tests/shell/trace+probe_vfs_getname.sh
index 50109f27ca07..147efeb6b195 100755
--- a/tools/perf/tests/shell/trace+probe_vfs_getname.sh
+++ b/tools/perf/tests/shell/trace+probe_vfs_getname.sh
@@ -12,6 +12,7 @@
 . $(dirname $0)/lib/probe.sh
 
 skip_if_no_perf_probe || exit 2
+skip_if_no_perf_trace || exit 2
 
 . $(dirname $0)/lib/probe_vfs_getname.sh
 
-- 
2.20.1



perf trace --no-syscalls -e rcu:* -- garbage in output

2018-06-26 Thread Tommi Rantala

Hi,

There is some garbage in the perf trace output when tracing some rcu 
tracepoints (kernel is configured with CONFIG_RCU_TRACE=y).


For example in rcu:rcu_callback, instead of getting proper rcuname in 
the first "%s" here, there's garbage:


$ tail -1 /sys/kernel/debug/tracing/events/rcu/rcu_callback/format
print fmt: "%s rhp=%p func=%pf %ld/%ld", REC->rcuname, REC->rhp, 
REC->func, REC->qlen_lazy, REC->qlen


$ ./perf trace --no-syscalls -e 'rcu:*' -- sleep 1 2>&1 | od -t c

000   0   .   0   0   0   r   c   u   :   r
020   c   u   _   c   a   l   l   b   a   c   k   : 200 351 345 215
040 377 377 377 377   r   h   p   =   0   x   f   f   f   f   8
060   d   5   5   8   f   5   b   0   1   0   0   f   u   n   c
100   =   f   i   l   e   _   f   r   e   e   _   r   c   u   0
120   /   1  \n   0   .   0   0   8   r   c
140   u   :   r   c   u   _   d   y   n   t   i   c   k   : 373 217
160 276 215 377 377 377 377   4   0   0   0   0   0   0   0   0
200   0   0   0   0   0   0   0   4   0   0   0   0   0   0   0
220   0   0   0   0   0   0   0   2   0   x   1   d   2  \n
240   0   .   0   2   3   r   c   u   :   r   c
260   u   _   d   y   n   t   i   c   k   : 023 220 276 215 377 377
300 377 377   4   0   0   0   0   0   0   0   0   0   0   0   0
320   0   0   2   4   0   0   0   0   0   0   0   0   0   0   0
340   0   0   0   0   0   x   1   d   2  \n
360   0   .   0   2   8   r   c   u   :   r   c   u   _   d   y
400   n   t   i   c   k   : 373 217 276 215 377 377 377 377   4
420   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
440   4   0   0   0   0   0   0   0   0   0   0   0   0   0   0   2
460   0   x   1   d   2  \n   0   .   0   3
[...]


If I got it right, "pevent" in print_str_arg() is zero-initialized 
(pevent->long_size is zero...), causing "%s" format to produce garbage 
bytes instead of the proper string.


-Tommi


Re: backporting "ext4: inplace xattr block update fails to deduplicate blocks" to LTS kernels?

2018-02-22 Thread Tommi Rantala

On 21.02.2018 17:56, Theodore Ts'o wrote:

On Wed, Feb 21, 2018 at 12:40:00PM +0100, Greg Kroah-Hartman wrote:

On Mon, Feb 19, 2018 at 03:26:37PM +0200, Tommi Rantala wrote:


OK to backport it?
I tested it briefly in 4.9, seems to work.


It looks sane, but it would be nice if I can get people who are
backporting ext4 patches to make sure there are no regressions using
one of kvm-xfstests[1] or gce-xfstests[2][3].

[1] 
https://github.com/tytso/xfstests-bld/blob/master/Documentation/kvm-xfstests.md
[2] 
https://github.com/tytso/xfstests-bld/blob/master/Documentation/gce-xfstests.md
[3] https://thunk.org/gce-xfstests

I do run regression tests[4] on stable kernels when I have time, but
it scales much better when other people can help.

[4] 
https://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4.git/tag/?h=ext4-4.9.54-1


I need an ack from the ext4 maintainers before I can take this...


Greg, you can go ahead and take this, but in the future I'd appreciate
it if ext4 backporters could at least run a smoke test (which takes
less than 15 minutes on GCE) before and after the patch, and report no
test regressions.


Thanks for the instructions!

Smoke test results 4.9.82 with and without the patch (attached, to avoid 
email client mangling it), no new failures:


 Summary report
KERNEL:kernel 4.9.82-xfstests #2 SMP Thu Feb 22 14:58:27 EET 2018 x86_64
CPUS:  2
MEM:   1989.2

ext4/4k: 271 tests, 7 failures, 34 skipped, 737 seconds
  Failures: generic/081 generic/383 generic/384 generic/386
generic/441 generic/451 generic/472
Totals: 271 tests, 34 skipped, 7 failures, 0 errors, 685s


 Summary report
KERNEL:kernel 4.9.82-xfstests-1-gb98ae0251413 #1 SMP Thu Feb 22 
14:31:01 EET 2018 x86_64

CPUS:  2
MEM:   1989.2

ext4/4k: 271 tests, 7 failures, 34 skipped, 749 seconds
  Failures: generic/081 generic/383 generic/384 generic/386
generic/441 generic/451 generic/472
Totals: 271 tests, 34 skipped, 7 failures, 0 errors, 694s

FSTESTVER: e2fsprogs v1.43.6-85-g7595699d0 (Wed, 6 Sep 2017 22:04:14 -0400)
FSTESTVER: fio  fio-3.2 (Fri, 3 Nov 2017 15:23:49 -0600)
FSTESTVER: quota  4d81e8b (Mon, 16 Oct 2017 09:42:44 +0200)
FSTESTVER: stress-ng 977ae35 (Wed, 6 Sep 2017 23:45:03 -0400)
FSTESTVER: xfsprogs v4.14.0-rc2-1-g19ca9b0b (Mon, 27 Nov 2017 10:56:21 
-0600)

FSTESTVER: xfstests-bld ff7b8c2 (Wed, 13 Dec 2017 21:24:24 -0500)
FSTESTVER: xfstests linux-v3.8-1832-gafeee2d9 (Sun, 31 Dec 2017 13:35:28 
-0500)

FSTESTCFG: 4k
FSTESTSET: -g quick
FSTESTOPT: aex


Tommi
>From b98ae025141361b9e92fdd470dfd2314a64a47d0 Mon Sep 17 00:00:00 2001
From: Tahsin Erdogan 
Date: Sat, 5 Aug 2017 22:41:42 -0400
Subject: [PATCH] ext4: inplace xattr block update fails to deduplicate blocks

commit ec00022030da5761518476096626338bd67df57a upstream.

When an xattr block has a single reference, block is updated inplace
and it is reinserted to the cache. Later, a cache lookup is performed
to see whether an existing block has the same contents. This cache
lookup will most of the time return the just inserted entry so
deduplication is not achieved.

Running the following test script will produce two xattr blocks which
can be observed in "File ACL: " line of debugfs output:

  mke2fs -b 1024 -I 128 -F -O extent /dev/sdb 1G
  mount /dev/sdb /mnt/sdb

  touch /mnt/sdb/{x,y}

  setfattr -n user.1 -v aaa /mnt/sdb/x
  setfattr -n user.2 -v bbb /mnt/sdb/x

  setfattr -n user.1 -v aaa /mnt/sdb/y
  setfattr -n user.2 -v bbb /mnt/sdb/y

  debugfs -R 'stat x' /dev/sdb | cat
  debugfs -R 'stat y' /dev/sdb | cat

This patch defers the reinsertion to the cache so that we can locate
other blocks with the same contents.

Signed-off-by: Tahsin Erdogan 
Signed-off-by: Theodore Ts'o 
Reviewed-by: Andreas Dilger 
Signed-off-by: Tommi Rantala 
---
 fs/ext4/xattr.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 3eeed8f0aa06..3fadfabcac39 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -837,8 +837,6 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
 if (!IS_LAST_ENTRY(s->first))
 	ext4_xattr_rehash(header(s->base),
 			  s->here);
-ext4_xattr_cache_insert(ext4_mb_cache,
-	bs->bh);
 			}
 			ext4_xattr_block_csum_set(inode, bs->bh);
 			unlock_buffer(bs->bh);
@@ -959,6 +957,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
 		} else if (bs->bh && s->base == bs->bh->b_data) {
 			/* We were modifying this block in-place. */
 			ea_bdebug(bs->bh, "keeping this block");
+			ext4_xattr_cache_insert(ext4_mb_cache, bs->bh);
 			new_bh = bs->bh;
 			get_bh(new_bh);
 		} else {
-- 
2.14.3



Re: net: hang in unregister_netdevice: waiting for lo to become free

2018-02-21 Thread Tommi Rantala

On 20.02.2018 18:26, Neil Horman wrote:

On Tue, Feb 20, 2018 at 09:14:41AM +0100, Dmitry Vyukov wrote:

On Tue, Feb 20, 2018 at 8:56 AM, Tommi Rantala
 wrote:

On 19.02.2018 20:59, Dmitry Vyukov wrote:

Is this meant to be fixed already? I am still seeing this on the
latest upstream tree.



These two commits are in v4.16-rc1:

commit 4a31a6b19f9ddf498c81f5c9b089742b7472a6f8
Author: Tommi Rantala 
Date:   Mon Feb 5 21:48:14 2018 +0200

 sctp: fix dst refcnt leak in sctp_v4_get_dst
...
 Fixes: 410f03831 ("sctp: add routing output fallback")
 Fixes: 0ca50d12f ("sctp: fix src address selection if using secondary
addresses")


commit 957d761cf91cdbb175ad7d8f5472336a4d54dbf2
Author: Alexey Kodanev 
Date:   Mon Feb 5 15:10:35 2018 +0300

 sctp: fix dst refcnt leak in sctp_v6_get_dst()
...
 Fixes: dbc2b5e9a09e ("sctp: fix src address selection if using secondary
addresses for ipv6")


I guess we missed something if it's still reproducible.

I can check it later this week, unless someone else beats me to it.


Hi Tommi,

Hmmm, I can't claim that it's exactly the same bug. Perhaps it's
another one then. But I am still seeing these:

[   58.799130] unregister_netdevice: waiting for lo to become free.
Usage count = 4
[   60.847138] unregister_netdevice: waiting for lo to become free.
Usage count = 4
[   62.895093] unregister_netdevice: waiting for lo to become free.
Usage count = 4
[   64.943103] unregister_netdevice: waiting for lo to become free.
Usage count = 4

on upstream tree pulled ~12 hours ago.


Can you write a systemtap script to probe dev_hold, and dev_put, printing out a
backtrace if the device name matches "lo".  That should tell us definitively if
the problem is in the same location or not


Hi Dmitry, I tested with the reproducer and the kernel .config file that 
you sent in the first email in this thread:


With 4.16-rc2 unable to reproduce.

With 4.15-rc9 bug reproducible, and I get "unregister_netdevice: waiting 
for lo to become free. Usage count = 3"


With 4.15-rc9 and Alexey's "sctp: fix dst refcnt leak in 
sctp_v6_get_dst()" cherry-picked on top, unable to reproduce.



Is syzkaller doing something else now to trigger the bug...?
Can you still trigger the bug with the same reproducer?


Tommi


Re: net: hang in unregister_netdevice: waiting for lo to become free

2018-02-19 Thread Tommi Rantala

On 19.02.2018 20:59, Dmitry Vyukov wrote:

On Sat, Feb 3, 2018 at 1:15 PM, Xin Long  wrote:

On 1/30/18 1:57 PM, David Ahern wrote:

On 1/30/18 1:08 PM, Daniel Borkmann wrote:

On 01/30/2018 07:32 PM, Cong Wang wrote:

On Tue, Jan 30, 2018 at 4:09 AM, Dmitry Vyukov  wrote:

Hello,

The following program creates a hang in unregister_netdevice.
cleanup_net work hangs there forever periodically printing
"unregister_netdevice: waiting for lo to become free. Usage count = 3"
and creation of any new network namespaces hangs forever.


Interestingly, this is not reproducible on net-next.


The most recent change on netns refcnt was 4ee806d51176 ("net: tcp: close
sock if net namespace is exiting") in net/net-next from 5 days ago, maybe
fixed due to that?



This appears to be the commit introducing the refcnt leak:

$ git bisect bad
dbc2b5e9a09e9a6664679a667ff81cff6e5f2641 is the first bad commit
commit dbc2b5e9a09e9a6664679a667ff81cff6e5f2641
Author: Xin Long 
Date:   Fri May 12 14:39:52 2017 +0800

 sctp: fix src address selection if using secondary addresses for ipv6


v4.14 is bad. Running bisect in the background while doing other things



Interesting. The commit that avoids the refcnt leak is

commit 955ec4cb3b54c7c389a9f830be7d3ae2056b9212
Author: David Ahern 
Date:   Wed Jan 24 19:45:29 2018 -0800

 net/ipv6: Do not allow route add with a device that is down

That commit does not intentionally address the problem so it is just
masking the problematic code introduced by the commit above.

Thanks, David A.

I'm still on a trip. will look into this asap.


Alexey and Tommi already had the patches for this issue on
both SCTP v4 and v6 dst_get, Thanks.




Is this meant to be fixed already? I am still seeing this on the
latest upstream tree.



These two commits are in v4.16-rc1:

commit 4a31a6b19f9ddf498c81f5c9b089742b7472a6f8
Author: Tommi Rantala 
Date:   Mon Feb 5 21:48:14 2018 +0200

sctp: fix dst refcnt leak in sctp_v4_get_dst
...
Fixes: 410f03831 ("sctp: add routing output fallback")
Fixes: 0ca50d12f ("sctp: fix src address selection if using 
secondary addresses")



commit 957d761cf91cdbb175ad7d8f5472336a4d54dbf2
Author: Alexey Kodanev 
Date:   Mon Feb 5 15:10:35 2018 +0300

sctp: fix dst refcnt leak in sctp_v6_get_dst()
...
Fixes: dbc2b5e9a09e ("sctp: fix src address selection if using 
secondary addresses for ipv6")



I guess we missed something if it's still reproducible.

I can check it later this week, unless someone else beats me to it.

Tommi


backporting "ext4: inplace xattr block update fails to deduplicate blocks" to LTS kernels?

2018-02-19 Thread Tommi Rantala

Hi,

4.9 (and earlier) LTS kernels are missing this:

commit ec00022030da5761518476096626338bd67df57a
Author: Tahsin Erdogan 
Date:   Sat Aug 5 22:41:42 2017 -0400

ext4: inplace xattr block update fails to deduplicate blocks


OK to backport it?
I tested it briefly in 4.9, seems to work.

One of our testers noticed a glusterfs performance regression when going 
from 4.4 to 4.9, caused by the duplicated blocks.


If I understand everything correctly, in 4.4 mbcache uses the block 
number in the hash table bucket calculation, and the hash table is 
populated quite evenly even if there are duplicates. So the mbcache is fast.


But in later kernels mbcache puts all the duplicate entries into a 
single bucket. As the entries are stored in one big linked list, this 
obviously makes the mbcache slow.



I tested this in 4.9 (which still has the ext4_xattr_rehash() call that 
got eliminated in commit "ext4: eliminate xattr entry e_hash 
recalculation for removes"):



diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 3eeed8f0aa06..3fadfabcac39 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -837,8 +837,6 @@ ext4_xattr_block_set(handle_t *handle, struct inode 
*inode,

if (!IS_LAST_ENTRY(s->first))
ext4_xattr_rehash(header(s->base),
  s->here);
-   ext4_xattr_cache_insert(ext4_mb_cache,
-   bs->bh);
}
ext4_xattr_block_csum_set(inode, bs->bh);
unlock_buffer(bs->bh);
@@ -959,6 +957,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode 
*inode,

} else if (bs->bh && s->base == bs->bh->b_data) {
/* We were modifying this block in-place. */
ea_bdebug(bs->bh, "keeping this block");
+   ext4_xattr_cache_insert(ext4_mb_cache, bs->bh);
new_bh = bs->bh;
get_bh(new_bh);
} else {



Tommi



intel_pmu_init() extra_attr memory leak

2017-12-19 Thread Tommi Rantala

Hi,

I'm seeing this kmemleak report in v4.15-rc4:

# cat /sys/kernel/debug/kmemleak
unreferenced object 0x8801f3d5d720 (size 64):
  comm "swapper/0", pid 1, jiffies 4294667312 (age 2687.423s)
  hex dump (first 32 bytes):
60 d1 41 ad ff ff ff ff 20 d1 41 ad ff ff ff ff  `.A. .A.
80 d0 41 ad ff ff ff ff 40 d0 41 ad ff ff ff ff  ..A.@.A.
  backtrace:
[] intel_pmu_init+0x1844/0x1d38
[] init_hw_perf_events+0x8c/0x66f
[] do_one_initcall+0x7b/0x1d0
[<8ee1f02a>] kernel_init_freeable+0x163/0x2f9
[] kernel_init+0xf/0x120
[<38a99264>] ret_from_fork+0x24/0x30
[] 0x


$ ./scripts/faddr2line vmlinux intel_pmu_init+0x1844/0x1d38
intel_pmu_init+0x1844/0x1d38:
intel_pmu_init at arch/x86/events/intel/core.c:4296


Which matches line:
extra_attr = merge_attr(extra_attr, skl_format_attr);


So looks like "extra_attr" is leaked here.


"git blame" points to this commit:

commit a5df70c354c26e20d5fd8eb64517f724e97ef0b2
Author: Andi Kleen 
Date:   Tue Aug 22 11:52:00 2017 -0700

perf/x86: Only show format attributes when supported



-Tommi


Re: [PATCH net v2] tipc: call tipc_rcv() only if bearer is up in tipc_udp_recv()

2017-12-01 Thread Tommi Rantala

On 01.12.2017 15:18, Ying Xue wrote:

On 11/30/2017 08:32 PM, Tommi Rantala wrote:

In my opinion, the real root cause of the issue is that we assign a
not-yet-initialized bearer instance to ub->bearer too early, through
rcu_assign_pointer(ub->bearer, b) in tipc_udp_enable(). If we instead
assign the bearer pointer at the end of tipc_udp_enable(), where the
bearer has completed its initialization, the issue would be avoided.

Hi, sorry, I fail to see how that helps.

bearer->tolerance is only initialized in tipc_enable_bearer() after
calling m->enable_media() ie. tipc_udp_enable().

So even if we do "rcu_assign_pointer(ub->bearer, b)" later in
tipc_udp_enable(), bearer->tolerance will still be uninitialized, and
the crash can happen.


Sorry, I missed the point that b->tolerance is still not initialized when we
assign the bearer pointer to ub->bearer later.

But in my view the issue happens because we enable the media too early.
So it's better to change the code as below:


Thanks, looks good to me!

Tested in 4.4 (which does not have b->up), and this fixes the oops also 
there.


-Tommi



diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 47ec121..ec6f02a 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -320,19 +320,18 @@ static int tipc_enable_bearer(struct net *net,
const char *name,

 strcpy(b->name, name);
 b->media = m;
-   res = m->enable_media(net, b, attr);
-   if (res) {
-   pr_warn("Bearer <%s> rejected, enable failure (%d)\n",
-   name, -res);
-   return -EINVAL;
-   }
-
 b->identity = bearer_id;
 b->tolerance = m->tolerance;
 b->window = m->window;
 b->domain = disc_domain;
 b->net_plane = bearer_id + 'A';
 b->priority = priority;
+   res = m->enable_media(net, b, attr);
+   if (res) {
+   pr_warn("Bearer <%s> rejected, enable failure (%d)\n",
+   name, -res);
+   return -EINVAL;
+   }
 test_and_set_bit_lock(0, &b->up);



Re: [PATCH net v2] tipc: call tipc_rcv() only if bearer is up in tipc_udp_recv()

2017-11-30 Thread Tommi Rantala

On 30.11.2017 12:57, Ying Xue wrote:

On 11/29/2017 06:48 PM, Tommi Rantala wrote:

Remove the second tipc_rcv() call in tipc_udp_recv(). We have just
checked that the bearer is not up, and calling tipc_rcv() with a bearer
that is not up leads to a TIPC div-by-zero crash in
tipc_node_calculate_timer(). The crash is rare in practice, but can
happen like this:


In my opinion, the real root cause of the issue is that we assign a
not-yet-initialized bearer instance to ub->bearer too early, through
rcu_assign_pointer(ub->bearer, b) in tipc_udp_enable(). If we instead
assign the bearer pointer at the end of tipc_udp_enable(), where the
bearer has completed its initialization, the issue would be avoided.


Hi, sorry, I fail to see how that helps.

bearer->tolerance is only initialized in tipc_enable_bearer() after 
calling m->enable_media() ie. tipc_udp_enable().


So even if we do "rcu_assign_pointer(ub->bearer, b)" later in 
tipc_udp_enable(), bearer->tolerance will still be uninitialized, and 
the crash can happen.


BR,
Tommi


Thanks,
Ying



   We're enabling a bearer, but it's not yet up and fully initialized.
   At the same time we receive a discovery packet, and in tipc_udp_recv()
   we end up calling tipc_rcv() with the not-yet-initialized bearer,
   causing later the div-by-zero crash in tipc_node_calculate_timer().

Jon Maloy explains the impact of removing the second tipc_rcv() call:
   "link setup in the worst case will be delayed until the next arriving
discovery messages, 1 sec later, and this is an acceptable delay."

As the tipc_rcv() call is removed, just leave the function via the
rcu_out label, so that we will kfree_skb().

[   12.590450] Own node address <1.1.1>, network identity 1
[   12.668088] divide error:  [#1] SMP
[   12.676952] CPU: 2 PID: 0 Comm: swapper/2 Not tainted 4.14.2-dirty #1
[   12.679225] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
1.10.2-2.fc27 04/01/2014
[   12.682095] task: 8c2a761edb80 task.stack: a41cc0cac000
[   12.684087] RIP: 0010:tipc_node_calculate_timer.isra.12+0x45/0x60 [tipc]
[   12.686486] RSP: 0018:8c2a7fc838a0 EFLAGS: 00010246
[   12.688451] RAX:  RBX: 8c2a5b382600 RCX: 
[   12.691197] RDX:  RSI: 8c2a5b382600 RDI: 8c2a5b382600
[   12.693945] RBP: 8c2a7fc838b0 R08: 0001 R09: 0001
[   12.696632] R10:  R11:  R12: 8c2a5d8949d8
[   12.699491] R13: 95ede400 R14:  R15: 8c2a5d894800
[   12.702338] FS:  () GS:8c2a7fc8() 
knlGS:
[   12.705099] CS:  0010 DS:  ES:  CR0: 80050033
[   12.706776] CR2: 01bb9440 CR3: bd009001 CR4: 003606e0
[   12.708847] DR0:  DR1:  DR2: 
[   12.711016] DR3:  DR6: fffe0ff0 DR7: 0400
[   12.712627] Call Trace:
[   12.713390]  
[   12.714011]  tipc_node_check_dest+0x2e8/0x350 [tipc]
[   12.715286]  tipc_disc_rcv+0x14d/0x1d0 [tipc]
[   12.716370]  tipc_rcv+0x8b0/0xd40 [tipc]
[   12.717396]  ? minmax_running_min+0x2f/0x60
[   12.718248]  ? dst_alloc+0x4c/0xa0
[   12.718964]  ? tcp_ack+0xaf1/0x10b0
[   12.719658]  ? tipc_udp_is_known_peer+0xa0/0xa0 [tipc]
[   12.720634]  tipc_udp_recv+0x71/0x1d0 [tipc]
[   12.721459]  ? dst_alloc+0x4c/0xa0
[   12.722130]  udp_queue_rcv_skb+0x264/0x490
[   12.722924]  __udp4_lib_rcv+0x21e/0x990
[   12.723670]  ? ip_route_input_rcu+0x2dd/0xbf0
[   12.724442]  ? tcp_v4_rcv+0x958/0xa40
[   12.725039]  udp_rcv+0x1a/0x20
[   12.725587]  ip_local_deliver_finish+0x97/0x1d0
[   12.726323]  ip_local_deliver+0xaf/0xc0
[   12.726959]  ? ip_route_input_noref+0x19/0x20
[   12.727689]  ip_rcv_finish+0xdd/0x3b0
[   12.728307]  ip_rcv+0x2ac/0x360
[   12.728839]  __netif_receive_skb_core+0x6fb/0xa90
[   12.729580]  ? udp4_gro_receive+0x1a7/0x2c0
[   12.730274]  __netif_receive_skb+0x1d/0x60
[   12.730953]  ? __netif_receive_skb+0x1d/0x60
[   12.731637]  netif_receive_skb_internal+0x37/0xd0
[   12.732371]  napi_gro_receive+0xc7/0xf0
[   12.732920]  receive_buf+0x3c3/0xd40
[   12.733441]  virtnet_poll+0xb1/0x250
[   12.733944]  net_rx_action+0x23e/0x370
[   12.734476]  __do_softirq+0xc5/0x2f8
[   12.734922]  irq_exit+0xfa/0x100
[   12.735315]  do_IRQ+0x4f/0xd0
[   12.735680]  common_interrupt+0xa2/0xa2
[   12.736126]  
[   12.736416] RIP: 0010:native_safe_halt+0x6/0x10
[   12.736925] RSP: 0018:a41cc0cafe90 EFLAGS: 0246 ORIG_RAX: 
ff4d
[   12.737756] RAX:  RBX: 8c2a761edb80 RCX: 
[   12.738504] RDX:  RSI:  RDI: 
[   12.739258] RBP: a41cc0cafe90 R08: 014b5b9795e5 R09: a41cc12c7e88
[   12.740118] R10:  R11:  R12: 0002
[   12.740964] R13: 8c2a761edb80 R14

[PATCH net v2] tipc: call tipc_rcv() only if bearer is up in tipc_udp_recv()

2017-11-29 Thread Tommi Rantala
ff8200
(relocation range: 0x8000-0xbfff)
[   12.751215] Rebooting in 60 seconds..

Fixes: c9b64d492b1f ("tipc: add replicast peer discovery")
Signed-off-by: Tommi Rantala 
Cc: Jon Maloy 
---

v2: Resorted to a minimal fix, as per feedback from David M.

 net/tipc/udp_media.c | 4 
 1 file changed, 4 deletions(-)

diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index ecca64fc6a6f..3deabcab4882 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -371,10 +371,6 @@ static int tipc_udp_recv(struct sock *sk, struct sk_buff 
*skb)
goto rcu_out;
}
 
-   tipc_rcv(sock_net(sk), skb, b);
-   rcu_read_unlock();
-   return 0;
-
 rcu_out:
rcu_read_unlock();
 out:
-- 
2.14.3



Re: [PATCH] tipc: call tipc_rcv() only if bearer is up in tipc_udp_recv()

2017-11-28 Thread Tommi Rantala

On 28.11.2017 16:58, David Miller wrote:

From: Tommi Rantala 
Date: Tue, 28 Nov 2017 14:53:15 +0200


-
-   if (unlikely(msg_user(hdr) == LINK_CONFIG)) {
-   err = tipc_udp_rcast_disc(b, skb);
-   if (err)
-   goto rcu_out;
+   } else {
+   if (unlikely(b && msg_user(hdr) == LINK_CONFIG))
+   tipc_udp_rcast_disc(b, skb);
+   kfree_skb(skb);
}


Either put the 'err' propagation back or clearly explain in your
commit log message why this part of the change is absolutely essential
for this bug fix.

Thank you.



Thanks for the feedback. I'll post patch v2 soon.

-Tommi


[PATCH] tipc: call tipc_rcv() only if bearer is up in tipc_udp_recv()

2017-11-28 Thread Tommi Rantala
Call tipc_rcv() only if bearer is up in tipc_udp_recv().
Fixes a rare TIPC div-by-zero crash in tipc_node_calculate_timer():

We're enabling a bearer, but it's not yet up and fully initialized.
At the same time we receive a discovery packet, and in tipc_udp_recv()
we end up calling tipc_rcv() with the not-yet-initialized bearer,
later causing a div-by-zero crash in tipc_node_calculate_timer().

[   12.590450] Own node address <1.1.1>, network identity 1
[   12.668088] divide error:  [#1] SMP
[   12.676952] CPU: 2 PID: 0 Comm: swapper/2 Not tainted 4.14.2-dirty #1
[   12.679225] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
1.10.2-2.fc27 04/01/2014
[   12.682095] task: 8c2a761edb80 task.stack: a41cc0cac000
[   12.684087] RIP: 0010:tipc_node_calculate_timer.isra.12+0x45/0x60 [tipc]
[   12.686486] RSP: 0018:8c2a7fc838a0 EFLAGS: 00010246
[   12.688451] RAX:  RBX: 8c2a5b382600 RCX: 
[   12.691197] RDX:  RSI: 8c2a5b382600 RDI: 8c2a5b382600
[   12.693945] RBP: 8c2a7fc838b0 R08: 0001 R09: 0001
[   12.696632] R10:  R11:  R12: 8c2a5d8949d8
[   12.699491] R13: 95ede400 R14:  R15: 8c2a5d894800
[   12.702338] FS:  () GS:8c2a7fc8() 
knlGS:
[   12.705099] CS:  0010 DS:  ES:  CR0: 80050033
[   12.706776] CR2: 01bb9440 CR3: bd009001 CR4: 003606e0
[   12.708847] DR0:  DR1:  DR2: 
[   12.711016] DR3:  DR6: fffe0ff0 DR7: 0400
[   12.712627] Call Trace:
[   12.713390]  
[   12.714011]  tipc_node_check_dest+0x2e8/0x350 [tipc]
[   12.715286]  tipc_disc_rcv+0x14d/0x1d0 [tipc]
[   12.716370]  tipc_rcv+0x8b0/0xd40 [tipc]
[   12.717396]  ? minmax_running_min+0x2f/0x60
[   12.718248]  ? dst_alloc+0x4c/0xa0
[   12.718964]  ? tcp_ack+0xaf1/0x10b0
[   12.719658]  ? tipc_udp_is_known_peer+0xa0/0xa0 [tipc]
[   12.720634]  tipc_udp_recv+0x71/0x1d0 [tipc]
[   12.721459]  ? dst_alloc+0x4c/0xa0
[   12.722130]  udp_queue_rcv_skb+0x264/0x490
[   12.722924]  __udp4_lib_rcv+0x21e/0x990
[   12.723670]  ? ip_route_input_rcu+0x2dd/0xbf0
[   12.724442]  ? tcp_v4_rcv+0x958/0xa40
[   12.725039]  udp_rcv+0x1a/0x20
[   12.725587]  ip_local_deliver_finish+0x97/0x1d0
[   12.726323]  ip_local_deliver+0xaf/0xc0
[   12.726959]  ? ip_route_input_noref+0x19/0x20
[   12.727689]  ip_rcv_finish+0xdd/0x3b0
[   12.728307]  ip_rcv+0x2ac/0x360
[   12.728839]  __netif_receive_skb_core+0x6fb/0xa90
[   12.729580]  ? udp4_gro_receive+0x1a7/0x2c0
[   12.730274]  __netif_receive_skb+0x1d/0x60
[   12.730953]  ? __netif_receive_skb+0x1d/0x60
[   12.731637]  netif_receive_skb_internal+0x37/0xd0
[   12.732371]  napi_gro_receive+0xc7/0xf0
[   12.732920]  receive_buf+0x3c3/0xd40
[   12.733441]  virtnet_poll+0xb1/0x250
[   12.733944]  net_rx_action+0x23e/0x370
[   12.734476]  __do_softirq+0xc5/0x2f8
[   12.734922]  irq_exit+0xfa/0x100
[   12.735315]  do_IRQ+0x4f/0xd0
[   12.735680]  common_interrupt+0xa2/0xa2
[   12.736126]  
[   12.736416] RIP: 0010:native_safe_halt+0x6/0x10
[   12.736925] RSP: 0018:a41cc0cafe90 EFLAGS: 0246 ORIG_RAX: 
ff4d
[   12.737756] RAX:  RBX: 8c2a761edb80 RCX: 
[   12.738504] RDX:  RSI:  RDI: 
[   12.739258] RBP: a41cc0cafe90 R08: 014b5b9795e5 R09: a41cc12c7e88
[   12.740118] R10:  R11:  R12: 0002
[   12.740964] R13: 8c2a761edb80 R14:  R15: 
[   12.741831]  default_idle+0x2a/0x100
[   12.742323]  arch_cpu_idle+0xf/0x20
[   12.742796]  default_idle_call+0x28/0x40
[   12.743312]  do_idle+0x179/0x1f0
[   12.743761]  cpu_startup_entry+0x1d/0x20
[   12.744291]  start_secondary+0x112/0x120
[   12.744816]  secondary_startup_64+0xa5/0xa5
[   12.745367] Code: b9 f4 01 00 00 48 89 c2 48 c1 ea 02 48 3d d3 07 00
00 48 0f 47 d1 49 8b 0c 24 48 39 d1 76 07 49 89 14 24 48 89 d1 31 d2 48
89 df <48> f7 f1 89 c6 e8 81 6e ff ff 5b 41 5c 5d c3 66 90 66 2e 0f 1f
[   12.747527] RIP: tipc_node_calculate_timer.isra.12+0x45/0x60 [tipc] RSP: 
8c2a7fc838a0
[   12.748555] ---[ end trace 1399ab83390650fd ]---
[   12.749296] Kernel panic - not syncing: Fatal exception in interrupt
[   12.750123] Kernel Offset: 0x1320 from 0x8200
(relocation range: 0x8000-0xbfff)
[   12.751215] Rebooting in 60 seconds..

Fixes: c9b64d492b1f ("tipc: add replicast peer discovery")
Signed-off-by: Tommi Rantala 
---
 net/tipc/udp_media.c | 29 +++--
 1 file changed, 7 insertions(+), 22 deletions(-)

diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index ecca64fc6a6f..599e7be92024 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -344,42 +344,27 @@

tipc_node_calculate_timer div-by-zero

2017-11-27 Thread Tommi Rantala

Hi,

I'm seeing a rare TIPC div-by-zero crash in tipc_node_calculate_timer().

If I get it right, we're receiving a discovery packet while enabling a 
bearer. The bearer is not yet fully initialized, causing the discovery 
packet processing to use a zero tolerance value, which then causes the 
div-by-zero in tipc_node_calculate_timer().


I can reproduce the issue easily by adding some sleep in 
tipc_enable_bearer(), and the machine always crashes after booting it up:


diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 47ec121574ce..00bdd87cd614 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -327,12 +327,16 @@ static int tipc_enable_bearer(struct net *net, 
const char *name,

return -EINVAL;
}

+   pr_warn("HACK: sleeping 2 seconds!");
+   usleep_range(200, 210);
+
b->identity = bearer_id;
b->tolerance = m->tolerance;
b->window = m->window;


In 4.14 removing the latter tipc_rcv() call in tipc_udp_recv() [which 
was added in commit c9b64d492b1f (tipc: add replicast peer 
discovery)] seems to fix the crash, but I do not really understand the code:


diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index ecca64fc6a6f..371653acf1f6 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -371,9 +371,11 @@ static int tipc_udp_recv(struct sock *sk, struct 
sk_buff *skb)

goto rcu_out;
}

+   /*
tipc_rcv(sock_net(sk), skb, b);
rcu_read_unlock();
return 0;
+   */

 rcu_out:
rcu_read_unlock();



I have tested 4.4 and 4.14, and the bug is reproducible in both versions.

Comments?

-Tommi



[   12.576885] tipc: Activated (version 2.0.0)
[   12.577506] NET: Registered protocol family 30
[   12.578212] tipc: Started in single node mode
[   12.589907] Started in network mode
[   12.590450] Own node address <1.1.1>, network identity 1
[   12.592022] HACK: sleeping 2 seconds!
[   12.668088] divide error:  [#1] SMP
[   12.670993] Modules linked in: tipc ip6_udp_tunnel udp_tunnel nfsd 
auth_rpcgss oid_registry nfs_acl nfs lockd grace sunrpc isofs 
aesni_intel aes_x86_64 crypto_simd cryptd glue_helper ata_piix 
sch_fq_codel autofs4
[   12.676952] CPU: 2 PID: 0 Comm: swapper/2 Not tainted 
4.14.2-pc64-dirty #1
[   12.679225] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), 
BIOS 1.10.2-2.fc27 04/01/2014

[   12.682095] task: 8c2a761edb80 task.stack: a41cc0cac000
[   12.684087] RIP: 0010:tipc_node_calculate_timer.isra.12+0x45/0x60 [tipc]
[   12.686486] RSP: 0018:8c2a7fc838a0 EFLAGS: 00010246
[   12.688451] RAX:  RBX: 8c2a5b382600 RCX: 

[   12.691197] RDX:  RSI: 8c2a5b382600 RDI: 
8c2a5b382600
[   12.693945] RBP: 8c2a7fc838b0 R08: 0001 R09: 
0001
[   12.696632] R10:  R11:  R12: 
8c2a5d8949d8
[   12.699491] R13: 95ede400 R14:  R15: 
8c2a5d894800
[   12.702338] FS:  () GS:8c2a7fc8() 
knlGS:

[   12.705099] CS:  0010 DS:  ES:  CR0: 80050033
[   12.706776] CR2: 01bb9440 CR3: bd009001 CR4: 
003606e0
[   12.708847] DR0:  DR1:  DR2: 

[   12.711016] DR3:  DR6: fffe0ff0 DR7: 
0400

[   12.712627] Call Trace:
[   12.713390]  
[   12.714011]  tipc_node_check_dest+0x2e8/0x350 [tipc]
[   12.715286]  tipc_disc_rcv+0x14d/0x1d0 [tipc]
[   12.716370]  tipc_rcv+0x8b0/0xd40 [tipc]
[   12.717396]  ? minmax_running_min+0x2f/0x60
[   12.718248]  ? dst_alloc+0x4c/0xa0
[   12.718964]  ? tcp_ack+0xaf1/0x10b0
[   12.719658]  ? tipc_udp_is_known_peer+0xa0/0xa0 [tipc]
[   12.720634]  tipc_udp_recv+0x71/0x1d0 [tipc]
[   12.721459]  ? dst_alloc+0x4c/0xa0
[   12.722130]  udp_queue_rcv_skb+0x264/0x490
[   12.722924]  __udp4_lib_rcv+0x21e/0x990
[   12.723670]  ? ip_route_input_rcu+0x2dd/0xbf0
[   12.724442]  ? tcp_v4_rcv+0x958/0xa40
[   12.725039]  udp_rcv+0x1a/0x20
[   12.725587]  ip_local_deliver_finish+0x97/0x1d0
[   12.726323]  ip_local_deliver+0xaf/0xc0
[   12.726959]  ? ip_route_input_noref+0x19/0x20
[   12.727689]  ip_rcv_finish+0xdd/0x3b0
[   12.728307]  ip_rcv+0x2ac/0x360
[   12.728839]  __netif_receive_skb_core+0x6fb/0xa90
[   12.729580]  ? udp4_gro_receive+0x1a7/0x2c0
[   12.730274]  __netif_receive_skb+0x1d/0x60
[   12.730953]  ? __netif_receive_skb+0x1d/0x60
[   12.731637]  netif_receive_skb_internal+0x37/0xd0
[   12.732371]  napi_gro_receive+0xc7/0xf0
[   12.732920]  receive_buf+0x3c3/0xd40
[   12.733441]  virtnet_poll+0xb1/0x250
[   12.733944]  net_rx_action+0x23e/0x370
[   12.734476]  __do_softirq+0xc5/0x2f8
[   12.734922]  irq_exit+0xfa/0x100
[   12.735315]  do_IRQ+0x4f/0xd0
[   12.735680]  common_interrupt+0xa2/0xa2
[   12.736126]  
[   12.736416] RIP: 0010:native_safe_halt+0x6/0x10
[   12.736925] RSP: 0018:a41cc0cafe90 EFLAGS

Re: tipc_udp_send_msg oops in 4.4 when setting link tolerance

2017-11-15 Thread Tommi Rantala

On 14.11.2017 13:35, Jon Maloy wrote:

Found it, the missing patch is this one (9b3009604b8e does not help):

commit d01332f1acacc0cb43a61f4244dd2b846d4cd585
Author: Richard Alpe 
Date:   Mon Feb 1 08:19:56 2016 +0100

  tipc: fix link attribute propagation bug


It does not apply as-is to 4.4, so I backported it, see below.
Does it look good? I can send it forward to Greg for inclusion in 4.4.


Yes. I would be very grateful if you do.


OK, sent the patch to sta...@vger.kernel.org


But with this patch included, I can easily reproduce the "BUG: Bad page state
in process git" issue also in 4.4 like this:

$ tipc link set tolerance 100 link $LINKNAME
$ cd /tmp && git clone /path/to/linux-stable

I can try to debug that a bit more to see if I can figure it out.


I would appreciate that. If this turns out to also be an already fixed bug 
(most likely) you can send that directly to GKH too.
If you find a bug that requires a new patch send it to tipc-discussion for 
review first.

Finally, I want to say that I very much appreciate users like you, who try 
solving the problems themselves and contribute to the code.
We are right now in an upgrade phase of TIPC, where we have been/are adding new 
features to TIPC (overlapping ring neighbor monitoring, group communication, 
new addressing model, performance improvements...) which inevitably entail some 
new bugs, but I expect the frequency of these changes/improvements to TIPC to go 
down in a few months, and with that the number of fixes needed.


Thanks!

After more testing, I noticed that I could only reproduce the "BUG: Bad 
page state" on a single machine, so maybe something is wrong with that 
machine, and not with tipc.


-Tommi


Re: tipc_udp_send_msg oops in 4.4 when setting link tolerance

2017-11-14 Thread Tommi Rantala

On 13.11.2017 23:25, Jon Maloy wrote:
> Hi Tommi,
> I am not sure, but it seems like the following patch is what you need:
> commit 9b3009604b8e ("tipc: add net device to skb before UDP xmit")
> This was applied in tipc 4.5.

Found it, the missing patch is this one (9b3009604b8e does not help):

commit d01332f1acacc0cb43a61f4244dd2b846d4cd585
Author: Richard Alpe 
Date:   Mon Feb 1 08:19:56 2016 +0100

tipc: fix link attribute propagation bug


It does not apply as-is to 4.4, so I backported it, see below.
Does it look good? I can send it forward to Greg for inclusion in 4.4.


But with this patch included, I can easily reproduce the "BUG: Bad page 
state in process git" issue also in 4.4 like this:


$ tipc link set tolerance 100 link $LINKNAME
$ cd /tmp && git clone /path/to/linux-stable

I can try to debug that a bit more to see if I can figure it out.

-Tommi



From e1857e6c60355296fd1cbe6e376d8a7265c2b289 Mon Sep 17 00:00:00 2001
From: Richard Alpe 
Date: Tue, 14 Nov 2017 11:09:50 +0200
Subject: [PATCH] tipc: fix link attribute propagation bug

commit d01332f1acacc0cb43a61f4244dd2b846d4cd585 upstream.

[backported to 4.4 by Tommi Rantala]

Changing certain link attributes (link tolerance and link priority)
from the TIPC management tool is supposed to automatically take
effect at both endpoints of the affected link.

Currently the media address is not instantiated for the link and is
used uninstantiated when crafting protocol messages designated for the
peer endpoint. This means that changing a link property currently
results in the property being changed on the local machine but the
protocol message designated for the peer gets lost, resulting in a
property discrepancy between the endpoints.

In this patch we resolve this by using the media address from the
link entry and using the bearer transmit function to send it. Hence,
we can now eliminate the redundant function tipc_link_proto_xmit() and
the redundant field tipc_link::media_addr.

Fixes: 2af5ae372a4b (tipc: clean up unused code and structures)
Reviewed-by: Jon Maloy 
Reported-by: Jason Hu 
Signed-off-by: Richard Alpe 
Signed-off-by: David S. Miller 
Signed-off-by: Tommi Rantala 
---
 net/tipc/link.c | 28 ++--
 net/tipc/link.h |  1 - 

 2 files changed, 6 insertions(+), 23 deletions(-) 




diff --git a/net/tipc/link.c b/net/tipc/link.c 

index 72268eac4ec7..736fffb28ab6 100644 

--- a/net/tipc/link.c 

+++ b/net/tipc/link.c 

@@ -1084,25 +1084,6 @@ drop: 

return rc; 

 } 




-/* 

- * Send protocol message to the other endpoint. 

- */ 

-void tipc_link_proto_xmit(struct tipc_link *l, u32 msg_typ, int 
probe_msg,
- u32 gap, u32 tolerance, u32 priority) 

-{ 

-   struct sk_buff *skb = NULL; 

-   struct sk_buff_head xmitq; 

- 

-   __skb_queue_head_init(&xmitq); 

-   tipc_link_build_proto_msg(l, msg_typ, probe_msg, gap, 

- tolerance, priority, &xmitq); 

-   skb = __skb_dequeue(&xmitq); 

-   if (!skb) 

-   return; 

-   tipc_bearer_xmit_skb(l->net, l->bearer_id, skb, l->media_addr); 

-   l->rcv_unacked = 0; 

-} 

- 

 static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, 
bool probe,
  u16 rcvgap, int tolerance, int 
priority,
  struct sk_buff_head *xmitq) 

@@ -1636,9 +1617,12 @@ int tipc_nl_link_set(struct sk_buff *skb, struct 
genl_info *info)
char *name; 


struct tipc_link *link;
struct tipc_node *node;
+   struct sk_buff_head xmitq;
struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1];
struct net *net = sock_net(skb->sk);

+   __skb_queue_head_init(&xmitq);
+
if (!info->attrs[TIPC_NLA_LINK])
return -EINVAL;

@@ -1683,14 +1667,14 @@ int tipc_nl_link_set(struct sk_buff *skb, struct 
genl_info *info)


tol = nla_get_u32(props[TIPC_NLA_PROP_TOL]);
link->tolerance = tol;
-   tipc_link_proto_xmit(link, STATE_MSG, 0, 0, tol, 0);
+   tipc_link_build_proto_msg(link, STATE_MSG, 0, 0, 
tol, 0, &xmitq);

}
if (props[TIPC_NLA_PROP_PRIO]) {
u32 prio;

prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]);
link->priority = prio;
-   tipc_link_proto_xmit(link, STATE_MSG, 0, 0, 0, 
prio);
+   tipc_link_build_proto_msg(link, STATE_MSG, 0, 0, 
0, prio, &xmitq);

}
if (props[TIPC_NLA_PROP_WIN]) {
u32 win;
@@ -1702,7 +1686,7 @@ int tipc_nl_link_set(struct sk_buff *skb, struct 
genl_info *info)


 out:
tipc_node_unlock(node);
-
+   tipc_bearer_xmit(net, bearer_id, &xmitq, 
&node->lin

tipc_udp_send_msg oops in 4.4 when setting link tolerance

2017-11-13 Thread Tommi Rantala

Hi,

I always get an instant TIPC oops in 4.4, when I try to set the link 
tolerance (with LINKNAME != "broadcast-link"):


 $ tipc link set tolerance 1000 link $LINKNAME

Any idea what's going on? Some tipc patch missing in 4.4?

In 4.9 the "tipc" command executes just fine, but I've seen a few times 
that later some random process crashes with "BUG: Bad page state". KASAN 
does not report anything before it happens.


4.14 is OK, could not reproduce these problems with it.




tipc_udp_send_msg+0x102/0x4f0

matches to:
tipc_udp_send_msg at linux-stable/net/tipc/udp_media.c:172

static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb,
 struct tipc_bearer *b,
 struct tipc_media_addr *dest)
{
int ttl, err = 0;
struct udp_bearer *ub;
struct udp_media_addr *dst = (struct udp_media_addr *)&dest->value;
struct udp_media_addr *src = (struct udp_media_addr 
*)&b->addr.value;

struct rtable *rt;

if (skb_headroom(skb) < UDP_MIN_HEADROOM) {
err = pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, 
GFP_ATOMIC);

if (err)
goto tx_error;
}

skb_set_inner_protocol(skb, htons(ETH_P_TIPC));
ub = rcu_dereference_rtnl(b->media_ptr);
if (!ub) {
err = -ENODEV;
goto tx_error;
}
if (dst->proto == htons(ETH_P_IP)) {   <-- HERE



[  111.423647] 
==

[  111.424826] BUG: KASAN: null-ptr-deref on address   (null)
[  111.425538] Read of size 2 by task tipc/2643
[  111.426215] CPU: 3 PID: 2643 Comm: tipc Not tainted 4.4.97-pc64 #1
[  111.428081]   880026327478 8248005e 
0002
[  111.429476]  880047ad5ac0 8800263274f8 8227f5af 
000265711040
[  111.430728]   0297 a0387fd2 
02090220

[  111.432051] Call Trace:
[  111.432472]  [] dump_stack+0x86/0xc8
[  111.433208]  [] kasan_report.part.2+0x41f/0x520
[  111.434040]  [] ? tipc_udp_send_msg+0x102/0x4f0 [tipc]
[  111.434908]  [] kasan_report+0x25/0x30
[  111.435647]  [] __asan_load2+0x66/0x70
[  111.436391]  [] tipc_udp_send_msg+0x102/0x4f0 [tipc]
[  111.437334]  [] ? kasan_kmalloc+0x5e/0x70
[  111.438301]  [] ? kasan_slab_alloc+0xd/0x10
[  111.439328]  [] ? 
__kmalloc_node_track_caller+0xac/0x230

[  111.440493]  [] ? kasan_kmalloc+0x5e/0x70
[  111.441479]  [] ? tipc_udp_disable+0xe0/0xe0 [tipc]
[  111.442628]  [] ? kasan_kmalloc+0x5e/0x70
[  111.443598]  [] ? kasan_krealloc+0x62/0x80
[  111.444610]  [] ? memset+0x28/0x30
[  111.445539]  [] ? __alloc_skb+0x2b3/0x310
[  111.446560]  [] ? skb_complete_tx_timestamp+0x110/0x110
[  111.447695]  [] ? __module_text_address+0x16/0xa0
[  111.448735]  [] ? skb_put+0x8b/0xd0
[  111.449608]  [] ? memcpy+0x36/0x40
[  111.450524]  [] ? 
tipc_link_build_proto_msg+0x398/0x4c0 [tipc]

[  111.451946]  [] tipc_bearer_xmit_skb+0xa0/0xb0 [tipc]
[  111.453078]  [] tipc_link_proto_xmit+0x11b/0x160 [tipc]
[  111.454218]  [] ? 
tipc_link_build_reset_msg+0x50/0x50 [tipc]

[  111.455542]  [] tipc_nl_link_set+0x1ee/0x3b0 [tipc]
[  111.456659]  [] ? tipc_nl_parse_link_prop+0xd0/0xd0 
[tipc]

[  111.457831]  [] ? is_ftrace_trampoline+0x59/0x90
[  111.458884]  [] ? __kernel_text_address+0x65/0x80
[  111.459931]  [] ? nla_parse+0xb6/0x140
[  111.460892]  [] genl_family_rcv_msg+0x37e/0x5e0
[  111.461948]  [] ? set_orig_addr.isra.53+0xe5/0x120 
[tipc]

[  111.463107]  [] ? genl_rcv+0x40/0x40
[  111.463987]  [] ? alloc_debug_processing+0x154/0x180
[  111.465048]  [] ? ___slab_alloc+0x43d/0x460
[  111.465986]  [] ? alloc_debug_processing+0x154/0x180
[  111.467045]  [] ? netlink_lookup+0x19c/0x220
[  111.468067]  [] genl_rcv_msg+0xd8/0x110
[  111.468994]  [] netlink_rcv_skb+0x14b/0x180
[  111.469939]  [] ? genl_family_rcv_msg+0x5e0/0x5e0
[  111.470954]  [] genl_rcv+0x28/0x40
[  111.471798]  [] netlink_unicast+0x2e7/0x3a0
[  111.472806]  [] ? netlink_attachskb+0x330/0x330
[  111.473845]  [] ? copy_from_iter+0xf1/0x3b0
[  111.474847]  [] netlink_sendmsg+0x4ad/0x620
[  111.475788]  [] ? netlink_unicast+0x3a0/0x3a0
[  111.476793]  [] ? __fdget+0x13/0x20
[  111.477723]  [] ? sockfd_lookup_light+0x95/0xb0
[  111.478773]  [] SYSC_sendto+0x1bc/0x290
[  111.479659]  [] ? sock_write_iter+0x200/0x200
[  111.480692]  [] ? __fdget+0x13/0x20
[  111.481559]  [] ? sockfd_lookup_light+0x95/0xb0
[  111.482591]  [] ? netlink_getname+0xb1/0x110
[  111.483570]  [] ? move_addr_to_user+0x5c/0x70
[  111.484539]  [] ? SYSC_getsockname+0x176/0x190
[  111.485540]  [] ? sockfd_lookup_light+0xb0/0xb0
[  111.486558]  [] ? SYSC_bind+0xe5/0x180
[  111.487548]  [] ? __sock_recv_ts_and_drops+0x260/0x260
[  111.488700]  [] ? fd_install+0x3b/0x50
[  111.489596]  [] ? sock_map_fd+0x44/0x70
[  111.490553]  [] ? SyS_socket+0xcc/0x120
[  111.491437]  [] ? move_addr_to_kernel+0x40/0x40
[  111.492505] 

4.9.30 NULL pointer dereference in __remove_shared_vm_struct

2017-06-07 Thread Tommi Rantala

Hi,

I have hit this kernel bug twice with 4.9.30 while running trinity, any 
ideas? It's not easily reproducible.


Perhaps I should enable some more debug options to see if they reveal 
anything...


(note that I had different kernel builds, so the IP addresses are 
different in the logs below)



$ scripts/faddr2line vmlinux __remove_shared_vm_struct+0x16/0x40
__remove_shared_vm_struct+0x16/0x40:
atomic_inc at arch/x86/include/asm/atomic.h:91
 (inlined by) __remove_shared_vm_struct at mm/mmap.c:137


(gdb) disassemble __remove_shared_vm_struct
Dump of assembler code for function __remove_shared_vm_struct:
   0x8218e7a0 <+0>: callq  0x825db650 <__fentry__>
   0x8218e7a5 <+5>: mov0x50(%rdi),%rax
   0x8218e7a9 <+9>: push   %rbp
   0x8218e7aa <+10>:mov%rsp,%rbp
   0x8218e7ad <+13>:test   $0x8,%ah
   0x8218e7b0 <+16>:je 0x8218e7c1 
<__remove_shared_vm_struct+33>

   0x8218e7b2 <+18>:mov0x20(%rsi),%rax
   0x8218e7b6 <+22>:lock incl 0x158(%rax)
   0x8218e7bd <+29>:mov0x50(%rdi),%rax
   0x8218e7c1 <+33>:test   $0x8,%al
   0x8218e7c3 <+35>:je 0x8218e7c9 
<__remove_shared_vm_struct+41>

   0x8218e7c5 <+37>:lock decl 0x1c(%rdx)
   0x8218e7c9 <+41>:lea0x20(%rdx),%rsi
   0x8218e7cd <+45>:callq  0x82183460 


   0x8218e7d2 <+50>:pop%rbp
   0x8218e7d3 <+51>:retq




[16076.230255] BUG: unable to handle kernel NULL pointer dereference at 
0158

[16076.231566] IP: [] __remove_shared_vm_struct+0x16/0x40
[16076.232533] PGD 0
[16076.233125] Oops: 0002 [#1] SMP
[16076.233631] Modules linked in: fuse tun bridge hmac 8021q garp stp 
llc2 af_key llc rds xfrm_user xfrm_algo nfnetlink dccp_ipv6 sctp 
libcrc32c dccp_ipv4 dccp iptable_filter ip_tables x_tables isofs 
ata_piix autofs4

[16076.236688] CPU: 10 PID: 10753 Comm: trinity-main Not tainted 4.9.30 #1
[16076.238917] task: 880285b58000 task.stack: c90108d4c000
[16076.239741] RIP: 0010:[]  [] 
__remove_shared_vm_struct+0x16/0x40

[16076.241085] RSP: 0018:c90108d4fd38  EFLAGS: 00010202
[16076.241841] RAX:  RBX: 8801568867e8 RCX: 

[16076.242807] RDX: 88032c7581d8 RSI: 88012af34a00 RDI: 
8801568867e8
[16076.243773] RBP: c90108d4fd38 R08: 880156886b80 R09: 
7fffcf5d4000
[16076.244737] R10:  R11: 0001 R12: 
88012af34a00
[16076.245698] R13: 88032c758200 R14: 88032c7581d8 R15: 
8801568868a0
[16076.246659] FS:  () GS:88033348() 
knlGS:

[16076.247864] CS:  0010 DS:  ES:  CR0: 80050033
[16076.248667] CR2: 0158 CR3: 00c07000 CR4: 
06e0
[16076.249634] DR0: 7f54c4cae000 DR1: 7ff1276c9000 DR2: 

[16076.250599] DR3:  DR6: fffe0ff0 DR7: 
0600

[16076.251563] Stack:
[16076.251942]  c90108d4fd68 8038ef91 8801568867e8 
0040
[16076.253139]   c90108d4fdc0 c90108d4fda8 
80387893
[16076.254335]    8801d1126c00 


[16076.255528] Call Trace:
[16076.255959]  [] unlink_file_vma+0x41/0x60
[16076.256746]  [] free_pgtables+0x43/0x120
[16076.257520]  [] exit_mmap+0xb2/0x150
[16076.258258]  [] mmput+0x3b/0x100
[16076.258953]  [] do_exit+0x255/0xb20
[16076.259673]  [] ? syscall_trace_enter+0x1c1/0x2d0
[16076.260538]  [] do_group_exit+0x43/0xb0
[16076.261303]  [] SyS_exit_group+0x14/0x20
[16076.262078]  [] do_syscall_64+0x7e/0x1a0
[16076.262852]  [] entry_SYSCALL64_slow_path+0x25/0x25
[16076.263736] Code: 3d 00 20 00 00 48 0f 47 c2 48 89 05 cd dc 95 00 31 
c0 c3 66 90 0f 1f 44 00 00 48 8b 47 50 55 48 89 e5 f6 c4 08 74 0f 48 8b 
46 20  ff 80 58 01 00 00 48 8b 47 50 a8 08 74 04 f0 ff 4a 1c 48 8d

[16076.267481] RIP  [] __remove_shared_vm_struct+0x16/0x40
[16076.268424]  RSP 
[16076.268973] CR2: 0158
[16076.269844] ---[ end trace 98a1bbd8d9e50234 ]---
[16076.270565] Fixing recursive fault but reboot is needed!





[69086.066173] Out of memory: Kill process 2485 (trinity-c309) score 503 
or sacrifice child
[69086.067383] Killed process 2485 (trinity-c309) total-vm:73816kB, 
anon-rss:7196kB, file-rss:3940kB, shmem-rss:17248kB
[69086.071158] oom_reaper: reaped process 2485 (trinity-c309), now 
anon-rss:0kB, file-rss:0kB, shmem-rss:17248kB

[69089.763240] scsi_nl_rcv_msg: discarding partial skb
[69093.568099] scsi_nl_rcv_msg: discarding partial skb
[69095.925546] BUG: unable to handle kernel NULL pointer dereference at 
0158

[69095.926875] IP: [] __remove_shared_vm_struct+0x16/0x40
[69095.927836] PGD 0
[69095.928411] Oops: 0002 [#1] SMP
[69095.928934] Modules linked in: fuse tun 8021q xfrm_user garp 
dccp_ipv6 dccp_ipv4 dccp sctp bridge llc2 rds st

Re: nouveau "eDP-1: EDID is invalid" regression after 4.11 with HP ZBook 15 G3

2017-06-03 Thread Tommi Rantala
2017-05-15 8:20 GMT+03:00 Tommi Rantala :
> 2017-05-15 3:03 GMT+03:00 Ben Skeggs :
>> On 05/15/2017 01:10 AM, Tommi Rantala wrote:
>>>
>>> Hi,
>>
>> Hey Tommi,
>>
>> Thanks for bisecting this.  It's rather unexpected that you should be seeing
>> problems here, but, the commit makes sense for it at least.
>>
>> Are you able to get me new kernel logs of both before and after this patch
>> with "log_buf_len=8M drm.debug=0x14
>> nouveau.debug=disp=trace,i2c=trace,bios=trace" please?
>
> Hi Ben,
>
> Before:
> https://www.dropbox.com/s/b2namqtqvzv5ppp/trace.4.10.0-tr-10409-g5c68d91?dl=1
>
> After:
> https://www.dropbox.com/s/9url8qdo15959fy/trace.4.10.0-tr-10410-gdf8dc97?dl=1

Hi, any comments on this?

-Tommi

> -Tommi
>
>> Thanks,
>> Ben.
>>
>>
>>>
>>> Bisected this to:
>>>
>>> commit df8dc97cd17269474344d73cc02739532c468d04
>>> Author: Ben Skeggs 
>>> Date:   Wed Mar 1 09:42:04 2017 +1000
>>>
>>> drm/nouveau/kms/nv50: use drm core i2c-over-aux algorithm
>>>
>>> I'm not entirely sure NVKM needs to support this now, but I haven't
>>> removed it as of yet just in case it's needed from DEVINIT scripts
>>> where DRM isn't available.
>>>
>>> Signed-off-by: Ben Skeggs 
>>>
>>>
>>> dmesg after boot with drm.debug enabled:
>>>
>>> v4.10-10409-g5c68d91 (still works):
>>> http://termbin.com/b0is
>>>
>>> v4.10-10410-gdf8dc97 (failure):
>>> http://termbin.com/j6lq
>>>
>>>
>>> Tommi
>>>
>>>
>>> 2017-05-10 11:24 GMT+03:00 Tommi Rantala :
>>>>
>>>> Hi,
>>>>
>>>> The HP ZBook 15 G3 laptop builtin display (eDP-1) does not work
>>>> correctly with v4.11-11413-g2868b25.
>>>>
>>>> When booting the laptop, the resolution seems to be limited to
>>>> 1024x768, and gnome-session segfaults.
>>>>
>>>> Up to 4.11 the display works just fine in 1920x1080 mode.
>>>>
>>>> I'm seeing this in the kernel logs:
>>>>
>>>> nouveau :01:00.0: eDP-1: EDID is invalid:
>>>>  [00] BAD  00 ff ff ff ff ff ff 00 ff ff ff ff ff ff ff ff
>>>>  [00] BAD  ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
>>>>  [00] BAD  ff ff ff ff ff ff ff ff ff ff ff ff ff 84 53 54
>>>>  [00] BAD  66 69 50 55 57 66 74 49 48 ff ff ff ff ff ff ff
>>>>  [00] BAD  ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
>>>>  [00] BAD  ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
>>>>  [00] BAD  ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
>>>>  [00] BAD  ff ff ff ff ff ff ff ff ff ff ff 00 00 ff 00 ff
>>>> nouveau :01:00.0: DRM: DDC responded, but no EDID for eDP-1
>>>> [drm] Cannot find any crtc or sizes - going 1024x768
>>>>
>>>>
>>>> $ lspci | grep NVIDIA
>>>> 01:00.0 VGA compatible controller: NVIDIA Corporation GM107GLM [Quadro
>>>> M2000M] (rev a2)
>>>>
>>>> Any ideas, or should I bisect?
>>>>
>>>> 4.11 dmesg & xrandr output:
>>>> https://pastebin.com/raw/P9LGP7e1
>>>>
>>>> 4.11-11413-g2868b25 dmesg:
>>>> https://pastebin.com/raw/QBT9mMua
>>>>
>>>> -Tommi


Re: (radeon?) WARNING: drivers/gpu/drm/drm_irq.c:1195 drm_vblank_put (v4.11-12441-g56868a4)

2017-05-22 Thread Tommi Rantala
2017-05-22 10:32 GMT+03:00 Daniel Vetter :
> Yeah I think the locking stuff we've fixed, at least if you don't see it
> in 4.12 it should be all good. And I think I spotted the bug you've
> bisected to, patch is on dri-devel, pls test.

Thanks! I'll test it later today.

-Tommi


Re: (radeon?) WARNING: drivers/gpu/drm/drm_irq.c:1195 drm_vblank_put (v4.11-12441-g56868a4)

2017-05-18 Thread Tommi Rantala
2017-05-11 5:51 GMT+03:00 Michel Dänzer :
> On 11/05/17 04:33 AM, Tommi Rantala wrote:
>> Complete kernel log:
>> http://termbin.com/dzy5
>>
>> [  249.952546] [ cut here ]
>> [  249.952593] WARNING: CPU: 5 PID: 0 at
>> /home/ttrantal/git/linux/drivers/gpu/drm/drm_irq.c:1195
>> drm_vblank_put+0xc4/0x120 [drm]
>> [  249.952596] Modules linked in: fuse tun bridge stp llc af_packet
>> pl2303 usbserial shpchp acpi_cpufreq binfmt_misc amdgpu hid_generic
>> uhci_hcd radeon 3c59x mii tg3 ehci_pci ehci_hcd i2c_algo_bit
>> drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm
>> agpgart unix autofs4
>> [  249.952675] CPU: 5 PID: 0 Comm: swapper/5 Tainted: GW
>> 4.11.0+ #4
>> [  249.952678] Hardware name: Hewlett-Packard HP xw6600
>> Workstation/0A9Ch, BIOS 786F4 v01.46 09/20/2012
>> [  249.952681] task: 88080aea task.stack: c900031b
>> [  249.952695] RIP: 0010:drm_vblank_put+0xc4/0x120 [drm]
>> [  249.952698] RSP: 0018:88080f003d70 EFLAGS: 00010046
>> [  249.952703] RAX:  RBX: 880801d53000 RCX: 
>> 
>> [  249.952706] RDX:  RSI:  RDI: 
>> 88080a4ac000
>> [  249.952709] RBP: 88080f003d88 R08: 0001 R09: 
>> 0003
>> [  249.952711] R10: 88080f003d08 R11: 001da540 R12: 
>> 88080a4ac000
>> [  249.952714] R13:  R14: 0086 R15: 
>> 8808019a
>> [  249.952717] FS:  () GS:88080f00()
>> knlGS:
>> [  249.952720] CS:  0010 DS:  ES:  CR0: 80050033
>> [  249.952723] CR2: 7f8bcc3a5810 CR3: 000808789000 CR4: 
>> 06e0
>> [  249.952726] Call Trace:
>> [  249.952731]  
>> [  249.952746]  drm_crtc_vblank_put+0x1b/0x30 [drm]
>> [  249.952813]  radeon_crtc_handle_flip+0xdc/0x140 [radeon]
>> [  249.952843]  si_irq_process+0x610/0x1e90 [radeon]
>> [  249.952872]  radeon_driver_irq_handler_kms+0x39/0xc0 [radeon]
>> [  249.952881]  __handle_irq_event_percpu+0x60/0x580
>> [  249.952887]  handle_irq_event_percpu+0x20/0x90
>> [  249.952892]  handle_irq_event+0x46/0xb0
>> [  249.952897]  handle_edge_irq+0x13d/0x370
>> [  249.952903]  handle_irq+0x66/0x210
>> [  249.952908]  ? __local_bh_enable+0x34/0x50
>> [  249.952914]  do_IRQ+0x7e/0x1b0
>> [  249.952920]  common_interrupt+0x95/0x95
>
> Weird, not sure how this could happen. Can you bisect?

Hi,

Bisection points to this (also manually applied commit 9739e74646
while testing, got kernel oops otherwise):

commit 29dc0d1de18239cf3ef8bab578b8321ed340d81c
Author: Daniel Vetter 
Date:   Wed Mar 22 22:50:49 2017 +0100

drm: Roll out acquire context for the page_flip ioctl

Again just prep work.

Reviewed-by: Harry Wentland 
Signed-off-by: Daniel Vetter 
Link: 
http://patchwork.freedesktop.org/patch/msgid/20170322215058.8671-11-daniel.vet...@ffwll.ch


I'm also seeing some more warnings in this version:


May 18 19:21:55 xw6600 kernel: IPv6: ADDRCONF(NETDEV_CHANGE): enp14s0:
link becomes ready
May 18 19:21:57 xw6600 kernel: [ cut here ]
May 18 19:21:57 xw6600 kernel: WARNING: CPU: 5 PID: 4607 at
/home/ttrantal/git/linux/drivers/gpu/drm/drm_modeset_lock.c:193
drm_modeset_lock_crtc+0xe5/0x100 [drm]
May 18 19:21:57 xw6600 kernel: Modules linked in: tun bridge stp llc
af_packet pl2303 usbserial shpchp acpi_cpufreq binfmt_misc amdgpu
hid_generic uhci_hcd radeon 3c59x mii i2c_algo_bit drm_kms_helper tg3
syscopyarea sysfillrect sysimgblt
May 18 19:21:57 xw6600 kernel: CPU: 5 PID: 4607 Comm: gnome-shell Not
tainted 4.11.0-rc3-00944-g29dc0d1-dirty #30
May 18 19:21:57 xw6600 kernel: Hardware name: Hewlett-Packard HP
xw6600 Workstation/0A9Ch, BIOS 786F4 v01.46 09/20/2012
May 18 19:21:57 xw6600 kernel: Call Trace:
May 18 19:21:57 xw6600 kernel:  dump_stack+0x69/0x9b
May 18 19:21:57 xw6600 kernel:  __warn+0xff/0x140
May 18 19:21:57 xw6600 kernel:  warn_slowpath_null+0x18/0x20
May 18 19:21:57 xw6600 kernel:  drm_modeset_lock_crtc+0xe5/0x100 [drm]
May 18 19:21:57 xw6600 kernel:  drm_mode_cursor_common+0xbd/0x200 [drm]
May 18 19:21:57 xw6600 kernel:  drm_mode_cursor_ioctl+0x3c/0x40 [drm]
May 18 19:21:57 xw6600 kernel:  drm_ioctl+0x3ea/0x870 [drm]
May 18 19:21:57 xw6600 kernel:  ? drm_mode_setplane+0x1a0/0x1a0 [drm]
May 18 19:21:57 xw6600 kernel:  ? trace_hardirqs_on_caller+0x1ad/0x2c0
May 18 19:21:57 xw6600 kernel:  ? trace_hardirqs_on+0xd/0x10
May 18 19:21:57 xw6600 kernel:  radeon_drm_ioctl+0x6e/0x110 [radeon]
May 18 19:21:57 xw6600 kernel:  do_vfs_ioctl+0xac/0x9d0
May 18 19:21:57 xw6600 kernel:  ? security_file_ioctl+0x4c/0x80
May 18 19:21:57 xw6600 kernel:  SyS_ioctl+0x74/0x8

Re: nouveau "eDP-1: EDID is invalid" regression after 4.11 with HP ZBook 15 G3

2017-05-14 Thread Tommi Rantala
2017-05-15 3:03 GMT+03:00 Ben Skeggs :
> On 05/15/2017 01:10 AM, Tommi Rantala wrote:
>>
>> Hi,
>
> Hey Tommi,
>
> Thanks for bisecting this.  It's rather unexpected that you should be seeing
> problems here, but, the commit makes sense for it at least.
>
> Are you able to get me new kernel logs of both before and after this patch
> with "log_buf_len=8M drm.debug=0x14
> nouveau.debug=disp=trace,i2c=trace,bios=trace" please?

Hi Ben,

Before:
https://www.dropbox.com/s/b2namqtqvzv5ppp/trace.4.10.0-tr-10409-g5c68d91?dl=1

After:
https://www.dropbox.com/s/9url8qdo15959fy/trace.4.10.0-tr-10410-gdf8dc97?dl=1

-Tommi

> Thanks,
> Ben.
>
>
>>
>> Bisected this to:
>>
>> commit df8dc97cd17269474344d73cc02739532c468d04
>> Author: Ben Skeggs 
>> Date:   Wed Mar 1 09:42:04 2017 +1000
>>
>> drm/nouveau/kms/nv50: use drm core i2c-over-aux algorithm
>>
>> I'm not entirely sure NVKM needs to support this now, but I haven't
>> removed it as of yet just in case it's needed from DEVINIT scripts
>> where DRM isn't available.
>>
>> Signed-off-by: Ben Skeggs 
>>
>>
>> dmesg after boot with drm.debug enabled:
>>
>> v4.10-10409-g5c68d91 (still works):
>> http://termbin.com/b0is
>>
>> v4.10-10410-gdf8dc97 (failure):
>> http://termbin.com/j6lq
>>
>>
>> Tommi
>>
>>
>> 2017-05-10 11:24 GMT+03:00 Tommi Rantala :
>>>
>>> Hi,
>>>
>>> The HP ZBook 15 G3 laptop builtin display (eDP-1) does not work
>>> correctly with v4.11-11413-g2868b25.
>>>
>>> When booting the laptop, the resolution seems to be limited to
>>> 1024x768, and gnome-session segfaults.
>>>
>>> Up to 4.11 the display works just fine in 1920x1080 mode.
>>>
>>> I'm seeing this in the kernel logs:
>>>
>>> nouveau :01:00.0: eDP-1: EDID is invalid:
>>>  [00] BAD  00 ff ff ff ff ff ff 00 ff ff ff ff ff ff ff ff
>>>  [00] BAD  ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
>>>  [00] BAD  ff ff ff ff ff ff ff ff ff ff ff ff ff 84 53 54
>>>  [00] BAD  66 69 50 55 57 66 74 49 48 ff ff ff ff ff ff ff
>>>  [00] BAD  ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
>>>  [00] BAD  ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
>>>  [00] BAD  ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
>>>  [00] BAD  ff ff ff ff ff ff ff ff ff ff ff 00 00 ff 00 ff
>>> nouveau :01:00.0: DRM: DDC responded, but no EDID for eDP-1
>>> [drm] Cannot find any crtc or sizes - going 1024x768
>>>
>>>
>>> $ lspci | grep NVIDIA
>>> 01:00.0 VGA compatible controller: NVIDIA Corporation GM107GLM [Quadro
>>> M2000M] (rev a2)
>>>
>>> Any ideas, or should I bisect?
>>>
>>> 4.11 dmesg & xrandr output:
>>> https://pastebin.com/raw/P9LGP7e1
>>>
>>> 4.11-11413-g2868b25 dmesg:
>>> https://pastebin.com/raw/QBT9mMua
>>>
>>> -Tommi


Re: nouveau "eDP-1: EDID is invalid" regression after 4.11 with HP ZBook 15 G3

2017-05-14 Thread Tommi Rantala
Hi,

Bisected this to:

commit df8dc97cd17269474344d73cc02739532c468d04
Author: Ben Skeggs 
Date:   Wed Mar 1 09:42:04 2017 +1000

   drm/nouveau/kms/nv50: use drm core i2c-over-aux algorithm

   I'm not entirely sure NVKM needs to support this now, but I haven't
   removed it as of yet just in case it's needed from DEVINIT scripts
   where DRM isn't available.

   Signed-off-by: Ben Skeggs 


dmesg after boot with drm.debug enabled:

v4.10-10409-g5c68d91 (still works):
http://termbin.com/b0is

v4.10-10410-gdf8dc97 (failure):
http://termbin.com/j6lq


Tommi


2017-05-10 11:24 GMT+03:00 Tommi Rantala :
> Hi,
>
> The HP ZBook 15 G3 laptop builtin display (eDP-1) does not work
> correctly with v4.11-11413-g2868b25.
>
> When booting the laptop, the resolution seems to be limited to
> 1024x768, and gnome-session segfaults.
>
> Up to 4.11 the display works just fine in 1920x1080 mode.
>
> I'm seeing this in the kernel logs:
>
> nouveau :01:00.0: eDP-1: EDID is invalid:
> [00] BAD  00 ff ff ff ff ff ff 00 ff ff ff ff ff ff ff ff
> [00] BAD  ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
> [00] BAD  ff ff ff ff ff ff ff ff ff ff ff ff ff 84 53 54
> [00] BAD  66 69 50 55 57 66 74 49 48 ff ff ff ff ff ff ff
> [00] BAD  ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
> [00] BAD  ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
> [00] BAD  ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
> [00] BAD  ff ff ff ff ff ff ff ff ff ff ff 00 00 ff 00 ff
> nouveau :01:00.0: DRM: DDC responded, but no EDID for eDP-1
> [drm] Cannot find any crtc or sizes - going 1024x768
>
>
> $ lspci | grep NVIDIA
> 01:00.0 VGA compatible controller: NVIDIA Corporation GM107GLM [Quadro
> M2000M] (rev a2)
>
> Any ideas, or should I bisect?
>
> 4.11 dmesg & xrandr output:
> https://pastebin.com/raw/P9LGP7e1
>
> 4.11-11413-g2868b25 dmesg:
> https://pastebin.com/raw/QBT9mMua
>
> -Tommi


Re: [PATCH] hwmon: (coretemp) Handle frozen hotplug state correctly

2017-05-10 Thread Tommi Rantala
2017-05-10 23:09 GMT+03:00 Guenter Roeck :
> On Wed, May 10, 2017 at 10:16:33PM +0300, Tommi Rantala wrote:
>> 2017-05-10 17:30 GMT+03:00 Thomas Gleixner :
>> > The recent conversion to the hotplug state machine missed that the original
>> > hotplug notifiers did not execute in the frozen state, which is used on
>> > suspend and resume.
>> >
>> > This does not matter on single socket machines, but on multi socket systems
>> > this breaks when the device for a non-boot socket is removed when the last
>> > CPU of that socket is brought offline. The device removal locks up the
>> > machine hard w/o any debug output.
>> >
>> > Prevent executing the hotplug callbacks when cpuhp_tasks_frozen is true.
>> >
>> > Thanks to Tommi for providing debug information patiently while I failed to
>> > spot the obvious.
>> >
>> > Fixes: e00ca5df37ad ("hwmon: (coretemp) Convert to hotplug state machine")
>> > Reported-by: Tommi Rantala 
>> > Signed-off-by: Thomas Gleixner 
>>
>> Many thanks, I can confirm that it works well!
>>
> Ok if I add your Tested-by: ?

Sure!

Tested-by: Tommi Rantala 

> Thanks,
> Guenter
>
>> -Tommi
>>
>> > ---
>> >  drivers/hwmon/coretemp.c |   14 ++
>> >  1 file changed, 14 insertions(+)
>> >
>> > --- a/drivers/hwmon/coretemp.c
>> > +++ b/drivers/hwmon/coretemp.c
>> > @@ -605,6 +605,13 @@ static int coretemp_cpu_online(unsigned
>> > struct platform_data *pdata;
>> >
>> > /*
>> > +* Don't execute this on resume as the offline callback did
>> > +* not get executed on suspend.
>> > +*/
>> > +   if (cpuhp_tasks_frozen)
>> > +   return 0;
>> > +
>> > +   /*
>> >  * CPUID.06H.EAX[0] indicates whether the CPU has thermal
>> >  * sensors. We check this bit only, all the early CPUs
>> >  * without thermal sensors will be filtered out.
>> > @@ -654,6 +661,13 @@ static int coretemp_cpu_offline(unsigned
>> > struct temp_data *tdata;
>> > int indx, target;
>> >
>> > +   /*
>> > +* Don't execute this on suspend as the device remove locks
>> > +* up the machine.
>> > +*/
>> > +   if (cpuhp_tasks_frozen)
>> > +   return 0;
>> > +
>> > /* If the physical CPU device does not exist, just return */
>> > if (!pdev)
>> > return 0;


(radeon?) WARNING: drivers/gpu/drm/drm_irq.c:1195 drm_vblank_put (v4.11-12441-g56868a4)

2017-05-10 Thread Tommi Rantala
Hi,

I just tested v4.11-12441-g56868a4 on HP xw6600 with radeon graphics,
and I'm seeing the following WARNING triggered constantly.

I have not seen this earlier e.g. with the distro kernel 4.10.13-200.fc25.x86_64

$ lspci|grep -i amd
60:00.0 VGA compatible controller: Advanced Micro Devices, Inc.
[AMD/ATI] Curacao PRO [Radeon R7 370 / R9 270/370 OEM]
60:00.1 Audio device: Advanced Micro Devices, Inc. [AMD/ATI] Cape
Verde/Pitcairn HDMI Audio [Radeon HD 7700/7800 Series]

Complete kernel log:
http://termbin.com/dzy5

[  249.952546] [ cut here ]
[  249.952593] WARNING: CPU: 5 PID: 0 at
/home/ttrantal/git/linux/drivers/gpu/drm/drm_irq.c:1195
drm_vblank_put+0xc4/0x120 [drm]
[  249.952596] Modules linked in: fuse tun bridge stp llc af_packet
pl2303 usbserial shpchp acpi_cpufreq binfmt_misc amdgpu hid_generic
uhci_hcd radeon 3c59x mii tg3 ehci_pci ehci_hcd i2c_algo_bit
drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm
agpgart unix autofs4
[  249.952675] CPU: 5 PID: 0 Comm: swapper/5 Tainted: GW
4.11.0+ #4
[  249.952678] Hardware name: Hewlett-Packard HP xw6600
Workstation/0A9Ch, BIOS 786F4 v01.46 09/20/2012
[  249.952681] task: 88080aea task.stack: c900031b
[  249.952695] RIP: 0010:drm_vblank_put+0xc4/0x120 [drm]
[  249.952698] RSP: 0018:88080f003d70 EFLAGS: 00010046
[  249.952703] RAX:  RBX: 880801d53000 RCX: 
[  249.952706] RDX:  RSI:  RDI: 88080a4ac000
[  249.952709] RBP: 88080f003d88 R08: 0001 R09: 0003
[  249.952711] R10: 88080f003d08 R11: 001da540 R12: 88080a4ac000
[  249.952714] R13:  R14: 0086 R15: 8808019a
[  249.952717] FS:  () GS:88080f00()
knlGS:
[  249.952720] CS:  0010 DS:  ES:  CR0: 80050033
[  249.952723] CR2: 7f8bcc3a5810 CR3: 000808789000 CR4: 06e0
[  249.952726] Call Trace:
[  249.952731]  
[  249.952746]  drm_crtc_vblank_put+0x1b/0x30 [drm]
[  249.952813]  radeon_crtc_handle_flip+0xdc/0x140 [radeon]
[  249.952843]  si_irq_process+0x610/0x1e90 [radeon]
[  249.952872]  radeon_driver_irq_handler_kms+0x39/0xc0 [radeon]
[  249.952881]  __handle_irq_event_percpu+0x60/0x580
[  249.952887]  handle_irq_event_percpu+0x20/0x90
[  249.952892]  handle_irq_event+0x46/0xb0
[  249.952897]  handle_edge_irq+0x13d/0x370
[  249.952903]  handle_irq+0x66/0x210
[  249.952908]  ? __local_bh_enable+0x34/0x50
[  249.952914]  do_IRQ+0x7e/0x1b0
[  249.952920]  common_interrupt+0x95/0x95
[  249.952924] RIP: 0010:mwait_idle+0x9c/0x3c0
[  249.952927] RSP: 0018:c900031b3e68 EFLAGS: 0246 ORIG_RAX:
ff4d
[  249.952932] RAX:  RBX: 88080aea RCX: 
[  249.952935] RDX: 0001 RSI: 0001 RDI: 88080aea
[  249.952938] RBP: c900031b3e98 R08: 0006 R09: 
[  249.952941] R10:  R11:  R12: 88080aea
[  249.952943] R13: 0005 R14: 839ca0c8 R15: 
[  249.952946]  
[  249.952955]  ? mwait_idle+0x93/0x3c0
[  249.952961]  arch_cpu_idle+0xa/0x10
[  249.952965]  default_idle_call+0x24/0x40
[  249.952971]  do_idle+0x154/0x1f0
[  249.952976]  cpu_startup_entry+0x18/0x20
[  249.952981]  start_secondary+0x159/0x1f0
[  249.952987]  secondary_startup_64+0x9f/0x9f
[  249.952995] Code: 0d 32 c7 9a e2 f7 ea 41 c1 fc 1f 48 8d 7b 60 c1
fa 06 44 29 e2 48 63 f2 48 01 ce e8 37 7d 1e e1 eb be 0f ff 5b 41 5c
41 5d 5d c3 <0f> ff eb b1 48 89 df e8 40 fe ff ff eb a7 41 0f b6 f4 48
c7 c7
[  249.953135] ---[ end trace 399ab7917ed3b208 ]---
[  251.185850] [ cut here ]
[  251.185896] WARNING: CPU: 5 PID: 4425 at
/home/ttrantal/git/linux/drivers/gpu/drm/drm_irq.c:1195
drm_vblank_put+0xc4/0x120 [drm]
[  251.185899] Modules linked in: fuse tun bridge stp llc af_packet
pl2303 usbserial shpchp acpi_cpufreq binfmt_misc amdgpu hid_generic
uhci_hcd radeon 3c59x mii tg3 ehci_pci ehci_hcd i2c_algo_bit
drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm
agpgart unix autofs4
[  251.185979] CPU: 5 PID: 4425 Comm: in:imjournal Tainted: GW
  4.11.0+ #4
[  251.185982] Hardware name: Hewlett-Packard HP xw6600
Workstation/0A9Ch, BIOS 786F4 v01.46 09/20/2012
[  251.185984] task: 880802f8b280 task.stack: c900034c8000
[  251.185998] RIP: 0010:drm_vblank_put+0xc4/0x120 [drm]
[  251.186001] RSP: :88080f003d70 EFLAGS: 00010046
[  251.186006] RAX:  RBX: 880801d53000 RCX: 
[  251.186009] RDX:  RSI:  RDI: 88080a4ac000
[  251.186012] RBP: 88080f003d88 R08: 0001 R09: 0003
[  251.186015] R10: 88080f003d08 R11: 001da540 R12: 88080a4ac000
[  251.186017] R13:  R14: 0086 R15: 8808019a
[  251.18

Re: [PATCH] hwmon: (coretemp) Handle frozen hotplug state correctly

2017-05-10 Thread Tommi Rantala
2017-05-10 17:30 GMT+03:00 Thomas Gleixner :
> The recent conversion to the hotplug state machine missed that the original
> hotplug notifiers did not execute in the frozen state, which is used on
> suspend and resume.
>
> This does not matter on single socket machines, but on multi socket systems
> this breaks when the device for a non-boot socket is removed when the last
> CPU of that socket is brought offline. The device removal locks up the
> machine hard w/o any debug output.
>
> Prevent executing the hotplug callbacks when cpuhp_tasks_frozen is true.
>
> Thanks to Tommi for providing debug information patiently while I failed to
> spot the obvious.
>
> Fixes: e00ca5df37ad ("hwmon: (coretemp) Convert to hotplug state machine")
> Reported-by: Tommi Rantala 
> Signed-off-by: Thomas Gleixner 

Many thanks, I can confirm that it works well!

-Tommi

> ---
>  drivers/hwmon/coretemp.c |   14 ++
>  1 file changed, 14 insertions(+)
>
> --- a/drivers/hwmon/coretemp.c
> +++ b/drivers/hwmon/coretemp.c
> @@ -605,6 +605,13 @@ static int coretemp_cpu_online(unsigned
> struct platform_data *pdata;
>
> /*
> +* Don't execute this on resume as the offline callback did
> +* not get executed on suspend.
> +*/
> +   if (cpuhp_tasks_frozen)
> +   return 0;
> +
> +   /*
>  * CPUID.06H.EAX[0] indicates whether the CPU has thermal
>  * sensors. We check this bit only, all the early CPUs
>  * without thermal sensors will be filtered out.
> @@ -654,6 +661,13 @@ static int coretemp_cpu_offline(unsigned
> struct temp_data *tdata;
> int indx, target;
>
> +   /*
> +* Don't execute this on suspend as the device remove locks
> +* up the machine.
> +*/
> +   if (cpuhp_tasks_frozen)
> +   return 0;
> +
> /* If the physical CPU device does not exist, just return */
> if (!pdev)
> return 0;


Re: [patch 0/6] hwmon/coretemp: Hotplug fixes, cleanups and state machine conversion

2017-05-10 Thread Tommi Rantala
2017-05-10 17:01 GMT+03:00 Thomas Gleixner :
> On Wed, 10 May 2017, Tommi Rantala wrote:
>> 2017-05-09 10:16 GMT+03:00 Thomas Gleixner :
>> > On Thu, 4 May 2017, Tommi Rantala wrote:
>> >> Here's the trace output, does it help?
>> >
>> > Not much. Can you please try the following:
>> >
>> > 1) Offline all CPUs except CPU0 before suspend/resume
>>
>> it works!
>>
>> > 2) Offline all CPUs except CPU0 and CPU1 before suspend/resume
>>
>> now it breaks.
>>
>> > 3) Offline all CPUs except CPU0 and CPU2 before suspend/resume
>>
>> works again!
>>
>> (Also works with CPUs 0,2,4,6 onlined.)
>
> Output from /proc/cpuinfo please.

http://termbin.com/vec2


Re: [patch 0/6] hwmon/coretemp: Hotplug fixes, cleanups and state machine conversion

2017-05-10 Thread Tommi Rantala
2017-05-09 10:16 GMT+03:00 Thomas Gleixner :
> On Thu, 4 May 2017, Tommi Rantala wrote:
>> Here's the trace output, does it help?
>
> Not much. Can you please try the following:
>
> 1) Offline all CPUs except CPU0 before suspend/resume

it works!

> 2) Offline all CPUs except CPU0 and CPU1 before suspend/resume

now it breaks.

> 3) Offline all CPUs except CPU0 and CPU2 before suspend/resume

works again!

(Also works with CPUs 0,2,4,6 onlined.)

-Tommi


nouveau "eDP-1: EDID is invalid" regression after 4.11 with HP ZBook 15 G3

2017-05-10 Thread Tommi Rantala
Hi,

The HP ZBook 15 G3 laptop builtin display (eDP-1) does not work
correctly with v4.11-11413-g2868b25.

When booting the laptop, the resolution seems to be limited to
1024x768, and gnome-session segfaults.

Up to 4.11 the display works just fine in 1920x1080 mode.

I'm seeing this in the kernel logs:

nouveau :01:00.0: eDP-1: EDID is invalid:
[00] BAD  00 ff ff ff ff ff ff 00 ff ff ff ff ff ff ff ff
[00] BAD  ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
[00] BAD  ff ff ff ff ff ff ff ff ff ff ff ff ff 84 53 54
[00] BAD  66 69 50 55 57 66 74 49 48 ff ff ff ff ff ff ff
[00] BAD  ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
[00] BAD  ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
[00] BAD  ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
[00] BAD  ff ff ff ff ff ff ff ff ff ff ff 00 00 ff 00 ff
nouveau :01:00.0: DRM: DDC responded, but no EDID for eDP-1
[drm] Cannot find any crtc or sizes - going 1024x768


$ lspci | grep NVIDIA
01:00.0 VGA compatible controller: NVIDIA Corporation GM107GLM [Quadro
M2000M] (rev a2)

Any ideas, or should I bisect?

4.11 dmesg & xrandr output:
https://pastebin.com/raw/P9LGP7e1

4.11-11413-g2868b25 dmesg:
https://pastebin.com/raw/QBT9mMua

-Tommi


Re: [patch 0/6] hwmon/coretemp: Hotplug fixes, cleanups and state machine conversion

2017-05-04 Thread Tommi Rantala
2017-04-23 18:01 GMT+03:00 Thomas Gleixner :
> On Sat, 15 Apr 2017, Tommi Rantala wrote:
>
>> Testing with 4.10.8-200.fc25.x86_64: freezer, devices and platform are
>> OK, it breaks at "processors".
>> The screen stays off, and the machine no longer answers to ping.
>>
>> (Without coretemp loaded, the machine survives all the states. There
>> are some graphics glitches and radeon error messages)
>
> That's odd. I tried on a similar machine (w/o a radeon card) and it just
> works with the coretemp module loaded.
>
> Can you please do a CPU hotplug cycle (just one CPU) with the cpuhp events
> in the tracer enabled. Send me the trace output so I might be able to spot
> what's different and what interdependencies between other callbacks might be
> there.

Hi,

Here's the trace output, does it help?

http://termbin.com/qugr

-Tommi


Re: [patch 0/6] hwmon/coretemp: Hotplug fixes, cleanups and state machine conversion

2017-04-15 Thread Tommi Rantala
2017-04-14 20:35 GMT+03:00 Thomas Gleixner :
> On Wed, 12 Apr 2017, Thomas Gleixner wrote:
>>
>> Can you please try the following:
>>
>> # for STATE in freezer devices platform processors core; do \
>>   echo $STATE; \
>>   echo $STATE >/sys/power/pm_test; \
>>   echo mem >/sys/power/state
>>
>> That should give us at least a hint in which area to dig.
>
> Any news on that?

Sorry, was traveling.

Testing with 4.10.8-200.fc25.x86_64: freezer, devices and platform are
OK, it breaks at "processors".
The screen stays off, and the machine no longer answers to ping.

(Without coretemp loaded, the machine survives all the states. There
are some graphics glitches and radeon error messages)

-Tommi


Re: [patch 0/6] hwmon/coretemp: Hotplug fixes, cleanups and state machine conversion

2017-04-12 Thread Tommi Rantala
2017-04-12 13:52 GMT+03:00 Thomas Gleixner :
> On Wed, 12 Apr 2017, Tommi Rantala wrote:
>> 2017-04-12 12:28 GMT+03:00 Thomas Gleixner :
>> > On Wed, 12 Apr 2017, Tommi Rantala wrote:
>> >> Resume-from-suspend stopped working in HP xw6600 in fedora kernel
>> >> 4.10.8-200.fc25.x86_64, while it worked just fine in
>> >> 4.9.9-200.fc25.x86_64.
>> >>
>> >> When powering on the suspended PC, there is no video output, and to
>> >> recover, I need to reset the machine.
>> >
>> > Is there just no video output or is the machine completely frozen? If it's
>> > not completely dead, then you might be able to ssh into it.
>>
>> It's completely hosed: not possible to ssh, does not respond to ping either.
>>
>> I made a quick test with netconsole. After booting with
>> no_console_suspend=1, and setting the netconsole parameters, I can get
>> kernel messages (to my android phone) when suspending the machine. But
>> no messages after the failed resume.
>
> Let's do something else first.
>
> Can you please try to offline/online CPUs from the console?
>
> # echo 0 >/sys/devices/system/cpu1/online
> # echo 1 >/sys/devices/system/cpu1/online

ok, that works.

> If that works, then try to offline all CPUs (except 0) in the same order as
> suspend (1 ... 7) and then online them again in the same order?

Seems to work without problems:

# for i in $(seq 1 7) ; do echo 0 > /sys/devices/system/cpu/cpu$i/online ; done

[ 1237.317537] intel_powerclamp: No package C-state available
[ 1308.997620] smpboot: CPU 1 is now offline
[ 1309.007167] intel_powerclamp: No package C-state available
[ 1309.032563] smpboot: CPU 2 is now offline
[ 1309.038118] intel_powerclamp: No package C-state available
[ 1309.072495] smpboot: CPU 3 is now offline
[ 1309.077807] intel_powerclamp: No package C-state available
[ 1309.099545] Broke affinity for irq 29
[ 1309.100587] smpboot: CPU 4 is now offline
[ 1309.105346] intel_powerclamp: No package C-state available
[ 1309.135530] Broke affinity for irq 22
[ 1309.135540] Broke affinity for irq 29
[ 1309.136579] smpboot: CPU 5 is now offline
[ 1309.141653] intel_powerclamp: No package C-state available
[ 1309.171517] Broke affinity for irq 22
[ 1309.171526] Broke affinity for irq 29
[ 1309.171535] Broke affinity for irq 31
[ 1309.172586] smpboot: CPU 6 is now offline
[ 1309.176967] intel_powerclamp: No package C-state available
[ 1309.209122] Broke affinity for irq 19
[ 1309.209126] Broke affinity for irq 22
[ 1309.209135] Broke affinity for irq 29
[ 1309.209145] Broke affinity for irq 31
[ 1309.212071] smpboot: CPU 7 is now offline


# for i in $(seq 1 7) ; do echo 1 > /sys/devices/system/cpu/cpu$i/online ; done

[ 1309.217476] intel_powerclamp: No package C-state available
[ 1380.624184] x86: Booting SMP configuration:
[ 1380.624186] smpboot: Booting Node 0 Processor 1 APIC 0x4
[ 1380.659810] intel_powerclamp: No package C-state available
[ 1380.659957] smpboot: Booting Node 0 Processor 2 APIC 0x2
[ 1380.671198] microcode: sig=0x10676, pf=0x40, revision=0x60f
[ 1380.672088] smpboot: Booting Node 0 Processor 3 APIC 0x6
[ 1380.677952] intel_powerclamp: No package C-state available
[ 1380.686260] microcode: sig=0x1067a, pf=0x40, revision=0xa0b
[ 1380.687098] smpboot: Booting Node 0 Processor 4 APIC 0x1
[ 1380.699214] microcode: sig=0x10676, pf=0x40, revision=0x60f
[ 1380.699742] intel_powerclamp: No package C-state available
[ 1380.700267] smpboot: Booting Node 0 Processor 5 APIC 0x5
[ 1380.715207] microcode: sig=0x1067a, pf=0x40, revision=0xa0b
[ 1380.716202] smpboot: Booting Node 0 Processor 6 APIC 0x3
[ 1380.730264] microcode: sig=0x10676, pf=0x40, revision=0x60f
[ 1380.730567] intel_powerclamp: No package C-state available
[ 1380.731267] smpboot: Booting Node 0 Processor 7 APIC 0x7
[ 1380.748276] microcode: sig=0x1067a, pf=0x40, revision=0xa0b


Re: [patch 0/6] hwmon/coretemp: Hotplug fixes, cleanups and state machine conversion

2017-04-12 Thread Tommi Rantala
2017-04-12 12:28 GMT+03:00 Thomas Gleixner :
> On Wed, 12 Apr 2017, Tommi Rantala wrote:
>> Resume-from-suspend stopped working in HP xw6600 in fedora kernel
>> 4.10.8-200.fc25.x86_64, while it worked just fine in
>> 4.9.9-200.fc25.x86_64.
>>
>> When powering on the suspended PC, there is no video output, and to
>> recover, I need to reset the machine.
>
> Is there just no video output or is the machine completely frozen? If it's
> not completely dead, then you might be able to ssh into it.

It's completely hosed: not possible to ssh, does not respond to ping either.

I made a quick test with netconsole. After booting with
no_console_suspend=1, and setting the netconsole parameters, I can get
kernel messages (to my android phone) when suspending the machine. But
no messages after the failed resume.

Hmm, might I be able to capture messages over USB serial port...?

-Tommi


Re: [patch 0/6] hwmon/coretemp: Hotplug fixes, cleanups and state machine conversion

2017-04-12 Thread Tommi Rantala
2016-11-23 17:28 GMT+02:00 Guenter Roeck :
>
> On 11/22/2016 09:42 AM, Thomas Gleixner wrote:
>>
>> After the first attempt to convert the coretemp driver to the hotplug state
>> machine failed, we had a deeper look and went a bit farther.
>>
>> The driver has quite some interesting concepts vs. the package, core and
>> sysfs file management and a bug in the package temperature sysfs interface
>> vs. cpu hotplug.
>>
>> The following series fixes that bug and simplifies the package/core
>> management and at the end converts it to the hotplug state machine.
>>
>> Along with the source size the binary size shrinks as well:
>>    text    data     bss     dec     hex
>>    4068     360      20    4448    1160  Before
>>    3801     180      36    4017     fb1  After
>>
>> Thanks,
>>
>> tglx
>> -
>>  coretemp.c |  321 
>> +
>>  1 file changed, 113 insertions(+), 208 deletions(-)

Hi,

Resume-from-suspend stopped working in HP xw6600 in fedora kernel
4.10.8-200.fc25.x86_64, while it worked just fine in
4.9.9-200.fc25.x86_64.

When powering on the suspended PC, there is no video output, and to
recover, I need to reset the machine.
Nothing is recorded in the journal logs for the resume, last lines are
from the suspend:

  Apr 08 15:41:49 xw6600 systemd[1]: Reached target Sleep.
  Apr 08 15:41:49 xw6600 systemd[1]: Starting Suspend...
  Apr 08 15:41:49 xw6600 systemd-sleep[6675]: Suspending system...

Also tested 4.11-rc5, but it fails the same way.

Bisection leads to commit:

commit e00ca5df37adc68052ea699cbd010ee4e19e39e4
Author: Thomas Gleixner 
Date:   Tue Nov 22 17:42:04 2016 +

hwmon: (coretemp) Convert to hotplug state machine

Install the callbacks via the state machine. Setup and teardown are handled
by the hotplug core.

Signed-off-by: Sebastian Andrzej Siewior 
Cc: linux-hw...@vger.kernel.org
Cc: Fenghua Yu 
Cc: Jean Delvare 
Cc: r...@linuxtronix.de
Cc: Guenter Roeck 
Link: http://lkml.kernel.org/r/20161117183541.8588-5-bige...@linutronix.de
Signed-off-by: Guenter Roeck 

If I do "modprobe -r coretemp", then the resume works OK with
4.10.8-200.fc25.x86_64.

Any ideas?

4.9.9-200.fc25.x86_64 dmesg:
http://termbin.com/3kcl

4.10.8-200.fc25.x86_64 dmesg:
http://termbin.com/62d9

-Tommi


Re: [RFC][PATCH] mm: Tighten x86 /dev/mem with zeroing

2017-04-06 Thread Tommi Rantala

On 06.04.2017 03:00, Kees Cook wrote:

This changes the x86 exception for the low 1MB by reading back zeros for
RAM areas instead of blindly allowing them. (It may be possible for heap
to end up getting allocated in low 1MB RAM, and then read out, possibly
tripping hardened usercopy.)

Unfinished: this still needs mmap support.

Reported-by: Tommi Rantala 
Signed-off-by: Kees Cook 
---
Tommi, can you check and see if this fixes what you're seeing? I want to
make sure this actually works first. (x86info uses seek/read not mmap.)


Hi, I can confirm that it works (after adding CONFIG_STRICT_DEVMEM), no 
more kernel bugs when running x86info.



open("/dev/mem", O_RDONLY)  = 3
lseek(3, 1038, SEEK_SET)= 1038
read(3, "\300\235", 2)  = 2
lseek(3, 646144, SEEK_SET)  = 646144
read(3, 
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 
1024) = 1024

lseek(3, 1043, SEEK_SET)= 1043
read(3, "w\2", 2)   = 2
lseek(3, 645120, SEEK_SET)  = 645120
read(3, 
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 
1024) = 1024

lseek(3, 654336, SEEK_SET)  = 654336
read(3, 
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 
1024) = 1024

lseek(3, 983040, SEEK_SET)  = 983040
read(3, 
"IFE$\245S\0\0\1\0\0\0\0\360y\0\0\360\220\260\30\237{=\23\10\17\\276\17\0"..., 
65536) = 65536

lseek(3, 917504, SEEK_SET)  = 917504
read(3, 
"\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377"..., 
65536) = 65536

lseek(3, 524288, SEEK_SET)  = 524288
read(3, 
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 
65536) = 65536

lseek(3, 589824, SEEK_SET)  = 589824
read(3, 
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 
65536) = 65536



dd works too:

# LANG=C dd if=/dev/mem of=/dev/null bs=4096 count=256
256+0 records in
256+0 records out
1048576 bytes (1.0 MB, 1.0 MiB) copied, 0.0874073 s, 12.0 MB/s



---

 arch/x86/mm/init.c | 41 +++
 drivers/char/mem.c | 82 ++
 2 files changed, 82 insertions(+), 41 deletions(-)


Re: sudo x86info -a => kernel BUG at mm/usercopy.c:78!

2017-03-31 Thread Tommi Rantala

On 31.03.2017 21:26, Linus Torvalds wrote:

Hmm. Thinking more about this, we do allow access to the first 1MB of
physical memory unconditionally (see devmem_is_allowed() in
arch/x86/mm/init.c). And I think we only _reserve_ the first 64kB or
something. So I guess even STRICT_DEVMEM isn't actually all that
strict.

So this should be visible even *with* STRICT_DEVMEM.

Does a simple

 sudo dd if=/dev/mem of=/dev/null bs=4096 count=256

also show the same issue? Maybe regardless of STRICT_DEVMEM?


Yep, it is enough to trigger the bug.

Also crashes with the fedora kernel that has STRICT_DEVMEM:

$ sudo dd if=/dev/mem of=/dev/null bs=4096 count=256
Segmentation fault

[   73.224025] usercopy: kernel memory exposure attempt detected from 
893a80059000 (dma-kmalloc-16) (4096 bytes)

[   73.224049] [ cut here ]
[   73.224056] kernel BUG at mm/usercopy.c:75!
[   73.224060] invalid opcode:  [#1] SMP
[   73.224237] CPU: 5 PID: 2860 Comm: dd Not tainted 
4.9.14-200.fc25.x86_64 #1




Maybe we should change devmem_is_allowed() to return a ternary value,
and then have it be "allow access" (for reserved pages), "disallow
access" (for various random stuff), and "just read zero" (for pages in
the low 1M that aren't marked reserved).

That way things that read the low 1M (like x86info) will
hopefully not be unhappy, but also won't be reading random kernel
data.

  Linus



Re: sudo x86info -a => kernel BUG at mm/usercopy.c:78!

2017-03-31 Thread Tommi Rantala

On 31.03.2017 08:40, Tommi Rantala wrote:

The only thing that I can think of would be a rogue ptr in the bios
table, but that seems unlikely.  Tommi, can you put strace of x86info
-mp somewhere?
That will confirm/deny whether we're at least asking the kernel to do
sane things.


Indeed the bug happens when reading from /dev/mem:

https://pastebin.com/raw/ZEJGQP1X

# strace -f -y x86info -mp
[...]
open("/dev/mem", O_RDONLY)  = 3
lseek(3, 1038, SEEK_SET)  = 1038
read(3, "\300\235", 2)= 2
lseek(3, 646144, SEEK_SET)= 646144
read(3,
"\1\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"...,
1024) = 1024
lseek(3, 1043, SEEK_SET)  = 1043
read(3, "w\2", 2) = 2
lseek(3, 645120, SEEK_SET)= 645120
read(3,
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"...,
1024) = 1024
lseek(3, 654336, SEEK_SET)= 654336
read(3,
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"...,
1024) = 1024
lseek(3, 983040, SEEK_SET)= 983040
read(3,
"IFE$\245S\0\0\1\0\0\0\0\360y\0\0\360\220\260\30\237{=\23\10\17\\276\17\0"...,
65536) = 65536
lseek(3, 917504, SEEK_SET)= 917504
read(3,
"\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377"...,
65536) = 65536
lseek(3, 524288, SEEK_SET)= 524288
read(3,  )= ?
+++ killed by SIGSEGV +++


That last read is done in mptable.c:347, trying to read GROPE_AREA1.

# ./x86info --debug
x86info v1.31pre
get_intel_topology:
Siblings: 2
Physical Processor ID: 0
Processor Core ID: 0
get_intel_topology:
Siblings: 2
Physical Processor ID: 0
Processor Core ID: 1
get_intel_topology:
Siblings: 2
Physical Processor ID: 0
Processor Core ID: 2
get_intel_topology:
Siblings: 2
Physical Processor ID: 0
Processor Core ID: 3
get_intel_topology:
Siblings: 2
Physical Processor ID: 0
Processor Core ID: 0
get_intel_topology:
Siblings: 2
Physical Processor ID: 0
Processor Core ID: 1
get_intel_topology:
Siblings: 2
Physical Processor ID: 0
Processor Core ID: 2
get_intel_topology:
Siblings: 2
Physical Processor ID: 0
Processor Core ID: 3
Found 8 identical CPUs
EBDA points to: 9dc0
EBDA segment ptr: 9dc00
Segmentation fault


If I comment out the GROPE_AREA1 read, the same kernel bug still happens 
with the GROPE_AREA2 read.


Removing both GROPE_AREA1 and GROPE_AREA2 reads avoids the crash:

$ git diff
diff --git a/mptable.c b/mptable.c
index 480f19b..00fff35 100644
--- a/mptable.c
+++ b/mptable.c
@@ -342,6 +342,7 @@ static int apic_probe(unsigned long* paddr)
}

/* search additional memory */
+   /*
target = GROPE_AREA1;
seekEntry(target);
if (readEntry(buffer, GROPE_SIZE)) {
@@ -371,6 +372,7 @@ static int apic_probe(unsigned long* paddr)
return 6;
}
}
+   */

*paddr = (unsigned long)0;
return 0;

# ./x86info -mp
x86info v1.31pre
Found 8 identical CPUs
Extended Family: 0 Extended Model: 5 Family: 6 Model: 94 Stepping: 3
Type: 0 (Original OEM)
CPU Model (x86info's best guess): Unknown model.
Processor name string (BIOS programmed): Intel(R) Core(TM) i7-6820HQ CPU 
@ 2.70GHz


Total processor threads: 8
This system has 1 quad-core processor with hyper-threading (2 threads 
per core) running at an estimated 2.70GHz

#

-Tommi


Re: sudo x86info -a => kernel BUG at mm/usercopy.c:78!

2017-03-30 Thread Tommi Rantala

On 30.03.2017 20:44, Laura Abbott wrote:

On 03/30/2017 10:37 AM, Kees Cook wrote:


Reads out of /dev/mem should be restricted to non-RAM on Fedora, yes?

Tommi, do your kernels have CONFIG_STRICT_DEVMEM=y ?

-Kees



CONFIG_STRICT_DEVMEM should be on in all Fedora kernels.


Yes, the fedora kernels do have it enabled:

  $ grep STRICT_DEVMEM /boot/config-4.9.14-200.fc25.x86_64
  CONFIG_STRICT_DEVMEM=y
  CONFIG_IO_STRICT_DEVMEM=y

But I do not have it in my own build:

  $ grep STRICT_DEVMEM .config
  # CONFIG_STRICT_DEVMEM is not set

-Tommi


Re: sudo x86info -a => kernel BUG at mm/usercopy.c:78!

2017-03-30 Thread Tommi Rantala



On 30.03.2017 23:01, Dave Jones wrote:

On Thu, Mar 30, 2017 at 12:52:31PM -0700, Kees Cook wrote:
 > On Thu, Mar 30, 2017 at 12:41 PM, Dave Jones  wrote:
 > > On Thu, Mar 30, 2017 at 09:45:26AM -0700, Kees Cook wrote:
 > >  > On Wed, Mar 29, 2017 at 11:44 PM, Tommi Rantala
 > >  >  wrote:
 > >  > > Hi,
 > >  > >
 > >  > > Running:
 > >  > >
 > >  > >   $ sudo x86info -a
 > >  > >
 > >  > > On this HP ZBook 15 G3 laptop kills the x86info process with segfault
 > >  > > and produces the following kernel BUG.
 > >  > >
 > >  > >   $ git describe
 > >  > >   v4.11-rc4-40-gfe82203
 > >  > >
 > >  > > It is also reproducible with the fedora kernel: 4.9.14-200.fc25.x86_64
 > >  > >
 > >  > > Full dmesg output here: https://pastebin.com/raw/Kur2mpZq
 > >  > >
 > >  > > [   51.418954] usercopy: kernel memory exposure attempt detected from
 > >  > > ffff880000090000 (dma-kmalloc-256) (4096 bytes)
 > >  >
 > >  > This seems like a real exposure: the copy is attempting to read 4096
 > >  > bytes from a 256 byte object.
 > >
 > > The code[1] is doing a 4k read from /dev/mem in the range 0x90000 -> 0xa0000
 > > According to arch/x86/mm/init.c:devmem_is_allowed, that's still valid..
 > >
 > > Note that the printk is using the direct mapping address. Is that what's
 > > being passed down to devmem_is_allowed now ? If so, that's probably what
 > > broke.
 >
 > So this is attempting to read physical memory 0x90000 -> 0xa0000, but
 > that's somehow resolving to a virtual address that is claimed by
 > dma-kmalloc?? I'm confused how that's happening...

The only thing that I can think of would be a rogue ptr in the bios
table, but that seems unlikely.  Tommi, can you put strace of x86info -mp
somewhere? That will confirm/deny whether we're at least asking the kernel
to do sane things.


Indeed the bug happens when reading from /dev/mem:

https://pastebin.com/raw/ZEJGQP1X

# strace -f -y x86info -mp
[...]
open("/dev/mem", O_RDONLY)  = 3
lseek(3, 1038, SEEK_SET)  = 1038
read(3, "\300\235", 2)= 2
lseek(3, 646144, SEEK_SET)= 646144
read(3, 
"\1\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 
1024) = 1024

lseek(3, 1043, SEEK_SET)  = 1043
read(3, "w\2", 2) = 2
lseek(3, 645120, SEEK_SET)= 645120
read(3, 
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 
1024) = 1024

lseek(3, 654336, SEEK_SET)= 654336
read(3, 
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 
1024) = 1024

lseek(3, 983040, SEEK_SET)= 983040
read(3, 
"IFE$\245S\0\0\1\0\0\0\0\360y\0\0\360\220\260\30\237{=\23\10\17\\276\17\0"..., 
65536) = 65536

lseek(3, 917504, SEEK_SET)= 917504
read(3, 
"\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377"..., 
65536) = 65536

lseek(3, 524288, SEEK_SET)= 524288
read(3,  )= ?
+++ killed by SIGSEGV +++


sudo x86info -a => kernel BUG at mm/usercopy.c:78!

2017-03-29 Thread Tommi Rantala

Hi,

Running:

  $ sudo x86info -a

On this HP ZBook 15 G3 laptop kills the x86info process with segfault 
and produces the following kernel BUG.


  $ git describe
  v4.11-rc4-40-gfe82203

It is also reproducible with the fedora kernel: 4.9.14-200.fc25.x86_64

Full dmesg output here: https://pastebin.com/raw/Kur2mpZq

[   51.418954] usercopy: kernel memory exposure attempt detected from 
ffff880000090000 (dma-kmalloc-256) (4096 bytes)

[   51.418959] [ cut here ]
[   51.418968] kernel BUG at /home/tomranta/git/linux/mm/usercopy.c:78!
[   51.418970] invalid opcode:  [#1] SMP
[   51.418972] Modules linked in: fuse ccm ipt_REJECT nf_reject_ipv4 
xt_tcpudp tun af_packet xt_conntrack nf_conntrack libcrc32c ebtable_nat 
ebtable_broute bridge ip6table_mangle ip6table_raw iptable_mangle 
iptable_raw ebtable_filter ebtables ip6table_filter ip6_tables 
iptable_filter ip_tables x_tables nls_iso8859_1 nls_cp437 vfat fat 
dm_mirror dm_region_hash dm_log arc4 hp_wmi sparse_keymap coretemp 
kvm_intel snd_hda_codec_hdmi kvm irqbypass pcbc aesni_intel aes_x86_64 
crypto_simd cryptd glue_helper intel_cstate intel_uncore intel_rapl_perf 
iwlmvm mac80211 snd_usb_audio mousedev snd_usbmidi_lib snd_rawmidi 
input_leds snd_hda_codec_conexant snd_hda_codec_generic efivars iwlwifi 
uvcvideo videobuf2_vmalloc videobuf2_memops snd_hda_intel videobuf2_v4l2 
cfg80211 videobuf2_core snd_hda_codec snd_seq snd_hwdep
[   51.419010]  snd_seq_device snd_hda_core snd_pcm thermal hp_accel 
lis3lv02d input_polldev ac acpi_pad battery led_class evdev hp_wireless 
nfsd lockd grace sunrpc tg3 libphy crc32_pclmul crc32c_intel e1000e 
sd_mod 8021q garp stp llc mrp unix autofs4
[   51.419025] CPU: 7 PID: 2406 Comm: x86info Not tainted 
4.11.0-rc4-tommi+ #14
[   51.419027] Hardware name: HP HP ZBook 15 G3/80D5, BIOS N81 Ver. 
01.12 11/01/2016

[   51.419030] task: 88026ce84100 task.stack: c90003b94000
[   51.419035] RIP: 0010:__check_object_size+0xfd/0x195
[   51.419037] RSP: 0018:c90003b97de0 EFLAGS: 00010282
[   51.419039] RAX: 0066 RBX: 8809 RCX: 

[   51.419042] RDX: 8802bddd33e8 RSI: 8802bddcc9e8 RDI: 
8802bddcc9e8
[   51.419044] RBP: c90003b97e00 R08: 0006648a R09: 
048b
[   51.419046] R10: 0100 R11: 81e9a86d R12: 
1000
[   51.419049] R13: 0001 R14: 88091000 R15: 
8809
[   51.419051] FS:  7f8323436b40() GS:8802bddc() 
knlGS:

[   51.419054] CS:  0010 DS:  ES:  CR0: 80050033
[   51.419056] CR2: 7ffcbec21000 CR3: 00026c8e8000 CR4: 
003406a0
[   51.419058] DR0:  DR1:  DR2: 

[   51.419061] DR3:  DR6: fffe0ff0 DR7: 
0400

[   51.419063] Call Trace:
[   51.419066]  read_mem+0x70/0x120
[   51.419069]  __vfs_read+0x28/0x130
[   51.419072]  ? security_file_permission+0x9b/0xb0
[   51.419075]  ? rw_verify_area+0x4e/0xb0
[   51.419077]  vfs_read+0x96/0x130
[   51.419079]  SyS_read+0x46/0xb0
[   51.419082]  ? SyS_lseek+0x87/0xb0
[   51.419085]  entry_SYSCALL_64_fastpath+0x1a/0xa9
[   51.419087] RIP: 0033:0x7f8322d56bd0
[   51.419089] RSP: 002b:7ffcbec11c68 EFLAGS: 0246 ORIG_RAX: 

[   51.419091] RAX: ffda RBX: 0006 RCX: 
7f8322d56bd0
[   51.419094] RDX: 0001 RSI: 7ffcbec11ca0 RDI: 
0003
[   51.419096] RBP: 0008 R08: 0005 R09: 
0050
[   51.419098] R10:  R11: 0246 R12: 
02231c00
[   51.419100] R13: 7ffcbec11c9e R14: 7ffcbec51cf8 R15: 

[   51.419103] Code: a8 81 48 c7 c2 29 69 a4 81 48 c7 c6 82 89 a5 81 48 
0f 45 d0 48 c7 c0 1a 1e a6 81 48 c7 c7 d0 ed a5 81 48 0f 45 f0 e8 7f 74 
f8 ff <0f> 0b 48 89 df e8 29 98 e8 ff 84 c0 0f 84 3a ff ff ff b8 00 00

[   51.419123] RIP: __check_object_size+0xfd/0x195 RSP: c90003b97de0
[   51.421565] ---[ end trace 441f7992ca25e39d ]---


[tip:perf/core] perf utils: Readlink /proc/self/exe to find the perf binary

2017-03-27 Thread tip-bot for Tommi Rantala
Commit-ID:  55f77128e7652e537d6c226d5b56821cdb5c22de
Gitweb: http://git.kernel.org/tip/55f77128e7652e537d6c226d5b56821cdb5c22de
Author: Tommi Rantala 
AuthorDate: Wed, 22 Mar 2017 15:06:24 +0200
Committer:  Arnaldo Carvalho de Melo 
CommitDate: Mon, 27 Mar 2017 15:37:54 -0300

perf utils: Readlink /proc/self/exe to find the perf binary

Simplification: it is easier to open /proc/self/exe than /proc/$pid/exe.

Signed-off-by: Tommi Rantala 
Cc: Alexander Shishkin 
Cc: Peter Zijlstra 
Link: http://lkml.kernel.org/r/20170322130624.21881-7-tommi.t.rant...@nokia.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/util/header.c | 8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index cf22962..ef09f26 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -370,15 +370,11 @@ static int write_cmdline(int fd, struct perf_header *h 
__maybe_unused,
 struct perf_evlist *evlist __maybe_unused)
 {
char buf[MAXPATHLEN];
-   char proc[32];
u32 n;
int i, ret;
 
-   /*
-* actual atual path to perf binary
-*/
-   sprintf(proc, "/proc/%d/exe", getpid());
-   ret = readlink(proc, buf, sizeof(buf) - 1);
+   /* actual path to perf binary */
+   ret = readlink("/proc/self/exe", buf, sizeof(buf) - 1);
if (ret <= 0)
return -1;
 


[tip:perf/core] perf buildid: Do not assume that readlink() returns a null terminated string

2017-03-27 Thread tip-bot for Tommi Rantala
Commit-ID:  5a2342111c68e623e27ee7ea3d0492d8dad6bda0
Gitweb: http://git.kernel.org/tip/5a2342111c68e623e27ee7ea3d0492d8dad6bda0
Author: Tommi Rantala 
AuthorDate: Wed, 22 Mar 2017 15:06:20 +0200
Committer:  Arnaldo Carvalho de Melo 
CommitDate: Mon, 27 Mar 2017 15:35:06 -0300

perf buildid: Do not assume that readlink() returns a null terminated string

Valgrind was complaining:

  $ valgrind ./perf list >/dev/null
  ==11643== Memcheck, a memory error detector
  ==11643== Copyright (C) 2002-2015, and GNU GPL'd, by Julian Seward et al.
  ==11643== Using Valgrind-3.12.0 and LibVEX; rerun with -h for copyright info
  ==11643== Command: ./perf list
  ==11643==
  ==11643== Conditional jump or move depends on uninitialised value(s)
  ==11643==at 0x4C30620: rindex (vg_replace_strmem.c:199)
  ==11643==by 0x49DAA9: build_id_cache__origname (build-id.c:198)
  ==11643==by 0x49E1C7: build_id_cache__valid_id (build-id.c:222)
  ==11643==by 0x49E1C7: build_id_cache__list_all (build-id.c:507)
  ==11643==by 0x4B9C8F: print_sdt_events (parse-events.c:2067)
  ==11643==by 0x4BB0B3: print_events (parse-events.c:2313)
  ==11643==by 0x439501: cmd_list (builtin-list.c:53)
  ==11643==by 0x497150: run_builtin (perf.c:359)
  ==11643==by 0x428CE0: handle_internal_command (perf.c:421)
  ==11643==by 0x428CE0: run_argv (perf.c:467)
  ==11643==by 0x428CE0: main (perf.c:614)
  [...]

Additionally, a zero length result from readlink() is not very interesting.

Signed-off-by: Tommi Rantala 
Cc: Alexander Shishkin 
Cc: Peter Zijlstra 
Link: http://lkml.kernel.org/r/20170322130624.21881-3-tommi.t.rant...@nokia.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/util/build-id.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 234859f..33af675 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -182,13 +182,17 @@ char *build_id_cache__origname(const char *sbuild_id)
char buf[PATH_MAX];
char *ret = NULL, *p;
size_t offs = 5;/* == strlen("../..") */
+   ssize_t len;
 
linkname = build_id_cache__linkname(sbuild_id, NULL, 0);
if (!linkname)
return NULL;
 
-   if (readlink(linkname, buf, PATH_MAX) < 0)
+   len = readlink(linkname, buf, sizeof(buf) - 1);
+   if (len <= 0)
goto out;
+   buf[len] = '\0';
+
/* The link should be "../../" */
p = strrchr(buf, '/');  /* Cut off the "/" */
if (p && (p > buf + offs)) {


[tip:perf/core] perf utils: Null terminate buf in read_ftrace_printk()

2017-03-27 Thread tip-bot for Tommi Rantala
Commit-ID:  d4b364df5f6540e8d6a38008ce2693ba73a8508a
Gitweb: http://git.kernel.org/tip/d4b364df5f6540e8d6a38008ce2693ba73a8508a
Author: Tommi Rantala 
AuthorDate: Wed, 22 Mar 2017 15:06:23 +0200
Committer:  Arnaldo Carvalho de Melo 
CommitDate: Mon, 27 Mar 2017 15:37:35 -0300

perf utils: Null terminate buf in read_ftrace_printk()

Ensure that the string that we read from the data file is null terminated.

Valgrind was complaining:

  ==31357== Invalid read of size 1
  ==31357==at 0x4EC8C1: __strtok_r_1c (string2.h:200)
  ==31357==by 0x4EC8C1: parse_ftrace_printk (trace-event-parse.c:161)
  ==31357==by 0x4F82A8: read_ftrace_printk (trace-event-read.c:204)
  ==31357==by 0x4F82A8: trace_report (trace-event-read.c:468)
  ==31357==by 0x4CD552: process_tracing_data (header.c:1576)
  ==31357==by 0x4D3397: perf_file_section__process (header.c:2705)
  ==31357==by 0x4D3397: perf_header__process_sections (header.c:2488)
  ==31357==by 0x4D3397: perf_session__read_header (header.c:2925)
  ==31357==by 0x4E71E2: perf_session__open (session.c:32)
  ==31357==by 0x4E71E2: perf_session__new (session.c:139)
  ==31357==by 0x429F5D: cmd_annotate (builtin-annotate.c:472)
  ==31357==by 0x497150: run_builtin (perf.c:359)
  ==31357==by 0x428CE0: handle_internal_command (perf.c:421)
  ==31357==by 0x428CE0: run_argv (perf.c:467)
  ==31357==by 0x428CE0: main (perf.c:614)
  ==31357==  Address 0x8ac0efb is 0 bytes after a block of size 1,963 alloc'd
  ==31357==at 0x4C2DB9D: malloc (vg_replace_malloc.c:299)
  ==31357==by 0x4F827B: read_ftrace_printk (trace-event-read.c:195)
  ==31357==by 0x4F827B: trace_report (trace-event-read.c:468)
  ==31357==by 0x4CD552: process_tracing_data (header.c:1576)
  ==31357==by 0x4D3397: perf_file_section__process (header.c:2705)
  ==31357==by 0x4D3397: perf_header__process_sections (header.c:2488)
  ==31357==by 0x4D3397: perf_session__read_header (header.c:2925)
  ==31357==by 0x4E71E2: perf_session__open (session.c:32)
  ==31357==by 0x4E71E2: perf_session__new (session.c:139)
  ==31357==by 0x429F5D: cmd_annotate (builtin-annotate.c:472)
  ==31357==by 0x497150: run_builtin (perf.c:359)
  ==31357==by 0x428CE0: handle_internal_command (perf.c:421)
  ==31357==by 0x428CE0: run_argv (perf.c:467)
  ==31357==by 0x428CE0: main (perf.c:614)

Signed-off-by: Tommi Rantala 
Cc: Alexander Shishkin 
Cc: Peter Zijlstra 
Link: http://lkml.kernel.org/r/20170322130624.21881-6-tommi.t.rant...@nokia.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/util/trace-event-read.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/trace-event-read.c 
b/tools/perf/util/trace-event-read.c
index 2742015..8a9a677 100644
--- a/tools/perf/util/trace-event-read.c
+++ b/tools/perf/util/trace-event-read.c
@@ -192,7 +192,7 @@ static int read_ftrace_printk(struct pevent *pevent)
if (!size)
return 0;
 
-   buf = malloc(size);
+   buf = malloc(size + 1);
if (buf == NULL)
return -1;
 
@@ -201,6 +201,8 @@ static int read_ftrace_printk(struct pevent *pevent)
return -1;
}
 
+   buf[size] = '\0';
+
parse_ftrace_printk(pevent, buf, size);
 
free(buf);


[tip:perf/core] perf tests: Do not assume that readlink() returns a null terminated string

2017-03-27 Thread tip-bot for Tommi Rantala
Commit-ID:  0e6ba11511aef91ba8e2528ddc681d88922d7b0b
Gitweb: http://git.kernel.org/tip/0e6ba11511aef91ba8e2528ddc681d88922d7b0b
Author: Tommi Rantala 
AuthorDate: Wed, 22 Mar 2017 15:06:21 +0200
Committer:  Arnaldo Carvalho de Melo 
CommitDate: Mon, 27 Mar 2017 15:35:56 -0300

perf tests: Do not assume that readlink() returns a null terminated string

Ensure that the string in buf is null terminated.

Signed-off-by: Tommi Rantala 
Cc: Alexander Shishkin 
Cc: Peter Zijlstra 
Link: http://lkml.kernel.org/r/20170322130624.21881-4-tommi.t.rant...@nokia.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/tests/sdt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/tests/sdt.c b/tools/perf/tests/sdt.c
index f59d210..26e5b7a 100644
--- a/tools/perf/tests/sdt.c
+++ b/tools/perf/tests/sdt.c
@@ -43,7 +43,7 @@ static char *get_self_path(void)
 {
char *buf = calloc(PATH_MAX, sizeof(char));
 
-   if (buf && readlink("/proc/self/exe", buf, PATH_MAX) < 0) {
+   if (buf && readlink("/proc/self/exe", buf, PATH_MAX - 1) < 0) {
pr_debug("Failed to get correct path of perf\n");
free(buf);
return NULL;


[tip:perf/core] perf utils: use sizeof(buf) - 1 in readlink() call

2017-03-27 Thread tip-bot for Tommi Rantala
Commit-ID:  b7126ef78612a3d4a37aadf39125cff048cebb9b
Gitweb: http://git.kernel.org/tip/b7126ef78612a3d4a37aadf39125cff048cebb9b
Author: Tommi Rantala 
AuthorDate: Wed, 22 Mar 2017 15:06:22 +0200
Committer:  Arnaldo Carvalho de Melo 
CommitDate: Mon, 27 Mar 2017 15:36:27 -0300

perf utils: use sizeof(buf) - 1 in readlink() call

Ensure that we have space for the null byte in buf.

Signed-off-by: Tommi Rantala 
Cc: Alexander Shishkin 
Cc: Peter Zijlstra 
Link: http://lkml.kernel.org/r/20170322130624.21881-5-tommi.t.rant...@nokia.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/util/header.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 05714d5..cf22962 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -378,7 +378,7 @@ static int write_cmdline(int fd, struct perf_header *h 
__maybe_unused,
 * actual atual path to perf binary
 */
sprintf(proc, "/proc/%d/exe", getpid());
-   ret = readlink(proc, buf, sizeof(buf));
+   ret = readlink(proc, buf, sizeof(buf) - 1);
if (ret <= 0)
return -1;
 


[tip:perf/core] perf buildid: Do not update SDT cache with null filename

2017-03-27 Thread tip-bot for Tommi Rantala
Commit-ID:  2ccc220238680642be87a2d010ce07f1c40edafb
Gitweb: http://git.kernel.org/tip/2ccc220238680642be87a2d010ce07f1c40edafb
Author: Tommi Rantala 
AuthorDate: Wed, 22 Mar 2017 15:06:19 +0200
Committer:  Arnaldo Carvalho de Melo 
CommitDate: Mon, 27 Mar 2017 15:33:36 -0300

perf buildid: Do not update SDT cache with null filename

Valgrind was complaining:

  ==2633== Syscall param open(filename) points to unaddressable byte(s)
  ==2633==at 0x5281CC0: __open_nocancel (syscall-template.S:84)
  ==2633==by 0x537D38: open (fcntl2.h:53)
  ==2633==by 0x537D38: get_sdt_note_list (symbol-elf.c:2017)
  ==2633==by 0x5396FD: probe_cache__scan_sdt (probe-file.c:700)
  ==2633==by 0x49EA2C: build_id_cache__add_sdt_cache (build-id.c:625)
  ==2633==by 0x49EA2C: build_id_cache__add_s (build-id.c:697)
  ==2633==by 0x49EE72: build_id_cache__add_b (build-id.c:717)
  ==2633==by 0x49EE72: dso__cache_build_id (build-id.c:782)
  ==2633==by 0x49F190: __dsos__cache_build_ids (build-id.c:793)
  ==2633==by 0x49F190: machine__cache_build_ids (build-id.c:801)
  ==2633==by 0x49F190: perf_session__cache_build_ids (build-id.c:815)
  ==2633==by 0x4CD4F2: write_build_id (header.c:165)
  ==2633==by 0x4D26F7: do_write_feat (header.c:2296)
  ==2633==by 0x4D26F7: perf_header__adds_write (header.c:2335)
  ==2633==by 0x4D26F7: perf_session__write_header (header.c:2414)
  ==2633==by 0x43B324: __cmd_record (builtin-record.c:1154)
  ==2633==by 0x43B324: cmd_record (builtin-record.c:1839)
  ==2633==by 0x455A07: __cmd_record (builtin-kmem.c:1868)
  ==2633==by 0x455A07: cmd_kmem (builtin-kmem.c:1944)
  ==2633==by 0x497150: run_builtin (perf.c:359)
  ==2633==by 0x428CE0: handle_internal_command (perf.c:421)
  ==2633==by 0x428CE0: run_argv (perf.c:467)
  ==2633==by 0x428CE0: main (perf.c:614)
  ==2633==  Address 0x0 is not stack'd, malloc'd or (recently) free'd

Signed-off-by: Tommi Rantala 
Cc: Alexander Shishkin 
Cc: Peter Zijlstra 
Cc: Tommi Rantala 
Link: http://lkml.kernel.org/r/20170322130624.21881-2-tommi.t.rant...@nokia.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/util/build-id.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index e528c40..234859f 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -690,7 +690,7 @@ int build_id_cache__add_s(const char *sbuild_id, const char 
*name,
err = 0;
 
/* Update SDT cache : error is just warned */
-   if (build_id_cache__add_sdt_cache(sbuild_id, realname) < 0)
+   if (realname && build_id_cache__add_sdt_cache(sbuild_id, realname) < 0)
pr_debug4("Failed to update/scan SDT cache for %s\n", realname);
 
 out_free:


[PATCH 1/6] perf buildid: do not update SDT cache with null filename

2017-03-22 Thread Tommi Rantala
Valgrind was complaining:

  ==2633== Syscall param open(filename) points to unaddressable byte(s)
  ==2633==at 0x5281CC0: __open_nocancel (syscall-template.S:84)
  ==2633==by 0x537D38: open (fcntl2.h:53)
  ==2633==by 0x537D38: get_sdt_note_list (symbol-elf.c:2017)
  ==2633==by 0x5396FD: probe_cache__scan_sdt (probe-file.c:700)
  ==2633==by 0x49EA2C: build_id_cache__add_sdt_cache (build-id.c:625)
  ==2633==by 0x49EA2C: build_id_cache__add_s (build-id.c:697)
  ==2633==by 0x49EE72: build_id_cache__add_b (build-id.c:717)
  ==2633==by 0x49EE72: dso__cache_build_id (build-id.c:782)
  ==2633==by 0x49F190: __dsos__cache_build_ids (build-id.c:793)
  ==2633==by 0x49F190: machine__cache_build_ids (build-id.c:801)
  ==2633==by 0x49F190: perf_session__cache_build_ids (build-id.c:815)
  ==2633==by 0x4CD4F2: write_build_id (header.c:165)
  ==2633==by 0x4D26F7: do_write_feat (header.c:2296)
  ==2633==by 0x4D26F7: perf_header__adds_write (header.c:2335)
  ==2633==by 0x4D26F7: perf_session__write_header (header.c:2414)
  ==2633==by 0x43B324: __cmd_record (builtin-record.c:1154)
  ==2633==by 0x43B324: cmd_record (builtin-record.c:1839)
  ==2633==by 0x455A07: __cmd_record (builtin-kmem.c:1868)
  ==2633==by 0x455A07: cmd_kmem (builtin-kmem.c:1944)
  ==2633==by 0x497150: run_builtin (perf.c:359)
  ==2633==by 0x428CE0: handle_internal_command (perf.c:421)
  ==2633==by 0x428CE0: run_argv (perf.c:467)
  ==2633==by 0x428CE0: main (perf.c:614)
  ==2633==  Address 0x0 is not stack'd, malloc'd or (recently) free'd

Signed-off-by: Tommi Rantala 
---
 tools/perf/util/build-id.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index e528c40..234859f 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -690,7 +690,7 @@ int build_id_cache__add_s(const char *sbuild_id, const char 
*name,
err = 0;
 
/* Update SDT cache : error is just warned */
-   if (build_id_cache__add_sdt_cache(sbuild_id, realname) < 0)
+   if (realname && build_id_cache__add_sdt_cache(sbuild_id, realname) < 0)
pr_debug4("Failed to update/scan SDT cache for %s\n", realname);
 
 out_free:
-- 
2.9.3



[PATCH 2/6] perf buildid: do not assume that readlink() returns a null terminated string

2017-03-22 Thread Tommi Rantala
Valgrind was complaining:

  $ valgrind ./perf list >/dev/null
  ==11643== Memcheck, a memory error detector
  ==11643== Copyright (C) 2002-2015, and GNU GPL'd, by Julian Seward et al.
  ==11643== Using Valgrind-3.12.0 and LibVEX; rerun with -h for copyright info
  ==11643== Command: ./perf list
  ==11643==
  ==11643== Conditional jump or move depends on uninitialised value(s)
  ==11643==at 0x4C30620: rindex (vg_replace_strmem.c:199)
  ==11643==by 0x49DAA9: build_id_cache__origname (build-id.c:198)
  ==11643==by 0x49E1C7: build_id_cache__valid_id (build-id.c:222)
  ==11643==by 0x49E1C7: build_id_cache__list_all (build-id.c:507)
  ==11643==by 0x4B9C8F: print_sdt_events (parse-events.c:2067)
  ==11643==by 0x4BB0B3: print_events (parse-events.c:2313)
  ==11643==by 0x439501: cmd_list (builtin-list.c:53)
  ==11643==by 0x497150: run_builtin (perf.c:359)
  ==11643==by 0x428CE0: handle_internal_command (perf.c:421)
  ==11643==by 0x428CE0: run_argv (perf.c:467)
  ==11643==by 0x428CE0: main (perf.c:614)
  [...]

Additionally, a zero length result from readlink() is not very interesting.

Signed-off-by: Tommi Rantala 
---
 tools/perf/util/build-id.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 234859f..9ad77b0 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -182,13 +182,17 @@ char *build_id_cache__origname(const char *sbuild_id)
char buf[PATH_MAX];
char *ret = NULL, *p;
size_t offs = 5;/* == strlen("../..") */
+   ssize_t len;
 
linkname = build_id_cache__linkname(sbuild_id, NULL, 0);
if (!linkname)
return NULL;
 
-   if (readlink(linkname, buf, PATH_MAX) < 0)
+   len = readlink(linkname, buf, sizeof(buf)-1);
+   if (len <= 0)
goto out;
+   buf[len] = '\0';
+
/* The link should be "../../" */
p = strrchr(buf, '/');  /* Cut off the "/" */
if (p && (p > buf + offs)) {
-- 
2.9.3



[PATCH 6/6] perf utils: readlink /proc/self/exe to find the perf binary

2017-03-22 Thread Tommi Rantala
Signed-off-by: Tommi Rantala 
---
 tools/perf/util/header.c | 8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index ab10e9d..c6243af 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -370,15 +370,11 @@ static int write_cmdline(int fd, struct perf_header *h 
__maybe_unused,
 struct perf_evlist *evlist __maybe_unused)
 {
char buf[MAXPATHLEN];
-   char proc[32];
u32 n;
int i, ret;
 
-   /*
-* actual atual path to perf binary
-*/
-   sprintf(proc, "/proc/%d/exe", getpid());
-   ret = readlink(proc, buf, sizeof(buf)-1);
+   /* actual path to perf binary */
+   ret = readlink("/proc/self/exe", buf, sizeof(buf)-1);
if (ret <= 0)
return -1;
 
-- 
2.9.3



[PATCH 4/6] perf utils: use sizeof(buf)-1 in readlink() call

2017-03-22 Thread Tommi Rantala
Ensure that we have space for the null byte in buf.

Signed-off-by: Tommi Rantala 
---
 tools/perf/util/header.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 05714d5..ab10e9d 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -378,7 +378,7 @@ static int write_cmdline(int fd, struct perf_header *h 
__maybe_unused,
 * actual atual path to perf binary
 */
sprintf(proc, "/proc/%d/exe", getpid());
-   ret = readlink(proc, buf, sizeof(buf));
+   ret = readlink(proc, buf, sizeof(buf)-1);
if (ret <= 0)
return -1;
 
-- 
2.9.3



[PATCH 5/6] perf utils: null terminate buf in read_ftrace_printk()

2017-03-22 Thread Tommi Rantala
Ensure that the string that we read from the data file is null terminated.

Valgrind was complaining:

  ==31357== Invalid read of size 1
  ==31357==at 0x4EC8C1: __strtok_r_1c (string2.h:200)
  ==31357==by 0x4EC8C1: parse_ftrace_printk (trace-event-parse.c:161)
  ==31357==by 0x4F82A8: read_ftrace_printk (trace-event-read.c:204)
  ==31357==by 0x4F82A8: trace_report (trace-event-read.c:468)
  ==31357==by 0x4CD552: process_tracing_data (header.c:1576)
  ==31357==by 0x4D3397: perf_file_section__process (header.c:2705)
  ==31357==by 0x4D3397: perf_header__process_sections (header.c:2488)
  ==31357==by 0x4D3397: perf_session__read_header (header.c:2925)
  ==31357==by 0x4E71E2: perf_session__open (session.c:32)
  ==31357==by 0x4E71E2: perf_session__new (session.c:139)
  ==31357==by 0x429F5D: cmd_annotate (builtin-annotate.c:472)
  ==31357==by 0x497150: run_builtin (perf.c:359)
  ==31357==by 0x428CE0: handle_internal_command (perf.c:421)
  ==31357==by 0x428CE0: run_argv (perf.c:467)
  ==31357==by 0x428CE0: main (perf.c:614)
  ==31357==  Address 0x8ac0efb is 0 bytes after a block of size 1,963 alloc'd
  ==31357==at 0x4C2DB9D: malloc (vg_replace_malloc.c:299)
  ==31357==by 0x4F827B: read_ftrace_printk (trace-event-read.c:195)
  ==31357==by 0x4F827B: trace_report (trace-event-read.c:468)
  ==31357==by 0x4CD552: process_tracing_data (header.c:1576)
  ==31357==by 0x4D3397: perf_file_section__process (header.c:2705)
  ==31357==by 0x4D3397: perf_header__process_sections (header.c:2488)
  ==31357==by 0x4D3397: perf_session__read_header (header.c:2925)
  ==31357==by 0x4E71E2: perf_session__open (session.c:32)
  ==31357==by 0x4E71E2: perf_session__new (session.c:139)
  ==31357==by 0x429F5D: cmd_annotate (builtin-annotate.c:472)
  ==31357==by 0x497150: run_builtin (perf.c:359)
  ==31357==by 0x428CE0: handle_internal_command (perf.c:421)
  ==31357==by 0x428CE0: run_argv (perf.c:467)
  ==31357==by 0x428CE0: main (perf.c:614)

Signed-off-by: Tommi Rantala 
---
 tools/perf/util/trace-event-read.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/trace-event-read.c 
b/tools/perf/util/trace-event-read.c
index 2742015..04605c0 100644
--- a/tools/perf/util/trace-event-read.c
+++ b/tools/perf/util/trace-event-read.c
@@ -192,7 +192,7 @@ static int read_ftrace_printk(struct pevent *pevent)
if (!size)
return 0;
 
-   buf = malloc(size);
+   buf = malloc(size+1);
if (buf == NULL)
return -1;
 
@@ -201,6 +201,8 @@ static int read_ftrace_printk(struct pevent *pevent)
return -1;
}
 
+   buf[size] = '\0';
+
parse_ftrace_printk(pevent, buf, size);
 
free(buf);
-- 
2.9.3



[PATCH 3/6] perf tests: do not assume that readlink() returns a null terminated string

2017-03-22 Thread Tommi Rantala
Ensure that the string in buf is null terminated.

Signed-off-by: Tommi Rantala 
---
 tools/perf/tests/sdt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/tests/sdt.c b/tools/perf/tests/sdt.c
index f59d210..121949a 100644
--- a/tools/perf/tests/sdt.c
+++ b/tools/perf/tests/sdt.c
@@ -43,7 +43,7 @@ static char *get_self_path(void)
 {
char *buf = calloc(PATH_MAX, sizeof(char));
 
-   if (buf && readlink("/proc/self/exe", buf, PATH_MAX) < 0) {
+   if (buf && readlink("/proc/self/exe", buf, PATH_MAX-1) < 0) {
pr_debug("Failed to get correct path of perf\n");
free(buf);
return NULL;
-- 
2.9.3



[PATCH 0/6] perf string handling fixes

2017-03-22 Thread Tommi Rantala
Hi,

Some small perf fixes, mostly caught with valgrind.

The last patch is a simplification: it is easier to open /proc/self/exe
than /proc/$pid/exe.

Tommi Rantala (6):
  perf buildid: do not update SDT cache with null filename
  perf buildid: do not assume that readlink() returns a null terminated
string
  perf tests: do not assume that readlink() returns a null terminated
string
  perf utils: use sizeof(buf)-1 in readlink() call
  perf utils: null terminate buf in read_ftrace_printk()
  perf utils: readlink /proc/self/exe to find the perf binary

 tools/perf/tests/sdt.c | 2 +-
 tools/perf/util/build-id.c | 8 ++--
 tools/perf/util/header.c   | 8 ++--
 tools/perf/util/trace-event-read.c | 4 +++-
 4 files changed, 12 insertions(+), 10 deletions(-)

-- 
2.9.3



Re: nouveau: iowrite32 oops & warning at drivers/gpu/drm/nouveau/nouveau_fence.c:198

2015-11-23 Thread Tommi Rantala
2015-11-22 22:49 GMT+02:00 Ilia Mirkin :
> Not sure if these apply here but there are a couple of outstanding
> locking fixes available in
> http://cgit.freedesktop.org/~darktama/nouveau/ -- specifically these
> two:
>
> http://cgit.freedesktop.org/~darktama/nouveau/commit/?id=2f3a56ad019e378a352e9cb7a559f478826f1a87
> http://cgit.freedesktop.org/~darktama/nouveau/commit/?id=4179b15c6e9fcfb253e811e5477debe46c84c395
>
> Not sure if they affect this particular issue, but thought I'd point
> it out. Are you fuzzing with multiple threads, or just one at a time?
> Do you have a branch somewhere public with the changes to add nouveau
> ioctl support to trinity?

Hi!

I applied those two on top of v4.4-rc2, but the same warning and oops
are still easily reproducible. I can test with older kernels and/or
try to bisect when I have more time, unless anyone has better ideas.

I'm actually running unmodified trinity, and for this purpose only
fuzzing the ioctl() syscall from multiple processes, and opening only
the files from /dev/dri/:

$ ./trinity -q -loff -C20 -c ioctl -V /dev/dri/

Trinity knows about a bunch of DRM ioctl commands, but the rest of the
ioctl arguments will be garbage:
https://github.com/kernelslacker/trinity/blob/master/ioctls/drm.c

Tommi
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


nouveau: iowrite32 oops & warning at drivers/gpu/drm/nouveau/nouveau_fence.c:198

2015-11-22 Thread Tommi Rantala
Hello,

I'm seeing this warning and oops when trying to fuzz linus
v4.4-rc1-290-g3ad5d7e with trinity.

[ cut here ]
WARNING: CPU: 1 PID: 4308 at
drivers/gpu/drm/nouveau/nouveau_fence.c:198
nouveau_fence_context_new+0x22e/0x270()
CPU: 1 PID: 4308 Comm: trinity-c19 Tainted: GB   4.4.0-rc1+ #1
Hardware name: Dell Inc. OptiPlex 990/0D6H9T, BIOS A06 07/25/2011
0009 8800ac4bf8b0 818a802e 
8800ac4bf8f0 8111bc75 820170be 8800ac9c1050
88015b0e8cc0 8800ca703070 88016f432130 8800afb675f0
Call Trace:
[] dump_stack+0x4b/0x6d
[] warn_slowpath_common+0x95/0xd0
[] ? nouveau_fence_context_new+0x22e/0x270
[] warn_slowpath_null+0x15/0x20
[] nouveau_fence_context_new+0x22e/0x270
[] ? nouveau_fence_context_free+0x20/0x20
[] ? kasan_kmalloc+0x5e/0x70
[] ? kmem_cache_alloc_trace+0x123/0x290
[] ? nv84_fence_context_new+0x73/0x2d0
[] nv84_fence_context_new+0x9d/0x2d0
[] nvc0_fence_context_new+0x11/0x70
[] nouveau_channel_new+0x7e1/0xca0
[] ? create_object+0x28c/0x4d0
[] ? nouveau_channel_prep+0x4b0/0x4b0
[] ? create_object+0x406/0x4d0
[] ? kmemleak_disable+0x70/0x70
[] ? nouveau_abi16_get+0x37/0x80
[] ? trace_hardirqs_on_caller+0x16/0x280
[] ? kasan_unpoison_shadow+0x36/0x50
[] ? kasan_unpoison_shadow+0x36/0x50
[] ? kmem_cache_alloc_trace+0x123/0x290
[] ? nouveau_abi16_ioctl_channel_alloc+0xec/0x4d0
[] nouveau_abi16_ioctl_channel_alloc+0x1ae/0x4d0
[] ? nouveau_abi16_ioctl_setparam+0x10/0x10
[] drm_ioctl+0x20b/0x6b0
[] ? debug_lockdep_rcu_enabled+0x26/0x40
[] ? drm_ioctl_permit+0x120/0x120
[] ? trace_hardirqs_on+0xd/0x10
[] nouveau_drm_ioctl+0x99/0x110
[] do_vfs_ioctl+0x4ae/0x800
[] ? selinux_file_ioctl+0x1f9/0x2d0
[] ? ioctl_preallocate+0x140/0x140
[] ? selinux_parse_skb.constprop.42+0x7c0/0x7c0
[] ? do_setitimer+0x28b/0x420
[] ? alarm_setitimer+0x85/0xe0
[] ? do_setitimer+0x420/0x420
[] SyS_ioctl+0x74/0x80
[] entry_SYSCALL_64_fastpath+0x12/0x6f
---[ end trace 780046761495600a ]---



BUG: unable to handle kernel paging request at c9000188
IP: [] iowrite32+0x2e/0x40
PGD 174f36067 PUD 174f37067 PMD 16a13b067 PTE 0
Oops: 0002 [#1] SMP KASAN
CPU: 3 PID: 4638 Comm: trinity-c19 Tainted: GB   W   4.4.0-rc1+ #1
Hardware name: Dell Inc. OptiPlex 990/0D6H9T, BIOS A06 07/25/2011
task: 8800ab8e1a00 ti: 8800ab6c task.ti: 8800ab6c
RIP: 0010:[]  [] iowrite32+0x2e/0x40
RSP: 0018:8800ab6c75f8  EFLAGS: 00010292
RAX: ed001940c2b8 RBX: c9000188 RCX: 8800ca0615c7
RDX:  RSI: c9000188 RDI: 
RBP: 8800ab6c7618 R08:  R09: 
R10:  R11: ed001443979e R12: 8800ca061590
R13:  R14: 880152553390 R15: 833e0220
FS:  7f48be30a700() GS:88017580() knlGS:
CS:  0010 DS:  ES:  CR0: 80050033
CR2: c9000188 CR3: ab455000 CR4: 000406e0
Stack:
81f58f8b 00080004 8800ca061590 880169d3a4c0
8800ab6c76b8 81fa3b68  0141c180
0001 88006000 1000 8800ab6c7820
Call Trace:
[] ? nvkm_instobj_wr32+0x2b/0x40
[] gf100_fifo_gpfifo_new+0x528/0xa50
[] nvkm_fifo_class_new+0x58/0x70
[] ? nvkm_fifo_uevent_ctor+0x50/0x50
[] nvkm_udevice_child_new+0x63/0x80
[] nvkm_ioctl_new+0x2aa/0x5e0
[] ? create_object+0x8c/0x4d0
[] ? save_stack_address+0x6a/0x70
[] ? nvkm_ioctl_del+0x110/0x110
[] ? nvkm_udevice_map+0x90/0x90
[] ? create_object+0x406/0x4d0
[] nvkm_ioctl+0x1fb/0x510
[] ? trace_hardirqs_on_caller+0x16/0x280
[] ? nvkm_client_map+0x10/0x10
[] nvkm_client_ioctl+0xd/0x10
[] nvif_object_ioctl+0xad/0xe0
[] nvif_object_init+0x160/0x1e0
[] nouveau_channel_new+0x18d/0xca0
[] ? create_object+0x28c/0x4d0
[] ? nouveau_channel_prep+0x4b0/0x4b0
[] ? create_object+0x406/0x4d0
[] ? kmemleak_disable+0x70/0x70
[] ? nouveau_abi16_get+0x37/0x80
[] ? trace_hardirqs_on_caller+0x16/0x280
[] ? kasan_unpoison_shadow+0x36/0x50
[] ? kasan_unpoison_shadow+0x36/0x50
[] ? kmem_cache_alloc_trace+0x123/0x290
[] ? nouveau_abi16_ioctl_channel_alloc+0xec/0x4d0
[] nouveau_abi16_ioctl_channel_alloc+0x1ae/0x4d0
[] ? nouveau_abi16_ioctl_setparam+0x10/0x10
[] drm_ioctl+0x20b/0x6b0
[] ? debug_lockdep_rcu_enabled+0x26/0x40
[] ? drm_ioctl_permit+0x120/0x120
[] ? trace_hardirqs_on+0xd/0x10
[] nouveau_drm_ioctl+0x99/0x110
[] do_vfs_ioctl+0x4ae/0x800
[] ? selinux_file_ioctl+0x1f9/0x2d0
 [] ? ioctl_preallocate+0x140/0x140
[] ? selinux_parse_skb.constprop.42+0x7c0/0x7c0
[] ? do_setitimer+0x28b/0x420
[] ? alarm_setitimer+0x85/0xe0
[] ? do_setitimer+0x420/0x420
[] SyS_ioctl+0x74/0x80
[] entry_SYSCALL_64_fastpath+0x12/0x6f
Code: ff ff 03 00 77 25 48 81 fe 00 00 01 00 76 07 0f b7 d6 89 f8 ef
c3 55 48 89 f7 48 c7 c6 e0 39 1e 83 48 89 e5 e8 84 fd ff ff 5d c3 <89>
3e c3 66 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00 48 81 fe
RIP  [] iowrite32+0x2e/0x40
RSP 
CR2: c9000188
---[ end tr

nouveau: BUG: KASAN: slab-out-of-bounds in memcpy+0x1d/0x40

2015-11-22 Thread Tommi Rantala
Hello,

I'm seeing this kasan report after booting with linus v4.4-rc1-290-g3ad5d7e.

BUG: KASAN: slab-out-of-bounds in memcpy+0x1d/0x40 at addr 880169e21fd0
Read of size 64 by task kworker/1:0/14
=
BUG kmalloc-8192 (Not tainted): kasan: bad access detected
-
Disabling lock debugging due to kernel taint
INFO: Allocated in register_framebuffer+0x466/0x550 age=30792 cpu=1 pid=1
   ___slab_alloc+0x53b/0x560
   __slab_alloc+0x3e/0x70
   kmem_cache_alloc_trace+0x20f/0x290
   register_framebuffer+0x466/0x550
   drm_fb_helper_initial_config+0x5a1/0x800
   nouveau_fbcon_init+0x148/0x180
   nouveau_drm_load+0x583/0xf30
   drm_dev_register+0xb9/0xd0
   drm_get_pci_dev+0x176/0x370
   nouveau_drm_probe+0x2f2/0x3c0
   local_pci_probe+0x75/0xd0
   pci_device_probe+0x19f/0x1f0
   driver_probe_device+0x208/0x6c0
   __driver_attach+0xb8/0xc0
   bus_for_each_dev+0xe6/0x150
   driver_attach+0x26/0x30
INFO: Slab 0xea0005a78800 objects=3 used=3 fp=0x  (null)
flags=0x2004080
INFO: Object 0x880169e2 @offset=0 fp=0x  (null)
Object 880169e2: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
00  
Object 880169e20010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
00  
...
Object 880169e20fc0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ff
ff  
Object 880169e20fd0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
00  
Object 880169e20fe0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ff
ff  
Object 880169e20ff0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
00  
CPU: 1 PID: 14 Comm: kworker/1:0 Tainted: GB   4.4.0-rc1+ #1
Hardware name: Dell Inc. OptiPlex 990/0D6H9T, BIOS A06 07/25/2011
Workqueue: events_power_efficient fb_flashcursor
ea0005a78800 8801740ef7f0 818a802e 880174c04e00
8801740ef820 813030e4 880174c04e00 ea0005a78800
880169e2 880169e21fd0 8801740ef848 813063ef
Call Trace:
[] dump_stack+0x4b/0x6d
[] print_trailer+0xf4/0x150
[] object_err+0x2f/0x40
[] kasan_report_error+0x20d/0x510
[] ? native_sched_clock+0x67/0x140
[] kasan_report+0x34/0x40
[] ? memcpy+0x1d/0x40
[] __asan_loadN+0x12a/0x180
[] memcpy+0x1d/0x40
[] OUT_RINGp+0x75/0x90
[] nvc0_fbcon_imageblit+0x462/0x6c0
[] nouveau_fbcon_imageblit+0xfd/0x110
[] soft_cursor+0x2f6/0x400
[] bit_cursor+0xb14/0xb60
[] ? update_attr.isra.0+0xc0/0xc0
[] ? fb_flashcursor+0x33/0x1b0
[] ? fb_get_color_depth+0x7f/0xb0
[] ? get_color+0xd6/0x1d0
[] ? update_attr.isra.0+0xc0/0xc0
[] fb_flashcursor+0x19f/0x1b0
[] process_one_work+0x3fe/0xae0
[] ? process_one_work+0x32e/0xae0
[] ? try_to_grab_pending+0x200/0x200
[] ? debug_lockdep_rcu_enabled+0x35/0x40
[] worker_thread+0x8a/0x7f0
[] ? process_one_work+0xae0/0xae0
[] kthread+0x185/0x1b0
[] ? __kthread_parkme+0xe0/0xe0
[] ? acpi_ps_parse_loop+0x41c/0xab8
[] ? trace_hardirqs_on_caller+0x186/0x280
[] ? ddebug_add_module+0x38/0x130
[] ? __kthread_parkme+0xe0/0xe0
[] ret_from_fork+0x3f/0x70
[] ? __kthread_parkme+0xe0/0xe0
Memory state around the buggy address:
880169e21f00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
880169e21f80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
>880169e22000: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
  ^
880169e22080: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
880169e22100: fc fc fc fc fc fc 00 00 00 00 00 00 00 00 00 00
==


Some nouveau messages from the boot, if this helps:

nouveau :01:00.0: NVIDIA GF119 (0d90a0a1)
nouveau :01:00.0: bios: version 75.19.55.00.02
nouveau :01:00.0: fb: 1024 MiB DDR3
[TTM] Zone  kernel: Available graphics memory: 2590256 kiB
[TTM] Zone   dma32: Available graphics memory: 2097152 kiB
[TTM] Initializing pool allocator
[TTM] Initializing DMA pool allocator
nouveau :01:00.0: DRM: VRAM: 1024 MiB
nouveau :01:00.0: DRM: GART: 1048576 MiB
nouveau :01:00.0: DRM: TMDS table version 2.0
nouveau :01:00.0: DRM: DCB version 4.0
nouveau :01:00.0: DRM: DCB outp 00: 02000300 
nouveau :01:00.0: DRM: DCB outp 01: 01000302 00020030
nouveau :01:00.0: DRM: DCB outp 02: 02011362 00020010
nouveau :01:00.0: DRM: DCB outp 03: 04022310 
nouveau :01:00.0: DRM: DCB conn 00: 1030
nouveau :01:00.0: DRM: DCB conn 01: 2161
nouveau :01:00.0: DRM: DCB conn 02: 0200
[drm] Supports vblank timestamp caching Rev 2 (21.10.2013).
[drm] Driver supports precise vblank timestamp query.
nouveau :01:00.0: DRM: MM: using COPY0 for buffer copies
nouveau :01:00.0: No connectors reported connected with modes
[drm] Cannot find any crtc or sizes - going 1024x768
nouveau :01:00.0: DRM: allocated 

[PATCH] [media] cx231xx: Add support for Terratec Grabby

2015-05-20 Thread Tommi Rantala
Add support for the Terratec Grabby with USB ID 0ccd:00a6.

Signed-off-by: Tommi Rantala 
---
 drivers/media/usb/cx231xx/cx231xx-cards.c | 28 
 drivers/media/usb/cx231xx/cx231xx.h   |  1 +
 2 files changed, 29 insertions(+)

diff --git a/drivers/media/usb/cx231xx/cx231xx-cards.c 
b/drivers/media/usb/cx231xx/cx231xx-cards.c
index fe00da1..404e17c 100644
--- a/drivers/media/usb/cx231xx/cx231xx-cards.c
+++ b/drivers/media/usb/cx231xx/cx231xx-cards.c
@@ -815,6 +815,32 @@ struct cx231xx_board cx231xx_boards[] = {
.gpio = NULL,
} },
},
+   [CX231XX_BOARD_TERRATEC_GRABBY] = {
+   .name = "Terratec Grabby",
+   .tuner_type = TUNER_ABSENT,
+   .decoder = CX231XX_AVDECODER,
+   .output_mode = OUT_MODE_VIP11,
+   .demod_xfer_mode = 0,
+   .ctl_pin_status_mask = 0xFFC4,
+   .agc_analog_digital_select_gpio = 0x0c,
+   .gpio_pin_status_mask = 0x4001000,
+   .norm = V4L2_STD_PAL,
+   .no_alt_vanc = 1,
+   .external_av = 1,
+   .input = {{
+   .type = CX231XX_VMUX_COMPOSITE1,
+   .vmux = CX231XX_VIN_2_1,
+   .amux = CX231XX_AMUX_LINE_IN,
+   .gpio = NULL,
+   }, {
+   .type = CX231XX_VMUX_SVIDEO,
+   .vmux = CX231XX_VIN_1_1 |
+   (CX231XX_VIN_1_2 << 8) |
+   CX25840_SVIDEO_ON,
+   .amux = CX231XX_AMUX_LINE_IN,
+   .gpio = NULL,
+   } },
+   },
 };
 const unsigned int cx231xx_bcount = ARRAY_SIZE(cx231xx_boards);
 
@@ -880,6 +906,8 @@ struct usb_device_id cx231xx_id_table[] = {
 .driver_info = CX231XX_BOARD_ELGATO_VIDEO_CAPTURE_V2},
{USB_DEVICE(0x1f4d, 0x0102),
 .driver_info = CX231XX_BOARD_OTG102},
+   {USB_DEVICE(USB_VID_TERRATEC, 0x00a6),
+.driver_info = CX231XX_BOARD_TERRATEC_GRABBY},
{},
 };
 
diff --git a/drivers/media/usb/cx231xx/cx231xx.h 
b/drivers/media/usb/cx231xx/cx231xx.h
index 00d3bce..54790fb 100644
--- a/drivers/media/usb/cx231xx/cx231xx.h
+++ b/drivers/media/usb/cx231xx/cx231xx.h
@@ -77,6 +77,7 @@
 #define CX231XX_BOARD_HAUPPAUGE_930C_HD_1113xx 19
 #define CX231XX_BOARD_HAUPPAUGE_930C_HD_1114xx 20
 #define CX231XX_BOARD_HAUPPAUGE_955Q 21
+#define CX231XX_BOARD_TERRATEC_GRABBY 22
 
 /* Limits minimum and default number of buffers */
 #define CX231XX_MIN_BUF 4
-- 
1.9.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] drm/radeon: fix DRM_IOCTL_RADEON_CS oops

2015-03-02 Thread Tommi Rantala
Passing a zeroed drm_radeon_cs struct to DRM_IOCTL_RADEON_CS produces the
following oops.

Fix by always calling INIT_LIST_HEAD() to avoid the crash in list_sort().

--

 #include 
 #include 
 #include 
 #include 
 #include 

 static const struct drm_radeon_cs cs;

 int main(int argc, char **argv)
 {
 return ioctl(open(argv[1], O_RDWR), DRM_IOCTL_RADEON_CS, &cs);
 }

--

[ttrantal@test2 ~]$ ./main /dev/dri/card0
[   46.904650] BUG: unable to handle kernel NULL pointer dereference at 
  (null)
[   46.905022] IP: [] list_sort+0x42/0x240
[   46.905022] PGD 68f29067 PUD 688b5067 PMD 0
[   46.905022] Oops: 0002 [#1] SMP
[   46.905022] CPU: 0 PID: 2413 Comm: main Not tainted 4.0.0-rc1+ #58
[   46.905022] Hardware name: Hewlett-Packard HP Compaq dc5750 Small Form 
Factor/0A64h, BIOS 786E3 v02.10 01/25/2007
[   46.905022] task: 880058e2bcc0 ti: 880058e64000 task.ti: 
880058e64000
[   46.905022] RIP: 0010:[]  [] 
list_sort+0x42/0x240
[   46.905022] RSP: 0018:880058e67998  EFLAGS: 00010246
[   46.905022] RAX:  RBX:  RCX: 
[   46.905022] RDX: 81644410 RSI: 880058e67b40 RDI: 880058e67a58
[   46.905022] RBP: 880058e67a88 R08:  R09: 
[   46.905022] R10: 880058e2bcc0 R11: 828e6ca0 R12: 81644410
[   46.905022] R13: 8800694b8018 R14:  R15: 880058e679b0
[   46.905022] FS:  7fdc65a65700() GS:88006d60() 
knlGS:
[   46.905022] CS:  0010 DS:  ES:  CR0: 80050033
[   46.905022] CR2:  CR3: 58dd9000 CR4: 06f0
[   46.905022] DR0:  DR1:  DR2: 
[   46.905022] DR3:  DR6: 4ff0 DR7: 0400
[   46.905022] Stack:
[   46.905022]  880058e67b40 880058e2bcc0 880058e67a78 

[   46.905022]     

[   46.905022]     

[   46.905022] Call Trace:
[   46.905022]  [] radeon_cs_parser_fini+0x195/0x220
[   46.905022]  [] radeon_cs_ioctl+0xa9/0x960
[   46.905022]  [] drm_ioctl+0x19c/0x640
[   46.905022]  [] ? trace_hardirqs_on_caller+0xfd/0x1c0
[   46.905022]  [] ? trace_hardirqs_on+0xd/0x10
[   46.905022]  [] radeon_drm_ioctl+0x46/0x80
[   46.905022]  [] do_vfs_ioctl+0x318/0x570
[   46.905022]  [] ? selinux_file_ioctl+0x56/0x110
[   46.905022]  [] SyS_ioctl+0x81/0xa0
[   46.905022]  [] system_call_fastpath+0x12/0x17
[   46.905022] Code: 48 89 b5 10 ff ff ff 0f 84 03 01 00 00 4c 8d bd 28 ff ff
ff 31 c0 48 89 fb b9 15 00 00 00 49 89 d4 4c 89 ff f3 48 ab 48 8b 46 08 <48> c7
00 00 00 00 00 48 8b 0e 48 85 c9 0f 84 7d 00 00 00 c7 85
[   46.905022] RIP  [] list_sort+0x42/0x240
[   46.905022]  RSP 
[   46.905022] CR2: 
[   47.149253] ---[ end trace 09576b4e8b2c20b8 ]---

Signed-off-by: Tommi Rantala 
---
 drivers/gpu/drm/radeon/radeon_cs.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/radeon/radeon_cs.c 
b/drivers/gpu/drm/radeon/radeon_cs.c
index a579ed3..4d0f96c 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -256,11 +256,13 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, 
void *data)
u32 ring = RADEON_CS_RING_GFX;
s32 priority = 0;
 
+   INIT_LIST_HEAD(&p->validated);
+
if (!cs->num_chunks) {
return 0;
}
+
/* get chunks */
-   INIT_LIST_HEAD(&p->validated);
p->idx = 0;
p->ib.sa_bo = NULL;
p->const_ib.sa_bo = NULL;
-- 
1.9.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/2] Input: xpad - add Thrustmaster as Xbox 360 controller vendor

2014-10-15 Thread Tommi Rantala
Add Thrustmaster as an Xbox 360 controller vendor. This is required, for
example, to make the GP XID (044f:b326) gamepad work.

Signed-off-by: Tommi Rantala 
---
 drivers/input/joystick/xpad.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c
index cee4fe3..2ed7905 100644
--- a/drivers/input/joystick/xpad.c
+++ b/drivers/input/joystick/xpad.c
@@ -293,6 +293,7 @@ static const signed short xpad_abs_triggers[] = {
 
 static struct usb_device_id xpad_table[] = {
{ USB_INTERFACE_INFO('X', 'B', 0) },/* X-Box USB-IF not approved 
class */
+   XPAD_XBOX360_VENDOR(0x044f),/* Thrustmaster X-Box 360 
controllers */
XPAD_XBOX360_VENDOR(0x045e),/* Microsoft X-Box 360 
controllers */
XPAD_XBOXONE_VENDOR(0x045e),/* Microsoft X-Box One 
controllers */
XPAD_XBOX360_VENDOR(0x046d),/* Logitech X-Box 360 style 
controllers */
-- 
1.9.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/2] Input: xpad - add USB ID for Thrustmaster Ferrari 458 Racing Wheel

2014-10-15 Thread Tommi Rantala
Add the USB ID for the Xbox 360 Thrustmaster Ferrari 458 Racing Wheel.

Signed-off-by: Tommi Rantala 
---
 drivers/input/joystick/xpad.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c
index cd13c82..cee4fe3 100644
--- a/drivers/input/joystick/xpad.c
+++ b/drivers/input/joystick/xpad.c
@@ -209,6 +209,7 @@ static const struct xpad_device {
{ 0x24c6, 0x5501, "Hori Real Arcade Pro VX-SA", 0, XTYPE_XBOX360 },
{ 0x24c6, 0x5506, "Hori SOULCALIBUR V Stick", 0, XTYPE_XBOX360 },
{ 0x24c6, 0x5b02, "Thrustmaster, Inc. GPX Controller", 0, XTYPE_XBOX360 
},
+   { 0x24c6, 0x5b03, "Thrustmaster Ferrari 458 Racing Wheel", 0, 
XTYPE_XBOX360 },
{ 0x, 0x, "Chinese-made Xbox Controller", 0, XTYPE_XBOX },
{ 0x, 0x, "Generic X-Box pad", 0, XTYPE_UNKNOWN }
 };
-- 
1.9.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: f2fs get_dnode_of_data oops

2014-09-09 Thread Tommi Rantala
2014-09-09 7:41 GMT+03:00 Jaegeuk Kim :
> Hi Tommi,
>
> This patch should resolve this bug.
> Thanks a lot. :)
>
> From ee24677b9917583f50f16b6f59771439f91b890c Mon Sep 17 00:00:00 2001
> From: Jaegeuk Kim 
> Date: Mon, 8 Sep 2014 10:59:43 -0700
> Subject: [PATCH] f2fs: fix negative value for lseek offset
>

Thanks, with this patch applied, I could not reproduce the lseek oops,
but now I hit the following:


[  720.673788] [ cut here ]
[  720.674011] kernel BUG at fs/f2fs/node.c:1229!
[  720.674011] invalid opcode:  [#1] SMP DEBUG_PAGEALLOC
[  720.674011] CPU: 0 PID: 5298 Comm: trinity-c15 Not tainted 3.17.0-rc4+ #38
[  720.674011] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
[  720.674011] task: 88002c468000 ti: 88002c47 task.ti:
88002c47
[  720.674011] RIP: 0010:[]  []
f2fs_write_node_page+0x171/0x290
[  720.674011] RSP: 0018:88002c473cb0  EFLAGS: 00010206
[  720.674011] RAX:  RBX:  RCX: ea6173c0
[  720.674011] RDX: 0001 RSI: 0001 RDI: ea6173c0
[  720.674011] RBP: 88002c473cf8 R08:  R09: 
[  720.674011] R10: 0001 R11: 8800185cf000 R12: ea6173c0
[  720.674011] R13: 8800399d4520 R14: 88002c473e68 R15: 8800185cf000
[  720.674011] FS:  7fb4b61d4700() GS:88003fa0()
knlGS:
[  720.674011] CS:  0010 DS:  ES:  CR0: 8005003b
[  720.674011] CR2: 0008 CR3: 2c45 CR4: 06f0
[  720.674011] DR0: 01ee3000 DR1: 019d3000 DR2: 
[  720.674011] DR3:  DR6: 0ff0 DR7: 0600
[  720.674011] Stack:
[  720.674011]  81189e75 04110001 880037cb10a8
88002c473cd8
[  720.674011]  8800 1600 88002c473d58

[  720.674011]  0001 88002c473df0 81778745

[  720.674011] Call Trace:
[  720.674011]  [] ? trace_hardirqs_on_caller+0x185/0x220
[  720.674011]  [] sync_node_pages+0x415/0x5f0
[  720.674011]  [] ? SyS_tee+0x390/0x390
[  720.674011]  [] write_checkpoint+0x21d/0xeb0
[  720.674011]  [] ? mark_held_locks+0x90/0xa0
[  720.674011]  [] ? mutex_lock_nested+0x435/0x4b0
[  720.674011]  [] ? trace_hardirqs_on_caller+0x185/0x220
[  720.674011]  [] ? SyS_tee+0x390/0x390
[  720.674011]  [] f2fs_sync_fs+0x100/0x180
[  720.674011]  [] sync_fs_one_sb+0x1b/0x20
[  720.674011]  [] iterate_supers+0x7f/0xe0
[  720.674011]  [] sys_sync+0x50/0x90
[  720.674011]  [] system_call_fastpath+0x16/0x1b
[  720.674011] Code: 63 00 00 48 b8 00 00 00 00 00 16 00 00 4c 01 e0
48 c1 f8 06 48 c1 e0 0c 49 01 c7 41 8b 9f e8 0f 00 00 89 d8 49 39 44
24 10 74 07 <0f> 0b 0f 1f 44 00 00 48 8d 55 c8 89 de 4c 89 ef e8 3a fb
ff ff
[  720.674011] RIP  [] f2fs_write_node_page+0x171/0x290
[  720.674011]  RSP 
[  720.674011] [ cut here ]
[  720.674011] kernel BUG at arch/x86/mm/pageattr.c:216!
[  720.674011] invalid opcode:  [#2] SMP DEBUG_PAGEALLOC
[  720.674011] CPU: 0 PID: 5298 Comm: trinity-c15 Not tainted 3.17.0-rc4+ #38
[  720.674011] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
[  720.674011] task: 88002c468000 ti: 88002c47 task.ti:
88002c47
[  720.674011] RIP: 0010:[]  []
change_page_attr_set_clr+0x250/0x430
[  720.674011] RSP: 0018:88002c4730b8  EFLAGS: 00010046
[  720.674011] RAX: 0046 RBX:  RCX: 0010
[  720.674011] RDX: 4600 RSI:  RDI: 8000
[  720.674011] RBP: 88002c473148 R08: 0001 R09: 8800
[  720.674011] R10: 880034780738 R11: 88000e526610 R12: 
[  720.674011] R13: 0010 R14: 0004 R15: 0005
[  720.674011] FS:  7fb4b61d4700() GS:88003fa0()
knlGS:
[  720.674011] CS:  0010 DS:  ES:  CR0: 8005003b
[  720.674011] CR2: 0008 CR3: 2c45 CR4: 06f0
[  720.674011] DR0: 01ee3000 DR1: 019d3000 DR2: 
[  720.674011] DR3:  DR6: 0ff0 DR7: 0600
[  720.674011] Stack:
[  720.674011]    
0200
[  720.674011]  8801  
0010
[  720.674011]   00050001 5d4d
0200
[  720.674011] Call Trace:
[  720.674011]  [] _set_pages_array+0x86/0x130
[  720.674011]  [] set_pages_array_wc+0xe/0x10
[  720.674011]  [] ttm_set_pages_caching+0x47/0x70
[  720.674011]  [] ttm_alloc_new_pages.isra.4+0xf3/0x190
[  720.674011]  [] ttm_pool_populate+0x1b5/0x490
[  720.674011]  [] cirrus_ttm_tt_populate+0x9/0x10
[  720.674011]  [] ttm_bo_move_memcpy+0x183/0x640
[  720.674011]  [] cirrus_bo_move+0x13/0x20
[  720.674011]  [] ttm_bo_handle_

Re: f2fs get_dnode_of_data oops

2014-09-08 Thread Tommi Rantala
2014-09-08 7:20 GMT+03:00 Jaegeuk Kim :
> Hi,
>
> Thank you for the report.
> Could you share a little bit more information about the file accessing
> f2fs_llseek?
> E.g., file size, file offset, file allocation information, or dump of that 
> file.

Hi,

I can reproduce the bug with the following.
-17595150933902 is just something I saw trinity passing to lseek().

#define _GNU_SOURCE

#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>

int main(int argc, char **argv)
{
int fd;

if (argc < 2) {
printf("give filename\n");
return 1;
}

fd = open(argv[1], O_RDONLY);
if (fd < 0) {
perror("open");
return 1;
}

lseek(fd, -17595150933902LL, SEEK_DATA);

return 0;
}

{ttrantal@arkki ~}> touch /f2fs/x ; ./a.out /f2fs/x
[   73.437182] BUG: unable to handle kernel paging request at 88043368e340
[   73.438035] IP: [] get_dnode_of_data+0x3a9/0x440
[   73.438035] PGD 4595067 PUD 0
[   73.438035] Oops:  [#1] SMP DEBUG_PAGEALLOC
[   73.438035] CPU: 0 PID: 2933 Comm: a.out Not tainted 3.17.0-rc4+ #37
[   73.438035] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
[   73.438035] task: 88003755cac0 ti: 880022734000 task.ti:
880022734000
[   73.438035] RIP: 0010:[]  []
get_dnode_of_data+0x3a9/0x440
[   73.438035] RSP: 0018:880022737e08  EFLAGS: 00010246
[   73.438035] RAX: 880033951000 RBX: 010b RCX: fff4f476
[   73.438035] RDX: 880033951168 RSI: 00111932488f RDI: 880022737ef0
[   73.438035] RBP: 880022737eb8 R08: 0148 R09: 
[   73.438035] R10: 8b86 R11: 0001 R12: fffefff4f476
[   73.438035] R13:  R14: eace5440 R15: 880021c28000
[   73.438035] FS:  7fefc2f08700() GS:88003fa0()
knlGS:
[   73.438035] CS:  0010 DS:  ES:  CR0: 8005003b
[   73.438035] CR2: 88043368e340 CR3: 32d6b000 CR4: 06f0
[   73.438035] Stack:
[   73.438035]  880022737ef0 81228d7c 88003d9fe7b0
880022737eb8
[   73.438035]  81763164 0002 
fff4f476
[   73.438035]  0246  8259bd47
010b
[   73.438035] Call Trace:
[   73.438035]  [] ? pagevec_lookup_tag+0x1c/0x30
[   73.438035]  [] ? __get_first_dirty_index+0x44/0x90
[   73.438035]  [] ? _raw_spin_unlock_irq+0x27/0x40
[   73.438035]  [] ? trace_hardirqs_on_caller+0x185/0x220
[   73.438035]  [] f2fs_llseek+0xf7/0x420
[   73.438035]  [] SyS_lseek+0x65/0xa0
[   73.438035]  [] system_call_fastpath+0x16/0x1b
[   73.438035] Code: ba 00 00 00 00 00 88 ff ff 48 c1 f8 06 48 c1 e0
0c 48 01 d0 8b 98 ec 0f 00 00 39 98 e8 0f 00 00 48 8d 90 68 01 00 00
48 0f 45 d0 <8b> 04 8a 89 47 24 31 c0 eb 75 41 bc e4 ff ff ff 4d 85 f6
74 19
[   73.438035] RIP  [] get_dnode_of_data+0x3a9/0x440
[   73.438035]  RSP 
[   73.438035] CR2: 88043368e340
[   73.438035] ---[ end trace e94f7065a7961f54 ]---
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: f2fs get_dnode_of_data oops

2014-09-07 Thread Tommi Rantala
2014-09-07 22:14 GMT+03:00 Tommi Rantala :
> Hello,
>
> Hit this oops while fuzzing v3.17-rc3-176-g2b12164 with Trinity.
>
> Tommi
>
>
> BUG: unable to handle kernel paging request at 8804338717a8
> IP: [] get_dnode_of_data+0x3a9/0x440
> PGD 4594067 PUD 0
> Oops:  [#1] SMP DEBUG_PAGEALLOC
> CPU: 0 PID: 4719 Comm: trinity-c3 Not tainted 3.17.0-rc3+ #33
> Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
> task: 88001563 ti: 88000724c000 task.ti: 88000724c000
> RIP: 0010:[]  []
> get_dnode_of_data+0x3a9/0x440
> RSP: 0018:88000724fe08  EFLAGS: 00010246
> RAX: 880033874000 RBX: 00f8 RCX: f590
> RDX: 880033874168 RSI: 88000724fd98 RDI: 88000724fef0
> RBP: 88000724feb8 R08:  R09: 
> R10: 0001 R11: 83b33f90 R12: f590
> R13:  R14: eace1d00 R15: 8800209f8000
> FS:  7f2bd22dc700() GS:88003fa0() knlGS:
> CS:  0010 DS:  ES:  CR0: 8005003b
> CR2: 8804338717a8 CR3: 346c CR4: 06f0
> DR0: 0185d000 DR1: 0185d000 DR2: 
> DR3:  DR6: 0ff0 DR7: 000b0602
> Stack:
>  88000724fef0 88000724fe30 880036c18000 0004
>  8800209f80f0 0002f590 81189f1d 8800f590
>  0246  81189ce0 00f8
> Call Trace:
>  [] ? trace_hardirqs_on+0xd/0x10
>  [] ? mark_held_locks+0x90/0xa0
>  [] ? trace_hardirqs_on_caller+0x185/0x220
>  [] f2fs_llseek+0xf7/0x420
>  [] SyS_lseek+0x65/0xa0
>  [] system_call_fastpath+0x16/0x1b
> Code: ba 00 00 00 00 00 88 ff ff 48 c1 f8 06 48 c1 e0 0c 48 01 d0 8b
> 98 ec 0f 00 00 39 98 e8 0f 00 00 48 8d 90 68 01 00 00 48 0f 45 d0 <8b>
> 04 8a 89 47 24 31 c0 eb 75 41 bc e4 ff ff ff 4d 85 f6 74 19
> RIP  [] get_dnode_of_data+0x3a9/0x440
>  RSP 
> CR2: 8804338717a8
> ---[ end trace bed7b35d1c48e9c3 ]---

If it helps, here is the location of the crash:

(gdb) list *0x81779039
0x81779039 is in get_dnode_of_data (fs/f2fs/f2fs.h:950).
945 {
946 struct f2fs_node *raw_node;
947 __le32 *addr_array;
948 raw_node = F2FS_NODE(node_page);
949 addr_array = blkaddr_in_node(raw_node);
950 return le32_to_cpu(addr_array[offset]);
951 }
952
953 static inline int f2fs_test_bit(unsigned int nr, char *addr)
954 {
(gdb)

Tommi
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


f2fs get_dnode_of_data oops

2014-09-07 Thread Tommi Rantala
Hello,

Hit this oops while fuzzing v3.17-rc3-176-g2b12164 with Trinity.

Tommi


BUG: unable to handle kernel paging request at 8804338717a8
IP: [] get_dnode_of_data+0x3a9/0x440
PGD 4594067 PUD 0
Oops:  [#1] SMP DEBUG_PAGEALLOC
CPU: 0 PID: 4719 Comm: trinity-c3 Not tainted 3.17.0-rc3+ #33
Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
task: 88001563 ti: 88000724c000 task.ti: 88000724c000
RIP: 0010:[]  []
get_dnode_of_data+0x3a9/0x440
RSP: 0018:88000724fe08  EFLAGS: 00010246
RAX: 880033874000 RBX: 00f8 RCX: f590
RDX: 880033874168 RSI: 88000724fd98 RDI: 88000724fef0
RBP: 88000724feb8 R08:  R09: 
R10: 0001 R11: 83b33f90 R12: f590
R13:  R14: eace1d00 R15: 8800209f8000
FS:  7f2bd22dc700() GS:88003fa0() knlGS:
CS:  0010 DS:  ES:  CR0: 8005003b
CR2: 8804338717a8 CR3: 346c CR4: 06f0
DR0: 0185d000 DR1: 0185d000 DR2: 
DR3:  DR6: 0ff0 DR7: 000b0602
Stack:
 88000724fef0 88000724fe30 880036c18000 0004
 8800209f80f0 0002f590 81189f1d 8800f590
 0246  81189ce0 00f8
Call Trace:
 [] ? trace_hardirqs_on+0xd/0x10
 [] ? mark_held_locks+0x90/0xa0
 [] ? trace_hardirqs_on_caller+0x185/0x220
 [] f2fs_llseek+0xf7/0x420
 [] SyS_lseek+0x65/0xa0
 [] system_call_fastpath+0x16/0x1b
Code: ba 00 00 00 00 00 88 ff ff 48 c1 f8 06 48 c1 e0 0c 48 01 d0 8b
98 ec 0f 00 00 39 98 e8 0f 00 00 48 8d 90 68 01 00 00 48 0f 45 d0 <8b>
04 8a 89 47 24 31 c0 eb 75 41 bc e4 ff ff ff 4d 85 f6 74 19
RIP  [] get_dnode_of_data+0x3a9/0x440
 RSP 
CR2: 8804338717a8
---[ end trace bed7b35d1c48e9c3 ]---
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


RTNL: assertion failed at net/ipv6/addrconf.c (1699)

2014-08-29 Thread Tommi Rantala
Hi,

I was fuzzing Linus' tree (v3.17-rc2-89-g59753a8) with Trinity as the root
user in qemu when I hit the following assertion failures.

Tommi


[init] Started watchdog process, PID is 4841
[main] Main thread is alive.
[   77.229699] sctp: [Deprecated]: trinity-main (pid 4842) Use of int
in max_burst socket option deprecated.
[   77.229699] Use struct sctp_assoc_value instead
[   77.297196] RTNL: assertion failed at net/ipv6/addrconf.c (1699)
[   77.298080] CPU: 0 PID: 4842 Comm: trinity-main Not tainted 3.17.0-rc2+ #30
[   77.299039] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
[   77.299789]  88003d76a618 880026133c50 8238ba79
880037c84520
[   77.300829]  880026133c90 820bd52b 
82d86c40
[   77.301869]   f76fd1e1 8800382d8000
8800382d8220
[   77.302906] Call Trace:
[   77.303246]  [] dump_stack+0x4d/0x66
[   77.303928]  [] addrconf_join_solict+0x4b/0xb0
[   77.304731]  [] ipv6_dev_ac_inc+0x2bb/0x330
[   77.305498]  [] ? ac6_seq_start+0x260/0x260
[   77.306257]  [] ipv6_sock_ac_join+0x26e/0x360
[   77.307046]  [] ? ipv6_sock_ac_join+0x99/0x360
[   77.307798]  [] do_ipv6_setsockopt.isra.5+0xa70/0xf20
[   77.308570]  [] ? sched_clock_local+0x1d/0x80
[   77.309260]  [] ? kvm_clock_read+0x27/0x40
[   77.309915]  [] ? sched_clock+0x9/0x10
[   77.310537]  [] ? sock_has_perm+0x168/0x1e0
[   77.311204]  [] ? sched_clock_cpu+0xa8/0xf0
[   77.311866]  [] ? local_clock+0x1b/0x30
[   77.312501]  [] ? lock_release_holdtime+0x1d/0x170
[   77.313241]  [] ? sock_has_perm+0x180/0x1e0
[   77.313905]  [] ?
selinux_msg_queue_alloc_security+0xa0/0xa0
[   77.314746]  [] ipv6_setsockopt+0x53/0xb0
[   77.315397]  [] udpv6_setsockopt+0x25/0x30
[   77.316058]  [] sock_common_setsockopt+0xf/0x20
[   77.316764]  [] SyS_setsockopt+0x8e/0xd0
[   77.317406]  [] system_call_fastpath+0x16/0x1b
[main] 375 sockets created based on info from socket cachefile.
[main] Generating file descriptors
[main] Added 129 filenames from /dev
[main] Added 44048 filenames from /proc
[main] Added 18192 filenames from /sys
[main] Enabled 9 fd providers.
[watchdog] Watchdog is alive. (pid:4841)
[child3:4846] finit_module (313) returned ENOSYS, marking as inactive.
[child1:4844] kcmp (312) returned ENOSYS, marking as inactive.
[child2:4845] uselib (134) returned ENOSYS, marking as inactive.
[child1:4844] nfsservctl (180) returned ENOSYS, marking as inactive.
[child2:4845] delete_module (129:[32BIT]) returned ENOSYS, marking as inactive.
[child2:4845] init_module (175) returned ENOSYS, marking as inactive.
[   84.126609] trinity-c7: vm86 mode not supported on 64 bit kernel
[child7:4850] vm86 (166:[32BIT]) returned ENOSYS, marking as inactive.
[main] Bailing main loop because ctrl-c.
[   84.345840] RTNL: assertion failed at net/ipv6/addrconf.c (1712)
[   84.346615] CPU: 0 PID: 4842 Comm: trinity-main Not tainted 3.17.0-rc2+ #30
[   84.347426] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
[   84.348102]  88003d76a618 880026133d10 8238ba79
8800382d8000
[   84.349018]  880026133d50 820bd5db 81141555
8800382d8220
[   84.349935]  8800382d8000 f76fd1e1 88003d76a618
8800382d8000
[   84.350848] Call Trace:
[   84.351149]  [] dump_stack+0x4d/0x66
[   84.351751]  [] addrconf_leave_solict+0x4b/0xb0
[   84.352574]  [] ? __local_bh_enable_ip+0xa5/0xf0
[   84.353315]  [] __ipv6_dev_ac_dec+0xc3/0x140
[   84.354019]  [] ipv6_dev_ac_dec+0x98/0xb0
[   84.354687]  [] ipv6_sock_ac_close+0x10d/0x1a0
[   84.355410]  [] ? ipv6_sock_ac_close+0x2e/0x1a0
[   84.356147]  [] inet6_release+0x23/0x40
[   84.356789]  [] sock_release+0x14/0x80
[   84.357410]  [] sock_close+0xd/0x20
[   84.358042]  [] __fput+0x111/0x1e0
[   84.358622]  [] fput+0x9/0x10
[   84.359196]  [] task_work_run+0x9e/0xd0
[   84.359825]  [] do_exit+0x456/0xb30
[   84.360419]  [] ? retint_swapgs+0x13/0x1b
[   84.361075]  [] do_group_exit+0x84/0xd0
[   84.361705]  [] SyS_exit_group+0xf/0x10
[   84.362338]  [] system_call_fastpath+0x16/0x1b
[watchdog] [4841] Watchdog exiting because ctrl-c.
[init] Ran 775 syscalls. Successes: 179  Failures: 596
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


drm_ioctl & WARNING at arch/x86/mm/ioremap.c:98

2014-08-27 Thread Tommi Rantala
Hello,

Got this warning while fuzzing v3.17-rc2-40-gff0c57a with Trinity. Was
running as root in qemu.

Tommi


ioremap: invalid physical address 40004000
[ cut here ]
WARNING: CPU: 0 PID: 2887 at arch/x86/mm/ioremap.c:98
__ioremap_caller+0x7a/0x2e0()
CPU: 0 PID: 2887 Comm: trinity-c6 Not tainted 3.17.0-rc2+ #29
Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
 0009 880036ee7c90 8238ba09 
 880036ee7cc8 8113c603  40004000
 40008000 81747c7d 0010 880036ee7cd8
Call Trace:
 [] dump_stack+0x4d/0x66
 [] warn_slowpath_common+0x73/0x90
 [] ? drm_addmap_core+0x1dd/0x600
 [] warn_slowpath_null+0x15/0x20
 [] __ioremap_caller+0x7a/0x2e0
 [] ? kmemleak_alloc+0x23/0x50
 [] ? kmem_cache_alloc_trace+0x119/0x290
 [] ? drm_addmap_core+0x3b/0x600
 [] ioremap_nocache+0x12/0x20
 [] drm_addmap_core+0x1dd/0x600
 [] drm_addmap_ioctl+0x45/0x70
 [] drm_ioctl+0x3fe/0x640
 [] ? drm_addmap+0x30/0x30
 [] ? avc_has_perm+0x20/0x2f0
 [] ? sched_clock_cpu+0xa8/0xf0
 [] do_vfs_ioctl+0x4d0/0x510
 [] ? selinux_file_ioctl+0xf5/0x100
 [] SyS_ioctl+0x4e/0x80
 [] system_call_fastpath+0x16/0x1b
---[ end trace c988df0287baa491 ]---
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: drm_mode_create_dumb_ioctl: divide error

2014-08-24 Thread Tommi Rantala
22.8.2014 13.38 kirjoitti "David Herrmann" :
>
> Hi
>
> On Thu, Aug 21, 2014 at 8:18 PM, Tommi Rantala  wrote:
> > Hello,
> >
> > Triggered this while fuzzing v3.17-rc1-51-g372b1db with Trinity.
> >
> > Tommi
> >
> >
> > [drm:drm_mode_legacy_fb_format] *ERROR* bad bpp, assuming x8r8g8b8 pixel 
> > format
> > divide error:  [#1] SMP DEBUG_PAGEALLOC
> > CPU: 0 PID: 2854 Comm: trinity-c7 Not tainted 3.17.0-rc1+ #14
> > Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
> > task: 88003926cac0 ti: 8800356b4000 task.ti: 8800356b4000
> > RIP: 0010:[]  []
> > drm_mode_create_dumb_ioctl+0x53/0xa0
> > RSP: 0018:8800356b7dc0  EFLAGS: 00010246
> > RAX:  RBX: 88003545da68 RCX: 
> > RDX:  RSI: 8800356b7e18 RDI: 88003d5c67b0
> > RBP: 8800356b7dc8 R08:  R09: 
> > R10:  R11: 817f6d30 R12: 00b2
> > R13: fff2 R14: 88003d5c67b0 R15: 88003545da68
> > FS:  7f06208fa700() GS:88003fa0() knlGS:
> > CS:  0010 DS:  ES:  CR0: 80050033
> > CR2: 01903108 CR3: 36efa000 CR4: 06f0
> > Stack:
> >  8800356b7e18 8800356b7ec8 8165ac60 8800356b7df8
> >  8800356b7e18 8800356b7e18 824e1440 00d52000
> >  0020 c02064b2 fff2 
> > Call Trace:
> >  [] drm_ioctl+0x3b0/0x640
> >  [] ? avc_has_perm+0x218/0x2f0
> >  [] ? avc_has_perm+0x20/0x2f0
> >  [] ? sched_clock_cpu+0xa8/0xf0
> >  [] do_vfs_ioctl+0x4d0/0x510
> >  [] ? selinux_file_ioctl+0xf5/0x100
> >  [] SyS_ioctl+0x4e/0x80
> >  [] system_call_fastpath+0x16/0x1b
> > Code: 55 41 b9 ff ff ff ff 41 83 c0 07 44 89 c8 41 c1 e8 03 48 89 e5
> > 53 48 89 d3 31 d2 f7 f1 41 39 c0 77 46 41 0f af c8 31 d2 44 89 c8 
> > f1 41 39 c2 77 36 41 0f af ca b8 ea ff ff ff 81 c1 ff 0f 00
> > RIP  [] drm_mode_create_dumb_ioctl+0x53/0xa0
> >  RSP 
> > ---[ end trace 6919129b71d9bf98 ]---
> > [drm:drm_mode_legacy_fb_format] *ERROR* bad bpp, assuming x8r8g8b8 pixel 
> > format
> >
> >
> >
> > (gdb) list *0x816688e3
> > 0x816688e3 is in drm_mode_create_dumb_ioctl
> > (drivers/gpu/drm/drm_crtc.c:4703).
> > 4698    /* overflow checks for 32bit size calculations */
> > 4699    cpp = DIV_ROUND_UP(args->bpp, 8);
> > 4700    if (cpp > 0xffffffffU / args->width)
> > 4701            return -EINVAL;
> > 4702    stride = cpp * args->width;
> > 4703    if (args->height > 0xffffffffU / stride)
> > 4704            return -EINVAL;
>
> Hm, this doesn't make sense to me. args->bpp/width/height are
> guaranteed to be non-zero and 32bit. Therefore, DIV_ROUND_UP() cannot
> return 0 and "cpp" is thus non-zero. The overflow check makes sure
> "cpp * args->width" cannot overflow, both are non-zero so "stride" is
> non-zero and valid.
>
> I cannot make much sense out of the x86 assembly below, so help welcome.

Hi David,

I put a BUG_ON(stride==0), and it is indeed crashing due to stride being zero.

I tried to get the variables with gdb since I'm running the kernel in
qemu, but for whatever reason the breakpoints do not always seem to
have any effect; in other words, execution often runs right through
the breakpoints.

Finally I did get one instance where the breakpoint would trigger, and
gdb told me this. Does it make sense?

(gdb) bt
#0  drm_mode_create_dumb_ioctl (dev=0x88003d634520,
data=0x880034837e18, file_priv=0x8800361faa40) at
drivers/gpu/drm/drm_crtc.c:4704
#1  0x8165ad20 in drm_ioctl (filp=,
cmd=, arg=) at
drivers/gpu/drm/drm_ioctl.c:727
#2  0x812922a0 in vfs_ioctl (arg=,
cmd=, filp=) at fs/ioctl.c:43
#3  do_vfs_ioctl (filp=0x88003d634520, fd=36, cmd=,
arg=) at fs/ioctl.c:598
#4  0x8129232e in SYSC_ioctl (arg=,
cmd=, fd=) at fs/ioctl.c:613
#5  SyS_ioctl (fd=36, cmd=3223348402, arg=9592832) at fs/ioctl.c:604
#6  
#7  0x7f69a2b0eb69 in ?? ()
#8  0xdead4ead6b6b0e0e in ?? ()
#9  0x6b6b6b6b in ?? ()
#10 0x in ?? ()
#11 0x840ba810 in ?? ()
#12 0x838a9e70 in lock_classes ()
#13 0x83924540 in lock_classes ()
#14 0x828ed4ef in kallsyms_token_index ()
#15 0x6b6b6b6b in ?? ()
#16 0x81279ec8 in kmemleak_scan () at mm/kmemleak.c:1410
Backtrace stopped: previous frame inner to this frame (corrupt stack?)

(gdb) print *args
value has been optimized out

(gdb) info locals
cpp = 0
stride = <optimized out>

drm_mode_create_dumb_ioctl: divide error

2014-08-21 Thread Tommi Rantala
Hello,

Triggered this while fuzzing v3.17-rc1-51-g372b1db with Trinity.

Tommi


[drm:drm_mode_legacy_fb_format] *ERROR* bad bpp, assuming x8r8g8b8 pixel format
divide error:  [#1] SMP DEBUG_PAGEALLOC
CPU: 0 PID: 2854 Comm: trinity-c7 Not tainted 3.17.0-rc1+ #14
Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
task: 88003926cac0 ti: 8800356b4000 task.ti: 8800356b4000
RIP: 0010:[]  []
drm_mode_create_dumb_ioctl+0x53/0xa0
RSP: 0018:8800356b7dc0  EFLAGS: 00010246
RAX:  RBX: 88003545da68 RCX: 
RDX:  RSI: 8800356b7e18 RDI: 88003d5c67b0
RBP: 8800356b7dc8 R08:  R09: 
R10:  R11: 817f6d30 R12: 00b2
R13: fff2 R14: 88003d5c67b0 R15: 88003545da68
FS:  7f06208fa700() GS:88003fa0() knlGS:
CS:  0010 DS:  ES:  CR0: 80050033
CR2: 01903108 CR3: 36efa000 CR4: 06f0
Stack:
 8800356b7e18 8800356b7ec8 8165ac60 8800356b7df8
 8800356b7e18 8800356b7e18 824e1440 00d52000
 0020 c02064b2 fff2 
Call Trace:
 [] drm_ioctl+0x3b0/0x640
 [] ? avc_has_perm+0x218/0x2f0
 [] ? avc_has_perm+0x20/0x2f0
 [] ? sched_clock_cpu+0xa8/0xf0
 [] do_vfs_ioctl+0x4d0/0x510
 [] ? selinux_file_ioctl+0xf5/0x100
 [] SyS_ioctl+0x4e/0x80
 [] system_call_fastpath+0x16/0x1b
Code: 55 41 b9 ff ff ff ff 41 83 c0 07 44 89 c8 41 c1 e8 03 48 89 e5
53 48 89 d3 31 d2 f7 f1 41 39 c0 77 46 41 0f af c8 31 d2 44 89 c8 
f1 41 39 c2 77 36 41 0f af ca b8 ea ff ff ff 81 c1 ff 0f 00
RIP  [] drm_mode_create_dumb_ioctl+0x53/0xa0
 RSP 
---[ end trace 6919129b71d9bf98 ]---
[drm:drm_mode_legacy_fb_format] *ERROR* bad bpp, assuming x8r8g8b8 pixel format



(gdb) list *0x816688e3
0x816688e3 is in drm_mode_create_dumb_ioctl
(drivers/gpu/drm/drm_crtc.c:4703).
4698    /* overflow checks for 32bit size calculations */
4699    cpp = DIV_ROUND_UP(args->bpp, 8);
4700    if (cpp > 0xffffffffU / args->width)
4701            return -EINVAL;
4702    stride = cpp * args->width;
4703    if (args->height > 0xffffffffU / stride)
4704            return -EINVAL;
4705
4706    /* test for wrap-around */
4707    size = args->height * stride;
(gdb) disassemble drm_mode_create_dumb_ioctl
Dump of assembler code for function drm_mode_create_dumb_ioctl:
   0x81668890 <+0>: mov0x20(%rdi),%rax
   0x81668894 <+4>: mov0x160(%rax),%r11
   0x8166889b <+11>:test   %r11,%r11
   0x8166889e <+14>:je 0x8166890f

   0x816688a0 <+16>:mov0x4(%rsi),%ecx
   0x816688a3 <+19>:test   %ecx,%ecx
   0x816688a5 <+21>:je 0x81668918

   0x816688a7 <+23>:mov(%rsi),%r10d
   0x816688aa <+26>:test   %r10d,%r10d
   0x816688ad <+29>:je 0x81668918

   0x816688af <+31>:mov0x8(%rsi),%r8d
   0x816688b3 <+35>:test   %r8d,%r8d
   0x816688b6 <+38>:je 0x81668918

   0x816688b8 <+40>:push   %rbp
   0x816688b9 <+41>:mov$0x,%r9d
   0x816688bf <+47>:add$0x7,%r8d
   0x816688c3 <+51>:mov%r9d,%eax
   0x816688c6 <+54>:shr$0x3,%r8d
   0x816688ca <+58>:mov%rsp,%rbp
   0x816688cd <+61>:push   %rbx
   0x816688ce <+62>:mov%rdx,%rbx
   0x816688d1 <+65>:xor%edx,%edx
   0x816688d3 <+67>:div%ecx
   0x816688d5 <+69>:cmp%eax,%r8d
   0x816688d8 <+72>:ja 0x81668920

   0x816688da <+74>:imul   %r8d,%ecx
   0x816688de <+78>:xor%edx,%edx
   0x816688e0 <+80>:mov%r9d,%eax
   0x816688e3 <+83>:div%ecx
   0x816688e5 <+85>:cmp%eax,%r10d
   0x816688e8 <+88>:ja 0x81668920

   0x816688ea <+90>:imul   %r10d,%ecx
   0x816688ee <+94>:mov$0xffea,%eax
   0x816688f3 <+99>:add$0xfff,%ecx
   0x816688f9 <+105>:   and$0xf000,%ecx
   0x816688ff <+111>:   je 0x81668928

   0x81668901 <+113>:   mov%rsi,%rdx
   0x81668904 <+116>:   mov%rdi,%rsi
   0x81668907 <+119>:   mov%rbx,%rdi
   0x8166890a <+122>:   callq  *%r11
   0x8166890d <+125>:   jmp0x81668928

   0x8166890f <+127>:   mov$0xffda,%eax
   0x81668914 <+132>:   retq
   0x81668915 <+133>:   nopl   (%rax)
   0x81668918 <+136>:   mov$0xffea,%eax
   0x8166891d <+141>:   retq
   0x8166891e <+142>:   xchg   %ax,%ax
   0x81668920 <+144>:   mov$0xffea,%eax
   0x81668925 <+149>:

/proc/asound/card0/oss_mixer stack corruption

2014-08-21 Thread Tommi Rantala
Hello,

Trinity discovered that writing 128 bytes to
/proc/asound/card0/oss_mixer triggers a stack corruption.

Tommi


# printf %128s > /proc/asound/card0/oss_mixer

ALSA: mixer_oss: invalid OSS volume ''
Kernel panic - not syncing: stack-protector: Kernel stack is corrupted
in: 81e193ba

CPU: 0 PID: 2778 Comm: bash Not tainted 3.17.0-rc1+ #13
Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
 880039fd4bf0 880034c87bd8 8229824a 828e2a40
 880034c87c50 8229051d 8810 880034c87c60
 880034c87c00 0020 81e193ba 0080
Call Trace:
 [] dump_stack+0x4d/0x66
 [] panic+0xc8/0x201
 [] ? snd_mixer_oss_proc_write+0x24a/0x270
 [] __stack_chk_fail+0x16/0x20
 [] snd_mixer_oss_proc_write+0x24a/0x270
 [] ? kvm_clock_read+0x27/0x40
 [] snd_info_entry_release+0x6c/0x110
 [] close_pdeo+0x136/0x1a0
 [] ? __lock_acquire+0x951/0xb40
 [] ? kvm_clock_read+0x27/0x40
 [] proc_reg_release+0x3e/0x60
 [] __fput+0x111/0x1e0
 [] fput+0x9/0x10
 [] task_work_run+0x9e/0xd0
 [] do_notify_resume+0x55/0x70
 [] int_signal+0x12/0x17
Kernel Offset: 0x0 from 0x8100 (relocation range:
0x8000-0x9fff)
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


kernel BUG at security/keys/keyring.c:1003!

2014-02-27 Thread Tommi Rantala
Hello,

Hit the following BUG while fuzzing 3.14.0-rc3 with trinity.

Tommi

[708836.755392] [ cut here ]
[708836.756044] kernel BUG at /build/linux/security/keys/keyring.c:1003!
[708836.756044] invalid opcode:  [#1] SMP DEBUG_PAGEALLOC
[708836.756044] CPU: 0 PID: 5594 Comm: trinity-c26 Not tainted 3.14.0-rc3 #1
[708836.756044] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
[708836.756044] task: 880036a3ca40 ti: 880003e6e000 task.ti:
880003e6e000
[708836.756044] RIP: 0010:[]  []
keyring_detect_cycle_iterator+0xe/0x20
[708836.756044] RSP: :880003e6fdb0  EFLAGS: 00010206
[708836.756044] RAX: 880056025b82 RBX: 003a RCX:
0003
[708836.756044] RDX: 0003 RSI: 880003e6fe98 RDI:
880056025b80
[708836.756044] RBP: 880003e6fdb0 R08: 0064 R09:

[708836.756044] R10: 880036a3ca40 R11:  R12:
880003e6fe98
[708836.756044] R13:  R14: 880003e6fe98 R15:
88006c950780
[708836.756044] FS:  7f88ae6bd700() GS:8800bf60()
knlGS:
[708836.756044] CS:  0010 DS:  ES:  CR0: 8005003b
[708836.756044] CR2: 0004 CR3: 3ba4d000 CR4:
06f0
[708836.756044] DR0: 00899000 DR1: 0115a000 DR2:
01b66000
[708836.756044] DR3:  DR6: 0ff0 DR7:
0600
[708836.756044] Stack:
[708836.756044]  880003e6fe80 814ac9e2 81078869
880003e6fdf0
[708836.756044]  81179f4d 8800bf7d5a40 001d5a40

[708836.756044]  88006c950780  0002
0001
[708836.756044] Call Trace:
[708836.756044]  [] search_nested_keyrings+0xf2/0x340
[708836.756044]  [] ? sched_clock+0x9/0x10
[708836.756044]  [] ? sched_clock_local+0x1d/0x90
[708836.756044]  [] ? __key_link_check_live_key+0x26/0x160
[708836.756044]  [] __key_link_check_live_key+0xe3/0x160
[708836.756044]  [] ? __key_link_check_live_key+0x26/0x160
[708836.756044]  [] ? keyring_instantiate+0xf0/0xf0
[708836.756044]  [] key_link+0x5c/0xb0
[708836.756044]  [] keyctl_keyring_link+0x7e/0xb0
[708836.756044]  [] SyS_keyctl+0x98/0x1a0
[708836.756044]  [] ia32_do_call+0x13/0x13
[708836.756044] Code: c0 eb 12 66 2e 0f 1f 84 00 00 00 00 00 31 c0 66
0f 1f 44 00 00 5b 41 5c 5d f3 c3 66 90 48 83 e7 fc 48 39 7e 28 55 48
89 e5 74 02 <0f> 0b b8 01 00 00 00 48 c7 46 48 dd ff ff ff 5d c3 90 55
48 89
[708836.756044] RIP  [] keyring_detect_cycle_iterator+0xe/0x20
[708836.756044]  RSP 
[708836.855231] ---[ end trace e2b699c76aca5cff ]---
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


btrfs "possible irq lock inversion dependency detected"

2014-02-17 Thread Tommi Rantala
Hello,

Saw this while fuzzing the kernel with Trinity.

Tommi


[  396.136048] =
[  396.136048] [ INFO: possible irq lock inversion dependency detected ]
[  396.136048] 3.14.0-rc3 #1 Not tainted
[  396.136048] -
[  396.136048] kswapd0/1482 just changed the state of lock:
[  396.136048]  (&delayed_node->mutex){+.+.-.}, at: [] 
__btrfs_release_delayed_node+0x4b/0x1e0
[  396.136048] but this lock took another, RECLAIM_FS-unsafe lock in the past:
[  396.136048]  (&found->groups_sem){+.}

and interrupts could create inverse lock ordering between them.

[  396.136048]
[  396.136048] other info that might help us debug this:
[  396.136048]  Possible interrupt unsafe locking scenario:
[  396.136048]
[  396.136048]CPU0CPU1
[  396.136048]
[  396.136048]   lock(&found->groups_sem);
[  396.136048]local_irq_disable();
[  396.136048]lock(&delayed_node->mutex);
[  396.136048]lock(&found->groups_sem);
[  396.136048]   
[  396.136048] lock(&delayed_node->mutex);
[  396.136048]
[  396.136048]  *** DEADLOCK ***
[  396.136048]
[  396.136048] 2 locks held by kswapd0/1482:
[  396.136048]  #0:  (shrinker_rwsem){..}, at: [] 
shrink_slab+0x3a/0x170
[  396.136048]  #1:  (&type->s_umount_key#25){.+}, at: [] 
grab_super_passive+0x4f/0x80
[  396.136048]
[  396.136048] the shortest dependencies between 2nd lock and 1st lock:
[  396.136048]  -> (&found->groups_sem){+.} ops: 38935 {
[  396.136048] HARDIRQ-ON-W at:
[  396.136048]   [] 
__lock_acquire+0x88e/0x1d90
[  396.136048]   [] 
lock_acquire+0x182/0x210
[  396.136048]   [] down_write+0x5c/0xc0
[  396.136048]   [] 
__link_block_group+0x3d/0xf0
[  396.136048]   [] 
btrfs_read_block_groups+0x392/0x690
[  396.136048]   [] 
open_ctree+0x1ad7/0x2140
[  396.136048]   [] 
btrfs_mount+0x44e/0x8e0
[  396.136048]   [] mount_fs+0x7a/0x1a0
[  396.136048]   [] 
vfs_kern_mount+0x71/0x150
[  396.136048]   [] 
btrfs_mount+0x831/0x8e0
[  396.136048]   [] mount_fs+0x7a/0x1a0
[  396.136048]   [] 
vfs_kern_mount+0x71/0x150
[  396.136048]   [] do_mount+0x954/0xb90
[  396.136048]   [] SyS_mount+0x94/0xe0
[  396.136048]   [] 
do_mount_root+0x1a/0x93
[  396.136048]   [] 
mount_block_root+0xe5/0x203
[  396.136048]   [] mount_root+0xe1/0xea
[  396.136048]   [] 
prepare_namespace+0x13c/0x174
[  396.136048]   [] 
kernel_init_freeable+0x242/0x251
[  396.136048]   [] kernel_init+0x9/0xf0
[  396.136048]   [] 
ret_from_fork+0x7c/0xb0
[  396.136048] HARDIRQ-ON-R at:
[  396.136048]   [] 
__lock_acquire+0x847/0x1d90
[  396.136048]   [] 
lock_acquire+0x182/0x210
[  396.136048]   [] down_read+0x4c/0xa0
[  396.136048]   [] 
btrfs_calc_num_tolerated_disk_barrier_failures+0x24a/0x310
[  396.136048]   [] 
open_ctree+0x1b0f/0x2140
[  396.136048]   [] 
btrfs_mount+0x44e/0x8e0
[  396.136048]   [] mount_fs+0x7a/0x1a0
[  396.136048]   [] 
vfs_kern_mount+0x71/0x150
[  396.136048]   [] 
btrfs_mount+0x831/0x8e0
[  396.136048]   [] mount_fs+0x7a/0x1a0
[  396.136048]   [] 
vfs_kern_mount+0x71/0x150
[  396.136048]   [] do_mount+0x954/0xb90
[  396.136048]   [] SyS_mount+0x94/0xe0
[  396.136048]   [] 
do_mount_root+0x1a/0x93
[  396.136048]   [] 
mount_block_root+0xe5/0x203
[  396.136048]   [] mount_root+0xe1/0xea
[  396.136048]   [] 
prepare_namespace+0x13c/0x174
[  396.136048]   [] 
kernel_init_freeable+0x242/0x251
[  396.136048]   [] kernel_init+0x9/0xf0
[  396.136048]   [] 
ret_from_fork+0x7c/0xb0
[  396.136048] SOFTIRQ-ON-W at:
[  396.136048]   [] 
__lock_acquire+0x8c3/0x1d90
[  396.136048]   [] 
lock_acquire+0x182/0x210
[  396.136048]   [] down_write+0x5c/0xc0
[  396.136048]   [] 
__link_block_group+0x3d/0xf0
[  396.136048]   [] 
btrfs_read_block_groups+0x392/0x690
[  396.136048]   [] 
open_ctree+0x1ad7/0x2140
[  396.136048]   [] 
btrfs_mount+0x44e/0x8e0
[  396.136048]   [

BUG: Bad rss-counter state mm:ffff88005f936c00 idx:0 val:1

2014-02-16 Thread Tommi Rantala
Hello,

Noticed the following kernel message while fuzzing
3.14.0-rc2-00488-gca03339 with trinity. Should I be worried?

[40879.796336] BUG: Bad rss-counter state mm:88005f936c00 idx:0 val:1

Tommi
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


BUG: Bad page state in process trinity-c19

2014-02-15 Thread Tommi Rantala
Hello,

Hit the following bug while fuzzing with trinity. I can see that Dave
reported similar bad page state problems for 3.13-rc4, but this one
does not seem to be AIO related.

https://lkml.org/lkml/2013/12/18/932

Tommi


BUG: Bad page state in process trinity-c19  pfn:2429e
page:ea90a780 count:0 mapcount:0 mapping:88003a018758 index:0xed
page flags: 0x108(uptodate)
page dumped because: non-NULL mapping
CPU: 1 PID: 28094 Comm: trinity-c19 Not tainted 3.14.0-rc2-00209-g45f7fdc #1
Hardware name: Hewlett-Packard HP Compaq dc5750 Small Form
Factor/0A64h, BIOS 786E3 v02.10 01/25/2007
 828f4590 880054591758 82363c9d ea90a780
 880054591780 8235d165 ea90a780 
 ea90a780 8800545917d8 8121a010 828f457f
Call Trace:
 [] dump_stack+0x4d/0x66
 [] bad_page+0xd5/0xf2
 [] free_pages_prepare+0x1f0/0x2b0
 [] free_hot_cold_page+0x3b/0x150
 [] free_hot_cold_page_list+0x10e/0x190
 [] release_pages+0x1dc/0x210
 [] pagevec_lru_move_fn+0xd3/0xf0
 [] ? __put_single_page+0x20/0x20
 [] __pagevec_lru_add+0x12/0x20
 [] __lru_cache_add+0x66/0x90
 [] lru_cache_add+0x35/0x40
 [] putback_lru_page+0x4a/0xd0
 [] migrate_pages+0x84b/0x880
 [] ? isolate_freepages_block+0x440/0x440
 [] compact_zone+0x249/0x770
 [] compact_zone_order+0xb6/0xf0
 [] ? native_send_call_func_single_ipi+0x31/0x40
 [] try_to_compact_pages+0xb2/0x110
 [] __alloc_pages_direct_compact+0xa5/0x1b5
 [] __alloc_pages_slowpath+0x73a/0x79e
 [] ? sched_clock_local+0x1d/0x90
 [] __alloc_pages_nodemask+0x226/0x3b0
 [] alloc_pages_vma+0x16f/0x1e0
 [] ? do_huge_pmd_anonymous_page+0x218/0x3f0
 [] do_huge_pmd_anonymous_page+0x218/0x3f0
 [] handle_mm_fault+0x1d7/0x320
 [] __do_page_fault+0x4d0/0x540
 [] ? trace_hardirqs_on_caller+0x185/0x220
 [] ? trace_hardirqs_on+0xd/0x10
 [] ? _raw_spin_unlock_irq+0x27/0x40
 [] ? finish_task_switch+0x81/0x130
 [] ? finish_task_switch+0x43/0x130
 [] ? trace_hardirqs_off_thunk+0x3a/0x3c
 [] do_page_fault+0x9/0x10
 [] page_fault+0x28/0x30
Disabling lock debugging due to kernel taint
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


lockdep: strange %s#5 lock name

2014-02-10 Thread Tommi Rantala
Hello,

Noticed a suspicious "%s#5" lock name in a lockdep splat while fuzzing
with trinity.

Tommi

[249844.491141] INFO: task kworker/u2:2:32113 blocked for more than 120 seconds.
[249844.493268]   Not tainted v3.13-11268-g8a1f006 #3
[249844.494731] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[249844.496922] kworker/u2:2D 880074a92520 0 32113  2 0x
[249844.498985] Workqueue: netns cleanup_net
[249844.500188]  8800956d1c78 0046 880074a92520
001d4f80
[249844.502360]  8800956d1fd8 001d4f80 880074a9
880074a92520
[249844.504617]  8800b414dab8 82db4d20 82db4d28
0246
[249844.506647] Call Trace:
[249844.507331]  [] schedule+0x65/0x70
[249844.508576]  [] schedule_preempt_disabled+0x11/0x20
[249844.510185]  [] mutex_lock_nested+0x285/0x4a0
[249844.511777]  [] ? cleanup_net+0x80/0x1e0
[249844.513422]  [] ? cleanup_net+0x80/0x1e0
[249844.514998]  [] cleanup_net+0x80/0x1e0
[249844.516523]  [] process_one_work+0x366/0x690
[249844.518172]  [] ? process_one_work+0x240/0x690
[249844.519840]  [] worker_thread+0x21e/0x370
[249844.521422]  [] ? rescuer_thread+0x2c0/0x2c0
[249844.523040]  [] kthread+0xf0/0x100
[249844.524425]  [] ? finish_task_switch+0x81/0x130
[249844.525987]  [] ? insert_kthread_work+0x90/0x90
[249844.527487]  [] ret_from_fork+0x7c/0xb0
[249844.528959]  [] ? insert_kthread_work+0x90/0x90
[249844.530451] 3 locks held by kworker/u2:2/32113:
[249844.531638]  #0:  (%s#5){.+.+.+}, at: []
process_one_work+0x240/0x690
[249844.533891]  #1:  (net_cleanup_work){+.+.+.}, at:
[] process_one_work+0x240/0x690
[249844.536317]  #2:  (net_mutex){+.+.+.}, at: []
cleanup_net+0x80/0x1e0
[249844.538744] INFO: task trinity-c10:23911 blocked for more than 120 seconds.
[249844.540723]   Not tainted v3.13-11268-g8a1f006 #3
[249844.542192] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[249844.544785] trinity-c10 D 8800b963a520 0 23911  22465 0x0004
[249844.546929]  880020d49df8 0046 8800b963a520
001d4f80
[249844.549183]  880020d49fd8 001d4f80 8800bb42a520
8800b963a520
[249844.551401]  880024e58000 82db4d20 82db4d28
0246
[249844.553376] Call Trace:
[249844.553998]  [] schedule+0x65/0x70
[249844.555237]  [] schedule_preempt_disabled+0x11/0x20
[249844.556844]  [] mutex_lock_nested+0x285/0x4a0
[249844.558349]  [] ? copy_net_ns+0x97/0x150
[249844.559748]  [] ? copy_net_ns+0x97/0x150
[249844.561305]  [] copy_net_ns+0x97/0x150
[249844.562789]  [] create_new_namespaces+0x126/0x1c0
[249844.564551]  [] ? ip_build_and_send_pkt+0x1cd/0x260
[249844.566484]  [] unshare_nsproxy_namespaces+0xa7/0xe0
[249844.568325]  [] SyS_unshare+0x116/0x2c0
[249844.569838]  [] ? ip_build_and_send_pkt+0x1cd/0x260
[249844.571664]  [] system_call_fastpath+0x16/0x1b
[249844.573328]  [] ? ip_build_and_send_pkt+0x1cd/0x260
[249844.574886] 1 lock held by trinity-c10/23911:
[249844.576036]  #0:  (net_mutex){+.+.+.}, at: []
copy_net_ns+0x97/0x150
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: BUG ip_dst_cache (Not tainted): Poison overwritten

2014-02-03 Thread Tommi Rantala
2014-02-01 Tommi Rantala :
> 2014-01-31 Eric Dumazet :
>> On Fri, 2014-01-31 at 22:11 +0200, Tommi Rantala wrote:
>>> Hello,
>>>
>>> Hit this while fuzzing v3.13-9218-g0e47c96 with trinity in a qemu
>>> virtual machine.
>>>
>>> Tommi
>>
>> Hi Tommi
>>
>> Could you please try the following fix ?
>
> Thanks, giving this a spin. This does not reproduce very easily with
> Trinity, I'll let you know if anything blows up.

Looking good after two days of fuzzing in several virtual machines.
The bug has not been reproduced, and no other ill effects visible.

Thanks!

Tommi
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: BUG ip_dst_cache (Not tainted): Poison overwritten

2014-02-01 Thread Tommi Rantala
2014-01-31 Eric Dumazet :
> On Fri, 2014-01-31 at 22:11 +0200, Tommi Rantala wrote:
>> Hello,
>>
>> Hit this while fuzzing v3.13-9218-g0e47c96 with trinity in a qemu
>> virtual machine.
>>
>> Tommi
>
> Hi Tommi
>
> Could you please try the following fix ?

Thanks, giving this a spin. This does not reproduce very easily with
Trinity, I'll let you know if anything blows up.

Tommi

> I'll send an official patch in a couple of hours
>
> There are two bugs :
> One dst leak, and one plain bug, as rt initial NULL
> value might be scratched.
>
>  net/ipv4/ip_tunnel.c |   27 ++-
>  1 file changed, 10 insertions(+), 17 deletions(-)
>
> diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
> index bd28f386bd02..bc6acdcb7625 100644
> --- a/net/ipv4/ip_tunnel.c
> +++ b/net/ipv4/ip_tunnel.c
> @@ -101,27 +101,21 @@ static void tunnel_dst_reset_all(struct ip_tunnel *t)
> __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL);
>  }
>
> -static struct dst_entry *tunnel_dst_get(struct ip_tunnel *t)
> +static struct dst_entry *tunnel_dst_check(struct ip_tunnel *t, u32 cookie)
>  {
> struct dst_entry *dst;
>
> rcu_read_lock();
> dst = rcu_dereference(this_cpu_ptr(t->dst_cache)->dst);
> -   if (dst)
> +   if (dst) {
> +   if (dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
> +   rcu_read_unlock();
> +   tunnel_dst_reset(t);
> +   return NULL;
> +   }
> dst_hold(dst);
> -   rcu_read_unlock();
> -   return dst;
> -}
> -
> -static struct dst_entry *tunnel_dst_check(struct ip_tunnel *t, u32 cookie)
> -{
> -   struct dst_entry *dst = tunnel_dst_get(t);
> -
> -   if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
> -   tunnel_dst_reset(t);
> -   return NULL;
> }
> -
> +   rcu_read_unlock();
> return dst;
>  }
>
> @@ -584,7 +578,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct 
> net_device *dev,
> struct flowi4 fl4;
> u8 tos, ttl;
> __be16 df;
> -   struct rtable *rt = NULL;   /* Route to the other host */
> +   struct rtable *rt;  /* Route to the other host */
> unsigned int max_headroom;  /* The extra header space needed */
> __be32 dst;
> int err;
> @@ -657,8 +651,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct 
> net_device *dev,
> init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
>  tunnel->parms.o_key, RT_TOS(tos), 
> tunnel->parms.link);
>
> -   if (connected)
> -   rt = (struct rtable *)tunnel_dst_check(tunnel, 0);
> +   rt = (connected) ? (struct rtable *)tunnel_dst_check(tunnel, 0) : 
> NULL;
>
> if (!rt) {
> rt = ip_route_output_key(tunnel->net, &fl4);
>
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


BUG ip_dst_cache (Not tainted): Poison overwritten

2014-01-31 Thread Tommi Rantala
Hello,

Hit this while fuzzing v3.13-9218-g0e47c96 with trinity in a qemu
virtual machine.

Tommi

[ 6329.061605] 
=
[ 6329.062014] BUG ip_dst_cache (Not tainted): Poison overwritten
[ 6329.062014] 
-
[ 6329.062014] Disabling lock debugging due to kernel taint
[ 6329.062014] INFO: 0x8800b4809940-0x8800b4809940. First byte
0x6a instead of 0x6b
[ 6329.062014] INFO: Allocated in dst_alloc+0x46/0x180 age=33 cpu=0 pid=6108
[ 6329.062014]  __slab_alloc+0x4f8/0x58c
[ 6329.062014]  kmem_cache_alloc+0x94/0x290
[ 6329.062014]  dst_alloc+0x46/0x180
[ 6329.062014]  rt_dst_alloc+0x47/0x50
[ 6329.062014]  __ip_route_output_key+0x882/0xa80
[ 6329.062014]  ip_route_output_flow+0x22/0x60
[ 6329.062014]  igmpv3_newpack+0xe2/0x210
[ 6329.062014]  add_grhead.isra.17+0x37/0xa0
[ 6329.062014]  add_grec+0x3b2/0x470
[ 6329.062014]  igmp_ifc_timer_expire+0x28e/0x400
[ 6329.062014]  call_timer_fn+0x146/0x320
[ 6329.062014]  run_timer_softirq+0x2d4/0x360
[ 6329.062014]  __do_softirq+0x217/0x4a0
[ 6329.062014]  irq_exit+0x45/0xb0
[ 6329.062014]  smp_apic_timer_interrupt+0x3f/0x50
[ 6329.062014]  apic_timer_interrupt+0x72/0x80
[ 6329.062014] INFO: Freed in dst_destroy+0x8a/0xe0 age=33 cpu=0 pid=6108
[ 6329.062014]  __slab_free+0x32/0x380
[ 6329.062014]  kmem_cache_free+0x186/0x2c0
[ 6329.062014]  dst_destroy+0x8a/0xe0
[ 6329.062014]  dst_release+0x53/0x70
[ 6329.062014]  ip_tunnel_xmit+0x50e/0xfb0
[ 6329.062014]  ipip_tunnel_xmit+0x41/0x60
[ 6329.062014]  dev_hard_start_xmit+0x3ed/0x950
[ 6329.062014]  __dev_queue_xmit+0x621/0x890
[ 6329.062014]  dev_queue_xmit+0xb/0x10
[ 6329.062014]  neigh_direct_output+0xc/0x10
[ 6329.062014]  ip_finish_output2+0x494/0x5d0
[ 6329.062014]  ip_finish_output+0x238/0x2d0
[ 6329.062014]  ip_output+0x9f/0x110
[ 6329.062014]  ip_local_out+0x6e/0xa0
[ 6329.062014]  igmpv3_sendpack+0x43/0x50
[ 6329.062014]  igmp_ifc_timer_expire+0x395/0x400
[ 6329.062014] INFO: Slab 0xea0002d20200 objects=14 used=14 fp=0x
(null) flags=0x1004080
[ 6329.062014] INFO: Object 0x8800b48098c0 @offset=6336
fp=0x8800b4809680
[ 6329.062014] Bytes b4 8800b48098b0: 5a 5a 5a 5a 5a 5a 5a 5a 5a
5a 5a 5a 5a 5a 5a 5a  
[ 6329.062014] Object 8800b48098c0: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b
6b 6b 6b 6b 6b 6b  
[ 6329.062014] Object 8800b48098d0: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b
6b 6b 6b 6b 6b 6b  
[ 6329.062014] Object 8800b48098e0: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b
6b 6b 6b 6b 6b 6b  
[ 6329.062014] Object 8800b48098f0: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b
6b 6b 6b 6b 6b 6b  
[ 6329.062014] Object 8800b4809900: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b
6b 6b 6b 6b 6b 6b  
[ 6329.062014] Object 8800b4809910: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b
6b 6b 6b 6b 6b 6b  
[ 6329.062014] Object 8800b4809920: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b
6b 6b 6b 6b 6b 6b  
[ 6329.062014] Object 8800b4809930: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b
6b 6b 6b 6b 6b 6b  
[ 6329.062014] Object 8800b4809940: 6a 6b 6b 6b 6b 6b 6b 6b 6b 6b
6b 6b 6b 6b 6b 6b  jkkk
[ 6329.062014] Object 8800b4809950: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b
6b 6b 6b 6b 6b 6b  
[ 6329.062014] Object 8800b4809960: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b
6b 6b 6b 6b 6b 6b  
[ 6329.062014] Object 8800b4809970: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b
6b 6b 6b 6b 6b a5  kkk.
[ 6329.062014] Redzone 8800b4809980: bb bb bb bb bb bb bb bb
   
[ 6329.062014] Padding 8800b4809ac0: 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a
5a 5a 5a 5a 5a 5a  
[ 6329.062014] Padding 8800b4809ad0: 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a
5a 5a 5a 5a 5a 5a  
[ 6329.062014] Padding 8800b4809ae0: 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a
5a 5a 5a 5a 5a 5a  
[ 6329.062014] Padding 8800b4809af0: 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a
5a 5a 5a 5a 5a 5a  
[ 6329.062014] CPU: 0 PID: 6108 Comm: trinity-main Tainted: GB
   3.13.0+ #1
[ 6329.062014] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
[ 6329.062014]  8800b48098c0 8800ab253b38 82366c34
8800baacd8c0
[ 6329.062014]  8800ab253b68 81262e41 8800b4809941
8800baacd8c0
[ 6329.062014]  006b 8800b48098c0 8800ab253bb0
81263284
[ 6329.062014] Call Trace:
[ 6329.062014]  [] dump_stack+0x4d/0x66
[ 6329.062014]  [] print_trailer+0x131/0x140
[ 6329.062014]  [] check_bytes_and_report+0xc4/0x120
[ 6329.062014]  [] check_object+0x11e/0x240
[ 6329.062014]  [] ? dst_alloc+0x46/0x180
[ 6329.062014]  [] alloc_debug_processing+0x62/0x104
[ 6329.062014]  [] __slab_alloc+0x4f8/0x58c
[ 6329.062014]  [] ? sched_clock_cpu+0xb8/0xe0
[ 6329.062014]  [] ? kvm_clock_read+0x27/0x40
[ 6329.062014]  [] ? sched_clock+0x9

sched_rr_get_interval NULL pointer OOPS

2014-01-24 Thread Tommi Rantala
Hello,

Trinity triggered the following bug in two separate qemu virtual
machines after fuzzing v3.13-3995-g0dc3fd0 for a day or two. I have
not been running Trinity in a while, so no idea if this is a
regression or not.

If I'm reading this right, it's oopsing in kernel/sched/core.c:

SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
struct timespec __user *, interval)
{
...
rq = task_rq_lock(p, &flags);
time_slice = p->sched_class->get_rr_interval(rq, p);   <==
task_rq_unlock(rq, p, &flags);
...

The first trace:

[21451.975552] trinity-c9: vm86 mode not supported on 64 bit kernel
[21452.242792] trinity-c23: vm86 mode not supported on 64 bit kernel
[21452.309518] trinity-c30: vm86 mode not supported on 64 bit kernel
[21456.862415] type=1401 audit(1390484421.888:396): SELinux:
unrecognized netlink message type=0 for sclass=34
[21456.862415]
[21472.032599] BUG: unable to handle kernel NULL pointer dereference
at   (null)
[21472.034764] IP: [<  (null)>]   (null)
[21472.036117] PGD a6243067 PUD a712a067 PMD 0
[21472.037345] Oops: 0010 [#1] SMP DEBUG_PAGEALLOC
[21472.038616] CPU: 0 PID: 15522 Comm: trinity-c8 Not tainted 3.13.0+ #1
[21472.040309] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
[21472.041823] task: 88006f8f ti: 8800a101e000 task.ti:
8800a101e000
[21472.043814] RIP: 0010:[<>]  [<  (null)>]
   (null)
[21472.045823] RSP: 0018:8800a101ff30  EFLAGS: 00010046
[21472.047225] RAX: 82434ae0 RBX: 8800b926ca40 RCX: 02c0
[21472.049143] RDX: 8800bf60e460 RSI: 8800b926ca40 RDI: 8800bf7d4fc0
[21472.050900] RBP: 8800a101ff78 R08: fffe8fd25bb38016 R09: 0001
[21472.052621] R10: 88006f8f R11:  R12: 0004
[21472.054469] R13: 8800bf7d4fc0 R14: 0094 R15: 20008465485f
[21472.056303] FS:  7f904f260700() GS:8800bf60()
knlGS:
[21472.058211] CS:  0010 DS:  ES:  CR0: 8005003b
[21472.059516] CR2:  CR3: 44ec3000 CR4: 06f0
[21472.061143] DR0: 0276a000 DR1: 0276aff8 DR2: 
[21472.062762] DR3:  DR6: 0ff0 DR7: 0600
[21472.064445] Stack:
[21472.064975]  81160cdf 81160c23 0282
0001
[21472.067017]  04ae 0008 0008
7f904f233de0
[21472.069053]  0094 0094 8235ba79
0246
[21472.071089] Call Trace:
[21472.071761]  [] ? SyS_sched_rr_get_interval+0xdf/0x230
[21472.073570]  [] ? SyS_sched_rr_get_interval+0x23/0x230
[21472.075401]  [] system_call_fastpath+0x16/0x1b
[21472.076987] Code:  Bad RIP value.
[21472.077929] RIP  [<  (null)>]   (null)
[21472.079302]  RSP 
[21472.080247] CR2: 
[21472.117066] ---[ end trace cc44b07941fc4905 ]---

The second trace looks more or less identical:

[106143.588795] RDS: rds_bind() could not find a transport, load
rds_tcp or rds_rdma?
[106146.597725] trinity-c1: vm86 mode not supported on 64 bit kernel
[106146.865957] trinity-c36: vm86 mode not supported on 64 bit kernel
[106156.562726] BUG: unable to handle kernel NULL pointer dereference
at   (null)
[106156.565411] IP: [<  (null)>]   (null)
[106156.567021] PGD a61e6067 PUD a03a4067 PMD 0
[106156.568451] Oops: 0010 [#1] SMP DEBUG_PAGEALLOC
[106156.569929] CPU: 0 PID: 19875 Comm: trinity-c23 Not tainted 3.13.0+ #1
[106156.571987] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
[106156.573758] task: 8800b65d8000 ti: 880009ac8000 task.ti:
880009ac8000
[106156.576051] RIP: 0010:[<>]  [<  (null)>]
(null)
[106156.578322] RSP: 0018:880009ac9f30  EFLAGS: 00010046
[106156.579920] RAX: 82434ae0 RBX: 8800b4cb2520 RCX:
02c0
[106156.582122] RDX: 8800bf60e460 RSI: 8800b4cb2520 RDI:
8800bf7d4fc0
[106156.584225] RBP: 880009ac9f78 R08: fffe8fd25bb38016 R09:
0001
[106156.586340] R10: 8800b65d8000 R11:  R12:
008c8000
[106156.588513] R13: 8800bf7d4fc0 R14: 0094 R15:
40004a1b
[106156.590684] FS:  7f75c3e23700() GS:8800bf60()
knlGS:
[106156.593171] CS:  0010 DS:  ES:  CR0: 8005003b
[106156.594922] CR2:  CR3: a69c1000 CR4:
06f0
[106156.597114] DR0: 008c8000 DR1: 00ca5000 DR2:
024dc000
[106156.599295] DR3: 026df000 DR6: 0ff0 DR7:
00030602
[106156.601449] Stack:
[106156.602085]  81160cdf 81160c23 0282
0001
[106156.604423]  0003d7dc 0017 0017
7f75c3df6de0
[106156.606758]  0094 0094 8235ba79
0246
[106156.609117] Call Trace:
[106156.609913]  [] ? SyS_s

kernel BUG at net/core/skbuff.c:1065!

2013-06-16 Thread Tommi Rantala
Hello,

I hit this bug while fuzzing in a QEMU virtual machine as the root user.

Kernel is v3.10-rc5-0-g317ddd2.

Tommi

[575180.874750] type=1401 audit(1371378748.322:7750): SELinux:
unrecognized netlink message type=0 for sclass=36
[575180.874750]
[575191.358143] ------------[ cut here ]------------
[575191.358498] kernel BUG at /build/linux/net/core/skbuff.c:1065!
[575191.358498] invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC
[575191.358498] CPU: 0 PID: 28554 Comm: trinity-child33 Not tainted
3.10.0-rc5 #1
[575191.358498] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
[575191.358498] task: 880005f0c7c0 ti: 88002cec6000 task.ti:
88002cec6000
[575191.358498] RIP: 0010:[]  []
pskb_expand_head+0x3b/0x290
[575191.358498] RSP: 0018:88002cec79f0  EFLAGS: 00010202
[575191.358498] RAX: 0002 RBX: 880010e7cd80 RCX:
0020
[575191.358498] RDX: 003c RSI:  RDI:
880010e7cd80
[575191.358498] RBP: 88002cec7a28 R08: 0001 R09:

[575191.358498] R10:  R11:  R12:
002b
[575191.358498] R13:  R14: 0011 R15:
40014b89
[575191.358498] FS:  7f3b21cd6700() GS:8800bf60()
knlGS:
[575191.358498] CS:  0010 DS:  ES:  CR0: 80050033
[575191.358498] CR2: 009fb000 CR3: 1a873000 CR4:
06f0
[575191.358498] DR0: 02592d30 DR1:  DR2:

[575191.358498] DR3:  DR6: 0ff0 DR7:
0600
[575191.358498] Stack:
[575191.358498]  880090cbd668  880010e7cd80
002b
[575191.358498]   0011 40014b89
88002cec7a50
[575191.358498]  81eb6fc3 88002cec7a70 0011
8800b952d498
[575191.358498] Call Trace:
[575191.358498]  [] skb_pad+0xa3/0x150
[575191.358498]  [] e1000_xmit_frame+0x78/0xfc0
[575191.358498]  [] ? dev_queue_xmit_nit+0x360/0x390
[575191.358498]  [] ? get_rps_cpu+0x4a0/0x4a0
[575191.358498]  [] dev_hard_start_xmit+0x2ec/0x720
[575191.358498]  [] sch_direct_xmit+0x80/0x290
[575191.358498]  [] dev_queue_xmit+0x4b4/0x8e0
[575191.358498]  [] ? dev_hard_start_xmit+0x720/0x720
[575191.358498]  [] llc_sap_action_send_test_c+0x7f/0x90
[575191.358498]  [] llc_sap_state_process+0xd0/0x160
[575191.358498]  [] llc_build_and_send_test_pkt+0x44/0x50
[575191.358498]  [] llc_ui_sendmsg+0x1e7/0x490
[575191.358498]  [] sock_sendmsg+0xa1/0xd0
[575191.358498]  [] ? __do_page_fault+0x288/0x530
[575191.358498]  [] SYSC_sendto+0x11c/0x160
[575191.358498]  [] ? _raw_spin_unlock_irq+0x27/0x50
[575191.358498]  [] ? do_setitimer+0x27c/0x330
[575191.358498]  [] ? trace_hardirqs_on_caller+0x16/0x220
[575191.358498]  [] ? trace_hardirqs_on_thunk+0x3a/0x3f
[575191.358498]  [] SyS_sendto+0x9/0x10
[575191.358498]  [] system_call_fastpath+0x16/0x1b
[575191.358498] Code: 48 89 fb 48 83 ec 10 8b 87 d4 00 00 00 01 f0 01
c2 85 f6 79 0b 0f 0b 66 0f 1f 84 00 00 00 00 00 8b 87 ec 00 00 00 83
f8 01 74 05 <0f> 0b 0f 1f 00 83 c2 3f 41 89 cf 83 e2 c0 f6 87 aa 00 00
00 04
[575191.358498] RIP  [] pskb_expand_head+0x3b/0x290
[575191.358498]  RSP 
[575191.518696] ---[ end trace 866084dcc0c2aa3e ]---
[575191.522588] Kernel panic - not syncing: Fatal exception in interrupt
[575191.523574] drm_kms_helper: panic occurred, switching back to text console
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


  1   2   >