[ This is FYI only. Documenting what I found with gcc 4.5.1 (it is
  fixed in 4.5.4). ]

Part of my test suite is to build the kernel with a compiler from
before asm goto was supported (to test jump labels without it).

Recently I noticed that the kernel started to hang when built with
it. For a while I just disabled that test, but I finally got some time
to look at why it was happening. I took the config that was causing
the hang and started a bisect. It came down to this commit:

commit 54acd4397d7e7a725c94101180cd9f38ef701acc
Author: Kees Cook <keesc...@chromium.org>
Date:   Wed Aug 17 14:42:09 2016 -0700

    rculist: Consolidate DEBUG_LIST for list_add_rcu()


That struck me as a tad weird. But the commit before it would boot
fine, and this one would hang again. I then checked out a recent
4.13-rc release, tested it, and it hung. Then I reverted the change
(with the attached patch) and it booted fine! Thinking that there may
be some subtle bug in that code, I dug deeper. Here's the NMI
watchdog lockup dump:

 NMI watchdog: Watchdog detected hard LOCKUP on cpu 1
 Modules linked in:
 CPU: 1 PID: 1 Comm: systemd Not tainted 4.13.0-rc3-test+ #347
 Hardware name: Hewlett-Packard HP Compaq Pro 6300 SFF/339A, BIOS K01 v03.03 07/14/2016
 task: ffff8801195a8040 task.stack: ffffc90000008000
 RIP: 0010:cgroup_migrate_execute+0x1bc/0x540
 RSP: 0018:ffffc9000000baa8 EFLAGS: 00000046
 RAX: ffffffff81e71830 RBX: ffffffff81e71910 RCX: ffffc9000000b928
 RDX: ffffffff81e71830 RSI: ffff880119725918 RDI: ffff8801195f8aa8
 RBP: ffffc9000000bb88 R08: 00000000000001c0 R09: ffffc9000000b858
 R10: ffff8801195f8b78 R11: ffffc9000000b868 R12: ffffffff81e717c0
 R13: ffffffff81e71830 R14: ffffffff81e70758 R15: ffffffff81e717c0
 FS:  00007f4eff598d80(0000) GS:ffff88011ea80000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 00000043fc899e30 CR3: 00000001142ba000 CR4: 00000000001406e0
 Call Trace:
  ? cgroup_migrate+0xb8/0xd0
  cgroup_migrate+0xc0/0xd0
  ? cgroup_migrate_execute+0x540/0x540
  cgroup_attach_task+0x1a9/0x260
  ? cgroup_attach_task+0x97/0x260
  ? get_task_cred+0x90/0xb0
  __cgroup_procs_write+0x314/0x370
  ? __cgroup_procs_write+0x85/0x370
  cgroup_procs_write+0x14/0x20
  cgroup_file_write+0x77/0x190
  kernfs_fop_write+0x11c/0x1d0
  __vfs_write+0x34/0x140
  ? __sb_start_write+0x11d/0x1d0
  ? file_start_write.clone.19+0x28/0x30
  vfs_write+0xcb/0x120
  ? __fdget_pos+0x12/0x50
  SyS_write+0x59/0xc0
  entry_SYSCALL_64_fastpath+0x1f/0xbe
 RIP: 0033:0x7f4efdb5ac30
 RSP: 002b:00007ffdf4e99388 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
 RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 00007f4efdb5ac30
 RDX: 0000000000000002 RSI: 00000043fcfd52b0 RDI: 0000000000000006
 RBP: 0000000000000002 R08: 00000043fcfd40a0 R09: 00007f4eff598d80
 R10: 00000043fcfd52b0 R11: 0000000000000246 R12: 0000000000000002
 R13: 00007f4eff2a073e R14: 00000043fcfd3fc0 R15: 0000000000000006
 Code: 52 08 15 81 31 c0 e8 04 8c 04 00 44 89 a5 24 ff ff ff 49 8b 47 70 48 39 85 38 ff ff ff 4c 8d b0 28 ef ff ff 4d 8b ae d8 10 00 00 <0f> 84 80 00 00 00 49 81 ed d8 10 00 00 eb 0a 4d 89 ee 4c 8d aa 

It would always lock up in the same location: that
cgroup_migrate_execute() call, which is here:

        spin_lock_irq(&css_set_lock);
        list_for_each_entry(cset, &tset->src_csets, mg_node) {
                list_for_each_entry_safe(task, tmp_task, &cset->mg_tasks, cg_list) {
                        struct css_set *from_cset = task_css_set(task);
                        struct css_set *to_cset = cset->mg_dst_cset;

                        get_css_set(to_cset);
                        to_cset->nr_tasks++;
                        css_set_move_task(task, from_cset, to_cset, true);
                        put_css_set_locked(from_cset);
                        from_cset->nr_tasks--;
                }
        }
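
To see why this spins forever instead of crashing, note how
list_for_each_entry() terminates (from include/linux/list.h;
list_next_entry() just follows pos->member.next):

#define list_for_each_entry(pos, head, member)				\
	for (pos = list_first_entry(head, typeof(*pos), member);	\
	     &pos->member != (head);					\
	     pos = list_next_entry(pos, member))

The loop only exits when the cursor's member pointer walks back around
to the head. If tset->src_csets points at cset->mg_node, but
mg_node.next still points at itself, list_next_entry() hands back the
same cset on every iteration and the loop never advances. Since all
of this runs under spin_lock_irq(&css_set_lock), that is exactly the
hard lockup the NMI watchdog caught.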

Adding several trace_printk()s, I found that cset->mg_node was an
empty list (list_empty(&cset->mg_node) returns true), but
tset->src_csets points to it. I added more printks where the cset is
added to and removed from the list, and found something interesting:

 cgroup_migrate_add_task: src=ffffc9000000bc18 src->next=ffffc9000000bc18 src->prev=ffffc9000000bc18
 cgroup_migrate_add_task: add tail ffffffff81e71910 to ffffc9000000bc18 (empty=1)
 cgroup_migrate_add_task: cset=ffffffff81e71910 cset->next=ffffffff81e71910 (ffffc9000000bc18) cset->prev=ffffc9000000bc18
 cgroup_migrate_execute: start cset loop
 cgroup_migrate_execute: list empty ffffffff81e71910 from ffffc9000000bc18
 cgroup_migrate_execute: start loop on ffffffff81e71830 node ffffffff81e71910 empty 1
 cgroup_migrate_execute: cset=ffffffff81e71910 cset->next=ffffffff81e71910 cset->prev=ffffffff81e71910
 cgroup_migrate_execute: loop from ffffffff81e717c0 to ffff880113ddbba8

The setup in cgroup_migrate_add_task() added the cset to the tset
list, but the cset's own mg_node was never actually linked into it.
And the third trace_printk() above had this:

                trace_printk("cset=%p cset->next=%p (%p) cset->prev=%p\n",
                             &cset->mg_node,
                             READ_ONCE(cset->mg_node.next),
                             cset->mg_node.next,
                             cset->mg_node.prev);

The "READ_ONCE()" of cset->mg_node.next returned a different value than
just reading cset->mg_node.next directly!
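
For reference, READ_ONCE() ultimately amounts to a volatile access,
which forces the compiler to do a real load from memory instead of
reusing a value it thinks it already has. A simplified sketch of what
the macro in include/linux/compiler.h boils down to for a scalar (the
real one goes through __read_once_size()):

/* simplified sketch only; not the actual kernel macro */
#define READ_ONCE_SKETCH(x)	(*(const volatile typeof(x) *)&(x))

That a forced reload from memory disagreed with a plain read of the
very same location is a strong hint that this is not a locking bug
but a code generation bug.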

It appears that gcc 4.5.1 somehow lost track of the fact that the
variable passed into the static inline was a global and not a local
variable. The call in question is:

                list_add_tail(&cset->mg_node,
                              &mgctx->tset.src_csets);

Where we have:

static inline void list_add_tail(struct list_head *new, struct list_head *head)
{
        __list_add(new, head->prev, head);
}

and

static inline void __list_add(struct list_head *new,
                              struct list_head *prev,
                              struct list_head *next)
{
        if (!__list_add_valid(new, prev, next))
                return;

        next->prev = new;
        new->next = next;
        new->prev = prev;
        WRITE_ONCE(prev->next, new);
}
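
As a sanity check, this is what the pointers should look like after a
correct list_add_tail() of one node onto an empty head (a minimal
sketch using the same struct list_head as above; example() is just a
made-up driver):

struct list_head head = { &head, &head };	/* LIST_HEAD(head): empty, points to itself */
struct list_head node;

void example(void)
{
	list_add_tail(&node, &head);

	/*
	 * Expected now:
	 *   head.next == &node    head.prev == &node
	 *   node.next == &head    node.prev == &head
	 *
	 * What the trace showed instead: head.next pointed at the
	 * node, but READ_ONCE(node.next) still pointed at the node
	 * itself, as if the "new->next = next" store never reached
	 * memory.
	 */
}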

And __list_add_valid() is an out-of-line (extern) function, not an
inlined one, although if I remove it, the kernel still hangs.
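
Distilled down, the shape gcc 4.5.1 appears to mishandle is a static
inline that stores through pointer arguments aimed at global objects.
Here is a hypothetical sketch of just that shape (all names made up;
I am not claiming this exact snippet reproduces the gcc bug):

struct node { struct node *next, *prev; };

extern bool check(struct node *new, struct node *prev, struct node *next);

static inline void add_tail(struct node *new, struct node *head)
{
	struct node *prev = head->prev;

	if (!check(new, prev, head))
		return;

	head->prev = new;
	new->next = head;	/* the store the trace suggests went missing */
	new->prev = prev;
	prev->next = new;
}

/* global, statically initialized, like the css_set objects involved */
struct node head = { &head, &head };
struct node item;

void do_add(void)
{
	add_tail(&item, &head);	/* mirrors list_add_tail(&cset->mg_node, ...) */
}

Note though that removing the out-of-line call did not make the hang
go away, so the extern call itself may not be the essential
ingredient.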

Note that CONFIG_DEBUG_LIST is set. Also, I tested gcc 4.5.4 and it
works again. I just want it documented somewhere that gcc 4.5.1 has a
bug that mixes up local and global variables when dealing with
inlined functions.

Again, this is just an FYI to document my findings, as I spent two
days debugging this. The bug is in gcc 4.5.1 but not in 4.5.4, so it
was fixed by that release.

-- Steve
diff --git a/include/linux/list.h b/include/linux/list.h
index ae537fa..bdc9bda 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -29,17 +29,8 @@ static inline void INIT_LIST_HEAD(struct list_head *list)
 }
 
 #ifdef CONFIG_DEBUG_LIST
-extern bool __list_add_valid(struct list_head *new,
-			      struct list_head *prev,
-			      struct list_head *next);
 extern bool __list_del_entry_valid(struct list_head *entry);
 #else
-static inline bool __list_add_valid(struct list_head *new,
-				struct list_head *prev,
-				struct list_head *next)
-{
-	return true;
-}
 static inline bool __list_del_entry_valid(struct list_head *entry)
 {
 	return true;
@@ -52,18 +43,21 @@ static inline bool __list_del_entry_valid(struct list_head *entry)
  * This is only for internal list manipulation where we know
  * the prev/next entries already!
  */
+#ifndef CONFIG_DEBUG_LIST
 static inline void __list_add(struct list_head *new,
 			      struct list_head *prev,
 			      struct list_head *next)
 {
-	if (!__list_add_valid(new, prev, next))
-		return;
-
 	next->prev = new;
 	new->next = next;
 	new->prev = prev;
 	WRITE_ONCE(prev->next, new);
 }
+#else
+extern void __list_add(struct list_head *new,
+			      struct list_head *prev,
+			      struct list_head *next);
+#endif
 
 /**
  * list_add - add a new entry
diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index b1fd8bf..0535b41 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -45,17 +45,19 @@ static inline void INIT_LIST_HEAD_RCU(struct list_head *list)
  * This is only for internal list manipulation where we know
  * the prev/next entries already!
  */
+#ifndef CONFIG_DEBUG_LIST
 static inline void __list_add_rcu(struct list_head *new,
 		struct list_head *prev, struct list_head *next)
 {
-	if (!__list_add_valid(new, prev, next))
-		return;
-
 	new->next = next;
 	new->prev = prev;
 	rcu_assign_pointer(list_next_rcu(prev), new);
 	next->prev = new;
 }
+#else
+void __list_add_rcu(struct list_head *new,
+		    struct list_head *prev, struct list_head *next);
+#endif
 
 /**
  * list_add_rcu - add a new entry to rcu-protected list
diff --git a/lib/list_debug.c b/lib/list_debug.c
index a34db8d..efb12a9 100644
--- a/lib/list_debug.c
+++ b/lib/list_debug.c
@@ -2,7 +2,8 @@
  * Copyright 2006, Red Hat, Inc., Dave Jones
  * Released under the General Public License (GPL).
  *
- * This file contains the linked list validation for DEBUG_LIST.
+ * This file contains the linked list implementations for
+ * DEBUG_LIST.
  */
 
 #include <linux/export.h>
@@ -12,28 +13,33 @@
 #include <linux/rculist.h>
 
 /*
- * Check that the data structures for the list manipulations are reasonably
- * valid. Failures here indicate memory corruption (and possibly an exploit
- * attempt).
+ * Insert a new entry between two known consecutive entries.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
  */
 
-bool __list_add_valid(struct list_head *new, struct list_head *prev,
-		      struct list_head *next)
+void __list_add(struct list_head *new,
+			      struct list_head *prev,
+			      struct list_head *next)
 {
-	if (CHECK_DATA_CORRUPTION(next->prev != prev,
-			"list_add corruption. next->prev should be prev (%p), but was %p. (next=%p).\n",
-			prev, next->prev, next) ||
-	    CHECK_DATA_CORRUPTION(prev->next != next,
-			"list_add corruption. prev->next should be next (%p), but was %p. (prev=%p).\n",
-			next, prev->next, prev) ||
-	    CHECK_DATA_CORRUPTION(new == prev || new == next,
-			"list_add double add: new=%p, prev=%p, next=%p.\n",
-			new, prev, next))
-		return false;
-
-	return true;
+	WARN(next->prev != prev,
+		"list_add corruption. next->prev should be "
+		"prev (%p), but was %p. (next=%p).\n",
+		prev, next->prev, next);
+	WARN(prev->next != next,
+		"list_add corruption. prev->next should be "
+		"next (%p), but was %p. (prev=%p).\n",
+		next, prev->next, prev);
+	WARN(new == prev || new == next,
+	     "list_add double add: new=%p, prev=%p, next=%p.\n",
+	     new, prev, next);
+	next->prev = new;
+	new->next = next;
+	new->prev = prev;
+	WRITE_ONCE(prev->next, new);
 }
-EXPORT_SYMBOL(__list_add_valid);
+EXPORT_SYMBOL(__list_add);
 
 bool __list_del_entry_valid(struct list_head *entry)
 {
@@ -60,3 +66,22 @@ bool __list_del_entry_valid(struct list_head *entry)
 
 }
 EXPORT_SYMBOL(__list_del_entry_valid);
+
+/*
+ * RCU variants.
+ */
+void __list_add_rcu(struct list_head *new,
+		    struct list_head *prev, struct list_head *next)
+{
+	WARN(next->prev != prev,
+		"list_add_rcu corruption. next->prev should be prev (%p), but was %p. (next=%p).\n",
+		prev, next->prev, next);
+	WARN(prev->next != next,
+		"list_add_rcu corruption. prev->next should be next (%p), but was %p. (prev=%p).\n",
+		next, prev->next, prev);
+	new->next = next;
+	new->prev = prev;
+	rcu_assign_pointer(list_next_rcu(prev), new);
+	next->prev = new;
+}
+EXPORT_SYMBOL(__list_add_rcu);
