[PATCH v3 18/24] locking/lockdep: Reuse list entries that are no longer in use

2018-12-06 Thread Bart Van Assche
Instead of abandoning elements of list_entries[] that are no longer in
use, make alloc_list_entry() reuse array elements that have been freed.
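
For readers without the lockdep sources at hand, the following stand-alone
sketch (plain user-space C; pool_in_use, pool_alloc() and pool_free() are
invented names, not part of this patch) illustrates the allocation scheme
the patch switches to: hand out the first slot whose bit is clear in an
in-use bitmap, and make a slot reusable again simply by clearing its bit.
The kernel code below does the same with find_first_zero_bit()/__set_bit()
on list_entries_in_use.

#include <limits.h>
#include <stdio.h>

#define POOL_SIZE     8
#define BITS_PER_WORD (CHAR_BIT * sizeof(unsigned long))

static unsigned long pool_in_use[(POOL_SIZE + BITS_PER_WORD - 1) / BITS_PER_WORD];

/* Return the index of the first free slot and mark it as in use, or -1. */
static int pool_alloc(void)
{
        for (int i = 0; i < POOL_SIZE; i++) {
                unsigned long *word = &pool_in_use[i / BITS_PER_WORD];
                unsigned long bit = 1UL << (i % BITS_PER_WORD);

                if (!(*word & bit)) {
                        *word |= bit;
                        return i;
                }
        }
        return -1;
}

/* Clearing the bit is all it takes to make a slot reusable. */
static void pool_free(int i)
{
        pool_in_use[i / BITS_PER_WORD] &= ~(1UL << (i % BITS_PER_WORD));
}

int main(void)
{
        int a = pool_alloc(), b = pool_alloc();

        pool_free(a);
        printf("%d %d %d\n", a, b, pool_alloc());  /* prints "0 1 0" */
        return 0;
}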

Cc: Peter Zijlstra 
Cc: Waiman Long 
Cc: Johannes Berg 
Signed-off-by: Bart Van Assche 
---
 kernel/locking/lockdep.c | 27 ++++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index ce8abd268727..6f338f852f7e 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -130,6 +130,8 @@ static inline int debug_locks_off_graph_unlock(void)
 
 unsigned long nr_list_entries;
 static struct lock_list list_entries[MAX_LOCKDEP_ENTRIES];
+static DECLARE_BITMAP(list_entries_in_use, MAX_LOCKDEP_ENTRIES);
+static DECLARE_BITMAP(list_entries_being_freed, MAX_LOCKDEP_ENTRIES);
 
 /*
  * All data structures here are protected by the global debug_lock.
@@ -870,7 +872,10 @@ register_lock_class(struct lockdep_map *lock, unsigned int 
subclass, int force)
  */
 static struct lock_list *alloc_list_entry(void)
 {
-   if (nr_list_entries >= MAX_LOCKDEP_ENTRIES) {
+   int idx = find_first_zero_bit(list_entries_in_use,
+ ARRAY_SIZE(list_entries));
+
+   if (idx >= ARRAY_SIZE(list_entries)) {
if (!debug_locks_off_graph_unlock())
return NULL;
 
@@ -878,7 +883,8 @@ static struct lock_list *alloc_list_entry(void)
dump_stack();
return NULL;
}
-   return list_entries + nr_list_entries++;
+   __set_bit(idx, list_entries_in_use);
+   return list_entries + idx;
 }
 
 /*
@@ -983,7 +989,7 @@ static inline void mark_lock_accessed(struct lock_list 
*lock,
unsigned long nr;
 
nr = lock - list_entries;
-   WARN_ON(nr >= nr_list_entries); /* Out-of-bounds, input fail */
+   WARN_ON(nr >= ARRAY_SIZE(list_entries)); /* Out-of-bounds, input fail */
lock->parent = parent;
lock->class->dep_gen_id = lockdep_dependency_gen_id;
 }
@@ -993,7 +999,7 @@ static inline unsigned long lock_accessed(struct lock_list 
*lock)
unsigned long nr;
 
nr = lock - list_entries;
-   WARN_ON(nr >= nr_list_entries); /* Out-of-bounds, input fail */
+   WARN_ON(nr >= ARRAY_SIZE(list_entries)); /* Out-of-bounds, input fail */
return lock->class->dep_gen_id == lockdep_dependency_gen_id;
 }
 
@@ -4252,9 +4258,12 @@ static void zap_class(struct lock_class *class)
 * Remove all dependencies this lock is
 * involved in:
 */
-   for (i = 0, entry = list_entries; i < nr_list_entries; i++, entry++) {
+   for_each_set_bit(i, list_entries_in_use, ARRAY_SIZE(list_entries)) {
+   entry = list_entries + i;
if (entry->class != class && entry->links_to != class)
continue;
+   if (__test_and_set_bit(i, list_entries_being_freed))
+   continue;
links_to = entry->links_to;
WARN_ON_ONCE(entry->class == links_to);
list_del_rcu(&entry->entry);
@@ -4287,8 +4296,9 @@ static inline int within(const void *addr, void *start, 
unsigned long size)
 }
 
 /*
- * Free all lock classes that are on the zapped_classes list. Called as an
- * RCU callback function.
+ * Free all lock classes that are on the zapped_classes list and also all list
+ * entries that have been marked as being freed. Called as an RCU callback
+ * function.
  */
 static void free_zapped_classes(struct callback_head *ch)
 {
@@ -4304,6 +4314,9 @@ static void free_zapped_classes(struct callback_head *ch)
nr_lock_classes--;
}
list_splice_init(&zapped_classes, &free_lock_classes);
+   bitmap_andnot(list_entries_in_use, list_entries_in_use,
+ list_entries_being_freed, ARRAY_SIZE(list_entries));
+   bitmap_clear(list_entries_being_freed, 0, ARRAY_SIZE(list_entries));
if (locked)
graph_unlock();
raw_local_irq_restore(flags);
-- 
2.20.0.rc2.403.gdbc3b29805-goog



[PATCH v3 20/24] locking/lockdep: Introduce __lockdep_free_key_range()

2018-12-06 Thread Bart Van Assche
This patch does not change any functionality but makes the next patch
in this series easier to read.

Cc: Peter Zijlstra 
Cc: Waiman Long 
Cc: Johannes Berg 
Signed-off-by: Bart Van Assche 
---
 kernel/locking/lockdep.c | 37 ++++++++++++++++++++++---------------
 1 file changed, 22 insertions(+), 15 deletions(-)

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 78f14c151407..8c69516b1283 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -4478,27 +4478,17 @@ static void schedule_free_zapped_classes(void)
 }
 
 /*
- * Used in module.c to remove lock classes from memory that is going to be
- * freed; and possibly re-used by other modules.
- *
- * We will have had one sync_sched() before getting here, so we're guaranteed
- * nobody will look up these exact classes -- they're properly dead but still
- * allocated.
+ * Remove all lock classes from the class hash table and from the
+ * all_lock_classes list whose key or name is in the address range [start,
+ * start + size). Move these lock classes to the zapped_classes list. Must
+ * be called with the graph lock held.
  */
-void lockdep_free_key_range(void *start, unsigned long size)
+static void __lockdep_free_key_range(void *start, unsigned long size)
 {
struct lock_class *class;
struct hlist_head *head;
-   unsigned long flags;
int i;
-   int locked;
-
-   raw_local_irq_save(flags);
-   locked = graph_lock();
 
-   /*
-* Unhash all classes that were created by this module:
-*/
for (i = 0; i < CLASSHASH_SIZE; i++) {
head = classhash_table + i;
hlist_for_each_entry_rcu(class, head, hash_entry) {
@@ -4511,7 +4501,24 @@ void lockdep_free_key_range(void *start, unsigned long 
size)
}
 
schedule_free_zapped_classes();
+}
 
+/*
+ * Used in module.c to remove lock classes from memory that is going to be
+ * freed; and possibly re-used by other modules.
+ *
+ * We will have had one sync_sched() before getting here, so we're guaranteed
+ * nobody will look up these exact classes -- they're properly dead but still
+ * allocated.
+ */
+void lockdep_free_key_range(void *start, unsigned long size)
+{
+   unsigned long flags;
+   int locked;
+
+   raw_local_irq_save(flags);
+   locked = graph_lock();
+   __lockdep_free_key_range(start, size);
if (locked)
graph_unlock();
raw_local_irq_restore(flags);
-- 
2.20.0.rc2.403.gdbc3b29805-goog



[PATCH v3 19/24] locking/lockdep: Check data structure consistency

2018-12-06 Thread Bart Van Assche
Debugging lockdep data structure inconsistencies is challenging. Add
disabled code that verifies data structure consistency at runtime.
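
As a rough illustration of the kind of invariant such checks verify, here
is a stand-alone sketch in plain C (struct node, struct owner and
check_consistency() are invented for this example): every element claimed
to be in use must actually be reachable from one of the owner lists, which
mirrors what in_any_class_list() below verifies for list_entries[].

#include <stdbool.h>
#include <stdio.h>

struct node  { struct node *next; };
struct owner { struct node *head; };            /* one list per owner */

static bool in_list(const struct node *e, const struct owner *o)
{
        for (const struct node *p = o->head; p; p = p->next)
                if (p == e)
                        return true;
        return false;
}

/* Invariant: every element claimed to be in use occurs in some owner list. */
static bool check_consistency(struct node **used, int n_used,
                              const struct owner *owners, int n_owners)
{
        for (int i = 0; i < n_used; i++) {
                bool found = false;

                for (int j = 0; j < n_owners && !found; j++)
                        found = in_list(used[i], &owners[j]);
                if (!found)
                        return false;           /* inconsistency detected */
        }
        return true;
}

int main(void)
{
        struct node a = { 0 }, stray = { 0 };
        struct owner o = { &a };
        struct node *used[] = { &a, &stray };

        printf("%d %d\n",                       /* prints "1 0" */
               check_consistency(used, 1, &o, 1),
               check_consistency(used, 2, &o, 1));
        return 0;
}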

Cc: Peter Zijlstra 
Cc: Waiman Long 
Cc: Johannes Berg 
Signed-off-by: Bart Van Assche 
---
 kernel/locking/lockdep.c | 146 +++
 1 file changed, 146 insertions(+)

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 6f338f852f7e..78f14c151407 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -72,6 +72,8 @@ module_param(lock_stat, int, 0644);
 #define lock_stat 0
 #endif
 
+static bool check_data_structure_consistency;
+
 /*
  * lockdep_lock: protects the lockdep graph, the hashes and the
  *   class/list/hash allocators.
@@ -744,6 +746,148 @@ static bool assign_lock_key(struct lockdep_map *lock)
return true;
 }
 
+/* Check whether element @e occurs in list @h */
+static bool in_list(struct list_head *e, struct list_head *h)
+{
+   struct list_head *f;
+
+   list_for_each(f, h) {
+   if (e == f)
+   return true;
+   }
+
+   return false;
+}
+
+/*
+ * Check whether entry @e occurs in any of the locks_after or locks_before
+ * lists.
+ */
+static bool in_any_class_list(struct list_head *e)
+{
+   struct lock_class *class;
+   int i;
+
+   for (i = 0; i < ARRAY_SIZE(lock_classes); i++) {
+   class = &lock_classes[i];
+   if (in_list(e, &class->locks_after) ||
+   in_list(e, &class->locks_before))
+   return true;
+   }
+   return false;
+}
+
+static bool class_lock_list_valid(struct lock_class *c, struct list_head *h)
+{
+   struct lock_list *e;
+
+   list_for_each_entry(e, h, entry) {
+   if (e->links_to != c) {
+   printk(KERN_INFO "class %s: mismatch for lock entry %ld; class %s <> %s",
+  c->name ? : "(?)",
+  (unsigned long)(e - list_entries),
+  e->links_to && e->links_to->name ?
+  e->links_to->name : "(?)",
+  e->class && e->class->name ? e->class->name :
+  "(?)");
+   return false;
+   }
+   }
+   return true;
+}
+
+static u16 chain_hlocks[];
+
+static bool check_lock_chain_key(struct lock_chain *chain)
+{
+#ifdef CONFIG_PROVE_LOCKING
+   u64 chain_key = 0;
+   int i;
+
+   for (i = chain->base; i < chain->base + chain->depth; i++)
+   chain_key = iterate_chain_key(chain_key, chain_hlocks[i] + 1);
+   /*
+* The 'unsigned long long' casts avoid that a compiler warning
+* is reported when building tools/lib/lockdep.
+*/
+   if (chain->chain_key != chain_key)
+   printk(KERN_INFO "chain %lld: key %#llx <> %#llx\n",
+  (unsigned long long)(chain - lock_chains),
+  (unsigned long long)chain->chain_key,
+  (unsigned long long)chain_key);
+   return chain->chain_key == chain_key;
+#else
+   return true;
+#endif
+}
+
+static bool check_data_structures(void)
+{
+   struct lock_class *class;
+   struct lock_chain *chain;
+   struct hlist_head *head;
+   struct lock_list *e;
+   int i;
+
+   /*
+* Check whether all list entries that are in use occur in a class
+* lock list.
+*/
+   for_each_set_bit(i, list_entries_in_use, ARRAY_SIZE(list_entries)) {
+   if (test_bit(i, list_entries_being_freed))
+   continue;
+   e = list_entries + i;
+   if (!in_any_class_list(&e->entry)) {
+   printk(KERN_INFO "list entry %d is not in any class list; class %s <> %s\n",
+  (unsigned int)(e - list_entries),
+  e->class->name ? : "(?)",
+  e->links_to->name ? : "(?)");
+   return false;
+   }
+   }
+
+   /*
+* Check whether all list entries that are not in use do not occur in
+* a class lock list.
+*/
+   for_each_clear_bit(i, list_entries_in_use, ARRAY_SIZE(list_entries)) {
+   e = list_entries + i;
+   if (WARN_ON_ONCE(test_bit(i, list_entries_being_freed)))
+   return false;
+   if (in_any_class_list(&e->entry)) {
+   printk(KERN_INFO "list entry %d occurs in a class list; class %s <> %s\n",
+  (unsigned int)(e - list_entries),
+  e->class && e->class->name ? e->class->name :
+  "(?)",
+  e->links_to && e->links_to->name ?
+  e->links_to->name : "(?)");
+   return false;
+   }
+   }
+
+   

Re: [PATCH v2] clocksource/drivers/fttmr010: fix invalid interrupt register access

2018-12-06 Thread Tao Ren
On 11/5/18, 11:00 AM, "Tao Ren"  wrote:

> On 11/5/18, 10:51 AM, "Daniel Lezcano"  wrote:
>> Oh right, sorry. Should it go to stable also ? Is there a Fixes tag it can 
>> apply ?
>>
> Personally I don't think it needs to go to stable, because I don't see any 
> functional failures caused by this invalid register access.

Hi Daniel,

Not sure if I missed any emails from you, but looks like the patch is not 
included in your tree? Are we planning to include the patch in 4.21 merge 
window?

Thanks,

Tao Ren



[PATCH v3 24/24] lockdep tests: Test dynamic key registration

2018-12-06 Thread Bart Van Assche
Make sure that the lockdep_register_key() and lockdep_unregister_key()
code is tested when running the lockdep tests.

Cc: Peter Zijlstra 
Cc: Waiman Long 
Cc: Johannes Berg 
Signed-off-by: Bart Van Assche 
---
 tools/lib/lockdep/include/liblockdep/common.h |  2 ++
 tools/lib/lockdep/include/liblockdep/mutex.h  | 11 ++++++-----
 tools/lib/lockdep/tests/ABBA.c                |  9 +++++++++
 3 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/tools/lib/lockdep/include/liblockdep/common.h 
b/tools/lib/lockdep/include/liblockdep/common.h
index d640a9761f09..a81d91d4fc78 100644
--- a/tools/lib/lockdep/include/liblockdep/common.h
+++ b/tools/lib/lockdep/include/liblockdep/common.h
@@ -45,6 +45,8 @@ void lock_acquire(struct lockdep_map *lock, unsigned int 
subclass,
 void lock_release(struct lockdep_map *lock, int nested,
unsigned long ip);
 void lockdep_reset_lock(struct lockdep_map *lock);
+void lockdep_register_key(struct lock_class_key *key);
+void lockdep_unregister_key(struct lock_class_key *key);
 extern void debug_check_no_locks_freed(const void *from, unsigned long len);
 
 #define STATIC_LOCKDEP_MAP_INIT(_name, _key) \
diff --git a/tools/lib/lockdep/include/liblockdep/mutex.h 
b/tools/lib/lockdep/include/liblockdep/mutex.h
index 2073d4e1f2f0..783dd0df06f9 100644
--- a/tools/lib/lockdep/include/liblockdep/mutex.h
+++ b/tools/lib/lockdep/include/liblockdep/mutex.h
@@ -7,6 +7,7 @@
 
 struct liblockdep_pthread_mutex {
pthread_mutex_t mutex;
+   struct lock_class_key key;
struct lockdep_map dep_map;
 };
 
@@ -27,11 +28,10 @@ static inline int __mutex_init(liblockdep_pthread_mutex_t 
*lock,
return pthread_mutex_init(&lock->mutex, __mutexattr);
 }
 
-#define liblockdep_pthread_mutex_init(mutex, mutexattr)\
-({ \
-   static struct lock_class_key __key; \
-   \
-   __mutex_init((mutex), #mutex, &__key, (mutexattr)); \
+#define liblockdep_pthread_mutex_init(mutex, mutexattr)\
+({ \
+   lockdep_register_key(&(mutex)->key);\
+   __mutex_init((mutex), #mutex, &(mutex)->key, (mutexattr));  \
 })
 
 static inline int liblockdep_pthread_mutex_lock(liblockdep_pthread_mutex_t 
*lock)
@@ -55,6 +55,7 @@ static inline int 
liblockdep_pthread_mutex_trylock(liblockdep_pthread_mutex_t *l
 static inline int liblockdep_pthread_mutex_destroy(liblockdep_pthread_mutex_t 
*lock)
 {
lockdep_reset_lock(&lock->dep_map);
+   lockdep_unregister_key(&lock->key);
return pthread_mutex_destroy(&lock->mutex);
 }
 
diff --git a/tools/lib/lockdep/tests/ABBA.c b/tools/lib/lockdep/tests/ABBA.c
index 623313f54720..543789bc3e37 100644
--- a/tools/lib/lockdep/tests/ABBA.c
+++ b/tools/lib/lockdep/tests/ABBA.c
@@ -14,4 +14,13 @@ void main(void)
 
pthread_mutex_destroy(&b);
pthread_mutex_destroy(&a);
+
+   pthread_mutex_init(&a, NULL);
+   pthread_mutex_init(&b, NULL);
+
+   LOCK_UNLOCK_2(a, b);
+   LOCK_UNLOCK_2(b, a);
+
+   pthread_mutex_destroy(&b);
+   pthread_mutex_destroy(&a);
 }
-- 
2.20.0.rc2.403.gdbc3b29805-goog



[PATCH v3 23/24] kernel/workqueue: Use dynamic lockdep keys for workqueues

2018-12-06 Thread Bart Van Assche
Commit 87915adc3f0a ("workqueue: re-add lockdep dependencies for flushing")
improved deadlock checking in the workqueue implementation. Unfortunately
that patch also introduced a few false positive lockdep complaints. This
patch suppresses these false positives by allocating the workqueue lockdep
keys dynamically. An example of a false positive that is suppressed by this
patch can be found below. Its root cause is that the direct I/O code can
call alloc_workqueue() from inside a work item created by another
alloc_workqueue() call, and that both workqueues then share the same
lockdep key. Allocating the workqueue lockdep keys dynamically avoids
triggering that complaint: this patch guarantees that a unique lockdep key
is associated with each workqueue mutex.
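
To illustrate why one static key shared by every workqueue created at the
same call site leads to reports like the one below, compare a function-static
key with a key embedded in each object. This is a plain user-space sketch
with made-up types (struct key and struct wq only stand in for struct
lock_class_key and a workqueue): lockdep classifies locks by key address, so
the static-key pattern folds all such workqueues into one lock class, while
a per-instance key, which is what this patch introduces, gives each
workqueue its own class.

#include <stdio.h>

struct key { char dummy; };             /* stand-in for struct lock_class_key */
struct wq  { struct key key; };         /* per-instance key, as in this patch */

/* Old pattern: one function-static key shared by every caller. */
static struct key *shared_key(void)
{
        static struct key k;
        return &k;
}

int main(void)
{
        struct wq a, b;

        /* Same address twice: both workqueues end up in one lock class. */
        printf("shared key:        %p %p\n",
               (void *)shared_key(), (void *)shared_key());
        /* Two distinct addresses: each workqueue gets its own lock class. */
        printf("per-instance keys: %p %p\n",
               (void *)&a.key, (void *)&b.key);
        return 0;
}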

==
WARNING: possible circular locking dependency detected
4.19.0-dbg+ #1 Not tainted
--
fio/4129 is trying to acquire lock:
a01cfe1a ((wq_completion)"dio/%s"sb->s_id){+.+.}, at: flush_workqueue+0xd0/0x970

but task is already holding lock:
a0acecf9 (&sb->s_type->i_mutex_key#14){+.+.}, at: ext4_file_write_iter+0x154/0x710

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #2 (&sb->s_type->i_mutex_key#14){+.+.}:
   down_write+0x3d/0x80
   __generic_file_fsync+0x77/0xf0
   ext4_sync_file+0x3c9/0x780
   vfs_fsync_range+0x66/0x100
   dio_complete+0x2f5/0x360
   dio_aio_complete_work+0x1c/0x20
   process_one_work+0x481/0x9f0
   worker_thread+0x63/0x5a0
   kthread+0x1cf/0x1f0
   ret_from_fork+0x24/0x30

-> #1 ((work_completion)(&dio->complete_work)){+.+.}:
   process_one_work+0x447/0x9f0
   worker_thread+0x63/0x5a0
   kthread+0x1cf/0x1f0
   ret_from_fork+0x24/0x30

-> #0 ((wq_completion)"dio/%s"sb->s_id){+.+.}:
   lock_acquire+0xc5/0x200
   flush_workqueue+0xf3/0x970
   drain_workqueue+0xec/0x220
   destroy_workqueue+0x23/0x350
   sb_init_dio_done_wq+0x6a/0x80
   do_blockdev_direct_IO+0x1f33/0x4be0
   __blockdev_direct_IO+0x79/0x86
   ext4_direct_IO+0x5df/0xbb0
   generic_file_direct_write+0x119/0x220
   __generic_file_write_iter+0x131/0x2d0
   ext4_file_write_iter+0x3fa/0x710
   aio_write+0x235/0x330
   io_submit_one+0x510/0xeb0
   __x64_sys_io_submit+0x122/0x340
   do_syscall_64+0x71/0x220
   entry_SYSCALL_64_after_hwframe+0x49/0xbe

other info that might help us debug this:

Chain exists of:
  (wq_completion)"dio/%s"sb->s_id --> (work_completion)(&dio->complete_work) --> &sb->s_type->i_mutex_key#14

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(&sb->s_type->i_mutex_key#14);
                               lock((work_completion)(&dio->complete_work));
                               lock(&sb->s_type->i_mutex_key#14);
  lock((wq_completion)"dio/%s"sb->s_id);

 *** DEADLOCK ***

1 lock held by fio/4129:
 #0: a0acecf9 (&sb->s_type->i_mutex_key#14){+.+.}, at: ext4_file_write_iter+0x154/0x710

stack backtrace:
CPU: 3 PID: 4129 Comm: fio Not tainted 4.19.0-dbg+ #1
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014
Call Trace:
 dump_stack+0x86/0xc5
 print_circular_bug.isra.32+0x20a/0x218
 __lock_acquire+0x1c68/0x1cf0
 lock_acquire+0xc5/0x200
 flush_workqueue+0xf3/0x970
 drain_workqueue+0xec/0x220
 destroy_workqueue+0x23/0x350
 sb_init_dio_done_wq+0x6a/0x80
 do_blockdev_direct_IO+0x1f33/0x4be0
 __blockdev_direct_IO+0x79/0x86
 ext4_direct_IO+0x5df/0xbb0
 generic_file_direct_write+0x119/0x220
 __generic_file_write_iter+0x131/0x2d0
 ext4_file_write_iter+0x3fa/0x710
 aio_write+0x235/0x330
 io_submit_one+0x510/0xeb0
 __x64_sys_io_submit+0x122/0x340
 do_syscall_64+0x71/0x220
 entry_SYSCALL_64_after_hwframe+0x49/0xbe

Cc: Ingo Molnar 
Cc: Peter Zijlstra 
Cc: Will Deacon 
Cc: Tejun Heo 
Cc: Waiman Long 
Cc: Johannes Berg 
Signed-off-by: Bart Van Assche 
---
 include/linux/workqueue.h | 28 +++---
 kernel/workqueue.c| 60 +--
 2 files changed, 55 insertions(+), 33 deletions(-)

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 60d673e15632..d9a1a480e920 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -390,43 +390,23 @@ extern struct workqueue_struct *system_freezable_wq;
 extern struct workqueue_struct *system_power_efficient_wq;
 extern struct workqueue_struct *system_freezable_power_efficient_wq;
 
-extern struct workqueue_struct *
-__alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active,
-   struct lock_class_key *key, const char *lock_name, ...) __printf(1, 6);
-
 /**
  * alloc_workqueue - allocate a workqueue
  * @fmt: printf format for the name of the workqueue
  * @flags: WQ_* flags
  * 

[PATCH v3 17/24] locking/lockdep: Free lock classes that are no longer in use

2018-12-06 Thread Bart Van Assche
Instead of leaving lock classes that are no longer in use in the
lock_classes[] array, reuse those array entries. Maintain a linked list of
free lock classes with list head 'free_lock_classes'. Initialize that list
from inside register_lock_class() instead of from inside lockdep_init()
because register_lock_class() can be called before lockdep_init() has run.
Only add freed lock classes to the free_lock_classes list after a grace
period, so that a lock_classes[] element is never reused while an RCU
reader may still be accessing it.
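
The grace-period handling is the subtle part. The following deliberately
simplified, single-threaded sketch (plain C; zap(), grace_period_over() and
the element pool are invented names) shows the idea: freed elements are
parked on an intermediate list and only migrate to the free list once the
stand-in for the RCU callback has run, so nothing that a reader might still
reference is handed out again in the meantime.

#include <stdio.h>

struct elem { struct elem *next; int id; };

static struct elem pool[3] = { { 0, 0 }, { 0, 1 }, { 0, 2 } };
static struct elem *free_list;          /* eligible for reuse */
static struct elem *zapped_list;        /* freed, grace period not over yet */

static void zap(struct elem *e)         /* roughly what zap_class() does */
{
        e->next = zapped_list;
        zapped_list = e;
}

static void grace_period_over(void)     /* roughly the RCU callback */
{
        while (zapped_list) {
                struct elem *e = zapped_list;

                zapped_list = e->next;
                e->next = free_list;
                free_list = e;          /* only now may it be handed out again */
        }
}

int main(void)
{
        zap(&pool[1]);
        printf("reusable before grace period: %s\n", free_list ? "yes" : "no");
        grace_period_over();
        printf("reusable after grace period:  id %d\n", free_list->id);
        return 0;
}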

Cc: Peter Zijlstra 
Cc: Waiman Long 
Cc: Johannes Berg 
Signed-off-by: Bart Van Assche 
---
 include/linux/lockdep.h  |   9 +-
 kernel/locking/lockdep.c | 242 ---
 2 files changed, 208 insertions(+), 43 deletions(-)

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 9421f028c26c..72b4d5bb409f 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -63,7 +63,8 @@ extern struct lock_class_key __lockdep_no_validate__;
 #define LOCKSTAT_POINTS4
 
 /*
- * The lock-class itself:
+ * The lock-class itself. The order of the structure members matters.
+ * reinit_class() zeroes the key member and all subsequent members.
  */
 struct lock_class {
/*
@@ -72,7 +73,9 @@ struct lock_class {
struct hlist_node   hash_entry;
 
/*
-* global list of all lock-classes:
+* Entry in all_lock_classes when in use. Entry in free_lock_classes
+* when not in use. Instances that are being freed are on the
+* zapped_classes list.
 */
struct list_headlock_entry;
 
@@ -106,7 +109,7 @@ struct lock_class {
unsigned long   contention_point[LOCKSTAT_POINTS];
unsigned long   contending_point[LOCKSTAT_POINTS];
 #endif
-};
+} __no_randomize_layout;
 
 #ifdef CONFIG_LOCK_STAT
 struct lock_time {
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 92bdb187987f..ce8abd268727 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -134,8 +134,8 @@ static struct lock_list list_entries[MAX_LOCKDEP_ENTRIES];
 /*
  * All data structures here are protected by the global debug_lock.
  *
- * Mutex key structs only get allocated, once during bootup, and never
- * get freed - this significantly simplifies the debugging code.
+ * nr_lock_classes is the number of elements of lock_classes[] that is
+ * in use.
  */
 unsigned long nr_lock_classes;
 #ifndef CONFIG_DEBUG_LOCKDEP
@@ -277,11 +277,18 @@ static inline void lock_release_holdtime(struct held_lock 
*hlock)
 #endif
 
 /*
- * We keep a global list of all lock classes. The list only grows,
- * never shrinks. The list is only accessed with the lockdep
- * spinlock lock held.
+ * We keep a global list of all lock classes. The list is only accessed with
+ * the lockdep spinlock lock held. The zapped_classes list contains lock
+ * classes that are about to be freed but that may still be accessed by an RCU
+ * reader. free_lock_classes is a list with free elements. These elements are
+ * linked together by the lock_entry member in struct lock_class.
  */
 LIST_HEAD(all_lock_classes);
+static LIST_HEAD(zapped_classes);
+static LIST_HEAD(free_lock_classes);
+static bool initialization_happened;
+static bool rcu_callback_scheduled;
+static struct rcu_head free_zapped_classes_rcu_head;
 
 /*
  * The lockdep classes are in a hash-table as well, for fast lookup:
@@ -735,6 +742,21 @@ static bool assign_lock_key(struct lockdep_map *lock)
return true;
 }
 
+/*
+ * Initialize the lock_classes[] array elements and also the free_lock_classes
+ * list.
+ */
+static void init_data_structures(void)
+{
+   int i;
+
+   for (i = 0; i < ARRAY_SIZE(lock_classes); i++) {
+   list_add_tail(&lock_classes[i].lock_entry, &free_lock_classes);
+   INIT_LIST_HEAD(&lock_classes[i].locks_after);
+   INIT_LIST_HEAD(&lock_classes[i].locks_before);
+   }
+}
+
 /*
  * Register a lock's class in the hash-table, if the class is not present
  * yet. Otherwise we look it up. We cache the result in the lock object
@@ -775,11 +797,14 @@ register_lock_class(struct lockdep_map *lock, unsigned 
int subclass, int force)
goto out_unlock_set;
}
 
-   /*
-* Allocate a new key from the static array, and add it to
-* the hash:
-*/
-   if (nr_lock_classes >= MAX_LOCKDEP_KEYS) {
+   /* Allocate a new lock class and add it to the hash. */
+   if (unlikely(!initialization_happened)) {
+   initialization_happened = true;
+   init_data_structures();
+   }
+   class = list_first_entry_or_null(&free_lock_classes, typeof(*class),
+lock_entry);
+   if (!class) {
if (!debug_locks_off_graph_unlock()) {
return NULL;
}
@@ 

[PATCH v3 03/24] lockdep tests: Improve testing accuracy

2018-12-06 Thread Bart Van Assche
Instead of checking whether the tests produced any output, check the
output itself. This prevents e.g. stray debug output from causing the
message "PASSED!" to be reported for tests that actually failed.

Cc: Peter Zijlstra 
Cc: Waiman Long 
Cc: Johannes Berg 
Signed-off-by: Bart Van Assche 
---
 tools/lib/lockdep/run_tests.sh| 5 +++--
 tools/lib/lockdep/tests/AA.sh | 2 ++
 tools/lib/lockdep/tests/ABA.sh| 2 ++
 tools/lib/lockdep/tests/ABBA.sh   | 2 ++
 tools/lib/lockdep/tests/ABBA_2threads.sh  | 2 ++
 tools/lib/lockdep/tests/ABBCCA.sh | 2 ++
 tools/lib/lockdep/tests/ABBCCDDA.sh   | 2 ++
 tools/lib/lockdep/tests/ABCABC.sh | 2 ++
 tools/lib/lockdep/tests/ABCDBCDA.sh   | 2 ++
 tools/lib/lockdep/tests/ABCDBDDA.sh   | 2 ++
 tools/lib/lockdep/tests/WW.sh | 2 ++
 tools/lib/lockdep/tests/unlock_balance.sh | 2 ++
 12 files changed, 25 insertions(+), 2 deletions(-)
 create mode 100755 tools/lib/lockdep/tests/AA.sh
 create mode 100755 tools/lib/lockdep/tests/ABA.sh
 create mode 100755 tools/lib/lockdep/tests/ABBA.sh
 create mode 100755 tools/lib/lockdep/tests/ABBA_2threads.sh
 create mode 100755 tools/lib/lockdep/tests/ABBCCA.sh
 create mode 100755 tools/lib/lockdep/tests/ABBCCDDA.sh
 create mode 100755 tools/lib/lockdep/tests/ABCABC.sh
 create mode 100755 tools/lib/lockdep/tests/ABCDBCDA.sh
 create mode 100755 tools/lib/lockdep/tests/ABCDBDDA.sh
 create mode 100755 tools/lib/lockdep/tests/WW.sh
 create mode 100755 tools/lib/lockdep/tests/unlock_balance.sh

diff --git a/tools/lib/lockdep/run_tests.sh b/tools/lib/lockdep/run_tests.sh
index 253719ee6377..bc36178329a8 100755
--- a/tools/lib/lockdep/run_tests.sh
+++ b/tools/lib/lockdep/run_tests.sh
@@ -11,7 +11,7 @@ find tests -name '*.c' | sort | while read -r i; do
testname=$(basename "$i" .c)
echo -ne "$testname... "
if gcc -o "tests/$testname" -pthread "$i" liblockdep.a -Iinclude -D__USE_LIBLOCKDEP &&
-   [ "$(timeout 1 "./tests/$testname" 2>&1 | wc -l)" -gt 0 ]; then
+   timeout 1 "tests/$testname" 2>&1 | "tests/${testname}.sh"; then
echo "PASSED!"
else
echo "FAILED!"
@@ -23,7 +23,8 @@ find tests -name '*.c' | sort | while read -r i; do
testname=$(basename "$i" .c)
echo -ne "(PRELOAD) $testname... "
if gcc -o "tests/$testname" -pthread -Iinclude "$i" &&
-   [ "$(timeout 1 ./lockdep "./tests/$testname" 2>&1 | wc -l)" -gt 0 ]; then
+   timeout 1 ./lockdep "tests/$testname" 2>&1 |
+   "tests/${testname}.sh"; then
echo "PASSED!"
else
echo "FAILED!"
diff --git a/tools/lib/lockdep/tests/AA.sh b/tools/lib/lockdep/tests/AA.sh
new file mode 100755
index ..f39b32865074
--- /dev/null
+++ b/tools/lib/lockdep/tests/AA.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+grep -q 'WARNING: possible recursive locking detected'
diff --git a/tools/lib/lockdep/tests/ABA.sh b/tools/lib/lockdep/tests/ABA.sh
new file mode 100755
index ..f39b32865074
--- /dev/null
+++ b/tools/lib/lockdep/tests/ABA.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+grep -q 'WARNING: possible recursive locking detected'
diff --git a/tools/lib/lockdep/tests/ABBA.sh b/tools/lib/lockdep/tests/ABBA.sh
new file mode 100755
index ..fc31c607a5a8
--- /dev/null
+++ b/tools/lib/lockdep/tests/ABBA.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+grep -q 'WARNING: possible circular locking dependency detected'
diff --git a/tools/lib/lockdep/tests/ABBA_2threads.sh 
b/tools/lib/lockdep/tests/ABBA_2threads.sh
new file mode 100755
index ..fc31c607a5a8
--- /dev/null
+++ b/tools/lib/lockdep/tests/ABBA_2threads.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+grep -q 'WARNING: possible circular locking dependency detected'
diff --git a/tools/lib/lockdep/tests/ABBCCA.sh 
b/tools/lib/lockdep/tests/ABBCCA.sh
new file mode 100755
index ..fc31c607a5a8
--- /dev/null
+++ b/tools/lib/lockdep/tests/ABBCCA.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+grep -q 'WARNING: possible circular locking dependency detected'
diff --git a/tools/lib/lockdep/tests/ABBCCDDA.sh 
b/tools/lib/lockdep/tests/ABBCCDDA.sh
new file mode 100755
index ..fc31c607a5a8
--- /dev/null
+++ b/tools/lib/lockdep/tests/ABBCCDDA.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+grep -q 'WARNING: possible circular locking dependency detected'
diff --git a/tools/lib/lockdep/tests/ABCABC.sh 
b/tools/lib/lockdep/tests/ABCABC.sh
new file mode 100755
index ..fc31c607a5a8
--- /dev/null
+++ b/tools/lib/lockdep/tests/ABCABC.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+grep -q 'WARNING: possible circular locking dependency detected'
diff --git a/tools/lib/lockdep/tests/ABCDBCDA.sh 
b/tools/lib/lockdep/tests/ABCDBCDA.sh
new file mode 100755
index ..fc31c607a5a8
--- /dev/null
+++ b/tools/lib/lockdep/tests/ABCDBCDA.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+grep -q 'WARNING: possible circular locking dependency detected'
diff --git 

[PATCH v3 15/24] locking/lockdep: Reorder struct lock_class members

2018-12-06 Thread Bart Van Assche
This patch does not change any functionality but makes the patch that
frees lock classes that are no longer in use easier to read.

Cc: Peter Zijlstra 
Cc: Waiman Long 
Cc: Johannes Berg 
Signed-off-by: Bart Van Assche 
---
 include/linux/lockdep.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 6d0f8d1c2bee..9421f028c26c 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -76,6 +76,13 @@ struct lock_class {
 */
struct list_headlock_entry;
 
+   /*
+* These fields represent a directed graph of lock dependencies,
+* to every node we attach a list of "forward" and a list of
+* "backward" graph nodes.
+*/
+   struct list_headlocks_after, locks_before;
+
struct lockdep_subclass_key *key;
unsigned intsubclass;
unsigned intdep_gen_id;
@@ -86,13 +93,6 @@ struct lock_class {
unsigned long   usage_mask;
struct stack_trace  usage_traces[XXX_LOCK_USAGE_STATES];
 
-   /*
-* These fields represent a directed graph of lock dependencies,
-* to every node we attach a list of "forward" and a list of
-* "backward" graph nodes.
-*/
-   struct list_headlocks_after, locks_before;
-
/*
 * Generation counter, when doing certain classes of graph walking,
 * to ensure that we check one node only once:
-- 
2.20.0.rc2.403.gdbc3b29805-goog



[PATCH v3 16/24] locking/lockdep: Retain the class key and name while freeing a lock class

2018-12-06 Thread Bart Van Assche
The next patch in this series uses the class name in code that
detects calls to lock_acquire() while a class key is being freed. Hence
retain the class name for lock classes that are being freed.

Cc: Peter Zijlstra 
Cc: Waiman Long 
Cc: Johannes Berg 
Signed-off-by: Bart Van Assche 
---
 kernel/locking/lockdep.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index ecd92969674c..92bdb187987f 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -4147,10 +4147,8 @@ static void zap_class(struct lock_class *class)
 * Unhash the class and remove it from the all_lock_classes list:
 */
hlist_del_rcu(&class->hash_entry);
+   class->hash_entry.pprev = NULL;
list_del(&class->lock_entry);
-
-   RCU_INIT_POINTER(class->key, NULL);
-   RCU_INIT_POINTER(class->name, NULL);
 }
 
 static inline int within(const void *addr, void *start, unsigned long size)
-- 
2.20.0.rc2.403.gdbc3b29805-goog



[PATCH v3 05/24] liblockdep: Rename "trywlock" into "trywrlock"

2018-12-06 Thread Bart Van Assche
This patch prevents the following compiler warning from being reported while
compiling the lockdep unit tests:

include/liblockdep/rwlock.h: In function 'liblockdep_pthread_rwlock_trywlock':
include/liblockdep/rwlock.h:66:9: warning: implicit declaration of function 'pthread_rwlock_trywlock'; did you mean 'pthread_rwlock_trywrlock'? [-Wimplicit-function-declaration]
  return pthread_rwlock_trywlock(&lock->rwlock) == 0 ? 1 : 0;
         ^~~~~~~~~~~~~~~~~~~~~~~
         pthread_rwlock_trywrlock

Fixes: 5a52c9b480e0 ("liblockdep: Add public headers for pthread_rwlock_t 
implementation")
Cc: Sasha Levin 
Cc: Peter Zijlstra 
Cc: Waiman Long 
Cc: Johannes Berg 
Signed-off-by: Bart Van Assche 
---
 tools/lib/lockdep/include/liblockdep/rwlock.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/lib/lockdep/include/liblockdep/rwlock.h 
b/tools/lib/lockdep/include/liblockdep/rwlock.h
index a96c3bf0fef1..365762e3a1ea 100644
--- a/tools/lib/lockdep/include/liblockdep/rwlock.h
+++ b/tools/lib/lockdep/include/liblockdep/rwlock.h
@@ -60,10 +60,10 @@ static inline int 
liblockdep_pthread_rwlock_tryrdlock(liblockdep_pthread_rwlock_
return pthread_rwlock_tryrdlock(&lock->rwlock) == 0 ? 1 : 0;
 }
 
-static inline int liblockdep_pthread_rwlock_trywlock(liblockdep_pthread_rwlock_t *lock)
+static inline int liblockdep_pthread_rwlock_trywrlock(liblockdep_pthread_rwlock_t *lock)
 {
lock_acquire(&lock->dep_map, 0, 1, 0, 1, NULL, (unsigned long)_RET_IP_);
-   return pthread_rwlock_trywlock(&lock->rwlock) == 0 ? 1 : 0;
+   return pthread_rwlock_trywrlock(&lock->rwlock) == 0 ? 1 : 0;
 }
 
 static inline int liblockdep_rwlock_destroy(liblockdep_pthread_rwlock_t *lock)
@@ -79,7 +79,7 @@ static inline int 
liblockdep_rwlock_destroy(liblockdep_pthread_rwlock_t *lock)
 #define pthread_rwlock_unlock  liblockdep_pthread_rwlock_unlock
 #define pthread_rwlock_wrlock  liblockdep_pthread_rwlock_wrlock
 #define pthread_rwlock_tryrdlock   liblockdep_pthread_rwlock_tryrdlock
-#define pthread_rwlock_trywlock   liblockdep_pthread_rwlock_trywlock
+#define pthread_rwlock_trywrlock   liblockdep_pthread_rwlock_trywrlock
 #define pthread_rwlock_destroy liblockdep_rwlock_destroy
 
 #endif
-- 
2.20.0.rc2.403.gdbc3b29805-goog



[PATCH v3 16/24] locking/lockdep: Retain the class key and name while freeing a lock class

2018-12-06 Thread Bart Van Assche
The next patch in this series uses the class name in code that
detects calls to lock_acquire() while a class key is being freed. Hence
retain the class name for lock classes that are being freed.

Cc: Peter Zijlstra 
Cc: Waiman Long 
Cc: Johannes Berg 
Signed-off-by: Bart Van Assche 
---
 kernel/locking/lockdep.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index ecd92969674c..92bdb187987f 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -4147,10 +4147,8 @@ static void zap_class(struct lock_class *class)
 * Unhash the class and remove it from the all_lock_classes list:
 */
hlist_del_rcu(>hash_entry);
+   class->hash_entry.pprev = NULL;
list_del(>lock_entry);
-
-   RCU_INIT_POINTER(class->key, NULL);
-   RCU_INIT_POINTER(class->name, NULL);
 }
 
 static inline int within(const void *addr, void *start, unsigned long size)
-- 
2.20.0.rc2.403.gdbc3b29805-goog



[PATCH v3 05/24] liblockdep: Rename "trywlock" into "trywrlock"

2018-12-06 Thread Bart Van Assche
This patch avoids that the following compiler warning is reported while
compiling the lockdep unit tests:

include/liblockdep/rwlock.h: In function 'liblockdep_pthread_rwlock_trywlock':
include/liblockdep/rwlock.h:66:9: warning: implicit declaration of function 
'pthread_rwlock_trywlock'; did you mean 'pthread_rwlock_trywrlock'? 
[-Wimplicit-function-declaration]
  return pthread_rwlock_trywlock(>rwlock) == 0 ? 1 : 0;
 ^~~
 pthread_rwlock_trywrlock

Fixes: 5a52c9b480e0 ("liblockdep: Add public headers for pthread_rwlock_t 
implementation")
Cc: Sasha Levin 
Cc: Peter Zijlstra 
Cc: Waiman Long 
Cc: Johannes Berg 
Signed-off-by: Bart Van Assche 
---
 tools/lib/lockdep/include/liblockdep/rwlock.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/lib/lockdep/include/liblockdep/rwlock.h 
b/tools/lib/lockdep/include/liblockdep/rwlock.h
index a96c3bf0fef1..365762e3a1ea 100644
--- a/tools/lib/lockdep/include/liblockdep/rwlock.h
+++ b/tools/lib/lockdep/include/liblockdep/rwlock.h
@@ -60,10 +60,10 @@ static inline int 
liblockdep_pthread_rwlock_tryrdlock(liblockdep_pthread_rwlock_
return pthread_rwlock_tryrdlock(>rwlock) == 0 ? 1 : 0;
 }
 
-static inline int 
liblockdep_pthread_rwlock_trywlock(liblockdep_pthread_rwlock_t *lock)
+static inline int 
liblockdep_pthread_rwlock_trywrlock(liblockdep_pthread_rwlock_t *lock)
 {
lock_acquire(>dep_map, 0, 1, 0, 1, NULL, (unsigned long)_RET_IP_);
-   return pthread_rwlock_trywlock(>rwlock) == 0 ? 1 : 0;
+   return pthread_rwlock_trywrlock(>rwlock) == 0 ? 1 : 0;
 }
 
 static inline int liblockdep_rwlock_destroy(liblockdep_pthread_rwlock_t *lock)
@@ -79,7 +79,7 @@ static inline int 
liblockdep_rwlock_destroy(liblockdep_pthread_rwlock_t *lock)
 #define pthread_rwlock_unlock  liblockdep_pthread_rwlock_unlock
 #define pthread_rwlock_wrlock  liblockdep_pthread_rwlock_wrlock
 #define pthread_rwlock_tryrdlock   liblockdep_pthread_rwlock_tryrdlock
-#define pthread_rwlock_trywlock
liblockdep_pthread_rwlock_trywlock
+#define pthread_rwlock_trywrlock   liblockdep_pthread_rwlock_trywrlock
 #define pthread_rwlock_destroy liblockdep_rwlock_destroy
 
 #endif
-- 
2.20.0.rc2.403.gdbc3b29805-goog



[PATCH v3 17/24] locking/lockdep: Free lock classes that are no longer in use

2018-12-06 Thread Bart Van Assche
Instead of leaving lock classes that are no longer in use in the
lock_classes array, reuse entries from that array that are no longer
in use. Maintain a linked list of free lock classes with list head
'free_lock_class'. Initialize that list from inside register_lock_class()
instead of from inside lockdep_init() because register_lock_class() can
be called before lockdep_init() has been called. Only add freed lock
classes to the free_lock_classes list after a grace period to avoid that
a lock_classes[] element would be reused while an RCU reader is
accessing it.

Cc: Peter Zijlstra 
Cc: Waiman Long 
Cc: Johannes Berg 
Signed-off-by: Bart Van Assche 
---
 include/linux/lockdep.h  |   9 +-
 kernel/locking/lockdep.c | 242 ---
 2 files changed, 208 insertions(+), 43 deletions(-)

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 9421f028c26c..72b4d5bb409f 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -63,7 +63,8 @@ extern struct lock_class_key __lockdep_no_validate__;
 #define LOCKSTAT_POINTS4
 
 /*
- * The lock-class itself:
+ * The lock-class itself. The order of the structure members matters.
+ * reinit_class() zeroes the key member and all subsequent members.
  */
 struct lock_class {
/*
@@ -72,7 +73,9 @@ struct lock_class {
struct hlist_node   hash_entry;
 
/*
-* global list of all lock-classes:
+* Entry in all_lock_classes when in use. Entry in free_lock_classes
+* when not in use. Instances that are being freed are on the
+* zapped_classes list.
 */
struct list_headlock_entry;
 
@@ -106,7 +109,7 @@ struct lock_class {
unsigned long   contention_point[LOCKSTAT_POINTS];
unsigned long   contending_point[LOCKSTAT_POINTS];
 #endif
-};
+} __no_randomize_layout;
 
 #ifdef CONFIG_LOCK_STAT
 struct lock_time {
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 92bdb187987f..ce8abd268727 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -134,8 +134,8 @@ static struct lock_list list_entries[MAX_LOCKDEP_ENTRIES];
 /*
  * All data structures here are protected by the global debug_lock.
  *
- * Mutex key structs only get allocated, once during bootup, and never
- * get freed - this significantly simplifies the debugging code.
+ * nr_lock_classes is the number of elements of lock_classes[] that is
+ * in use.
  */
 unsigned long nr_lock_classes;
 #ifndef CONFIG_DEBUG_LOCKDEP
@@ -277,11 +277,18 @@ static inline void lock_release_holdtime(struct held_lock 
*hlock)
 #endif
 
 /*
- * We keep a global list of all lock classes. The list only grows,
- * never shrinks. The list is only accessed with the lockdep
- * spinlock lock held.
+ * We keep a global list of all lock classes. The list is only accessed with
+ * the lockdep spinlock lock held. The zapped_classes list contains lock
+ * classes that are about to be freed but that may still be accessed by an RCU
+ * reader. free_lock_classes is a list with free elements. These elements are
+ * linked together by the lock_entry member in struct lock_class.
  */
 LIST_HEAD(all_lock_classes);
+static LIST_HEAD(zapped_classes);
+static LIST_HEAD(free_lock_classes);
+static bool initialization_happened;
+static bool rcu_callback_scheduled;
+static struct rcu_head free_zapped_classes_rcu_head;
 
 /*
  * The lockdep classes are in a hash-table as well, for fast lookup:
@@ -735,6 +742,21 @@ static bool assign_lock_key(struct lockdep_map *lock)
return true;
 }
 
+/*
+ * Initialize the lock_classes[] array elements and also the free_lock_classes
+ * list.
+ */
+static void init_data_structures(void)
+{
+   int i;
+
+   for (i = 0; i < ARRAY_SIZE(lock_classes); i++) {
+   list_add_tail(_classes[i].lock_entry, _lock_classes);
+   INIT_LIST_HEAD(_classes[i].locks_after);
+   INIT_LIST_HEAD(_classes[i].locks_before);
+   }
+}
+
 /*
  * Register a lock's class in the hash-table, if the class is not present
  * yet. Otherwise we look it up. We cache the result in the lock object
@@ -775,11 +797,14 @@ register_lock_class(struct lockdep_map *lock, unsigned 
int subclass, int force)
goto out_unlock_set;
}
 
-   /*
-* Allocate a new key from the static array, and add it to
-* the hash:
-*/
-   if (nr_lock_classes >= MAX_LOCKDEP_KEYS) {
+   /* Allocate a new lock class and add it to the hash. */
+   if (unlikely(!initialization_happened)) {
+   initialization_happened = true;
+   init_data_structures();
+   }
+   class = list_first_entry_or_null(_lock_classes, typeof(*class),
+lock_entry);
+   if (!class) {
if (!debug_locks_off_graph_unlock()) {
return NULL;
}
@@ 

[PATCH v3 03/24] lockdep tests: Improve testing accuracy

2018-12-06 Thread Bart Van Assche
Instead of checking whether the tests produced any output, check the
output itself. This patch avoids that e.g. debug output causes the
message "PASSED!" to be reported for failed tests.

Cc: Peter Zijlstra 
Cc: Waiman Long 
Cc: Johannes Berg 
Signed-off-by: Bart Van Assche 
---
 tools/lib/lockdep/run_tests.sh| 5 +++--
 tools/lib/lockdep/tests/AA.sh | 2 ++
 tools/lib/lockdep/tests/ABA.sh| 2 ++
 tools/lib/lockdep/tests/ABBA.sh   | 2 ++
 tools/lib/lockdep/tests/ABBA_2threads.sh  | 2 ++
 tools/lib/lockdep/tests/ABBCCA.sh | 2 ++
 tools/lib/lockdep/tests/ABBCCDDA.sh   | 2 ++
 tools/lib/lockdep/tests/ABCABC.sh | 2 ++
 tools/lib/lockdep/tests/ABCDBCDA.sh   | 2 ++
 tools/lib/lockdep/tests/ABCDBDDA.sh   | 2 ++
 tools/lib/lockdep/tests/WW.sh | 2 ++
 tools/lib/lockdep/tests/unlock_balance.sh | 2 ++
 12 files changed, 25 insertions(+), 2 deletions(-)
 create mode 100755 tools/lib/lockdep/tests/AA.sh
 create mode 100755 tools/lib/lockdep/tests/ABA.sh
 create mode 100755 tools/lib/lockdep/tests/ABBA.sh
 create mode 100755 tools/lib/lockdep/tests/ABBA_2threads.sh
 create mode 100755 tools/lib/lockdep/tests/ABBCCA.sh
 create mode 100755 tools/lib/lockdep/tests/ABBCCDDA.sh
 create mode 100755 tools/lib/lockdep/tests/ABCABC.sh
 create mode 100755 tools/lib/lockdep/tests/ABCDBCDA.sh
 create mode 100755 tools/lib/lockdep/tests/ABCDBDDA.sh
 create mode 100755 tools/lib/lockdep/tests/WW.sh
 create mode 100755 tools/lib/lockdep/tests/unlock_balance.sh

diff --git a/tools/lib/lockdep/run_tests.sh b/tools/lib/lockdep/run_tests.sh
index 253719ee6377..bc36178329a8 100755
--- a/tools/lib/lockdep/run_tests.sh
+++ b/tools/lib/lockdep/run_tests.sh
@@ -11,7 +11,7 @@ find tests -name '*.c' | sort | while read -r i; do
testname=$(basename "$i" .c)
echo -ne "$testname... "
if gcc -o "tests/$testname" -pthread "$i" liblockdep.a -Iinclude 
-D__USE_LIBLOCKDEP &&
-   [ "$(timeout 1 "./tests/$testname" 2>&1 | wc -l)" -gt 0 ]; then
+   timeout 1 "tests/$testname" 2>&1 | "tests/${testname}.sh"; then
echo "PASSED!"
else
echo "FAILED!"
@@ -23,7 +23,8 @@ find tests -name '*.c' | sort | while read -r i; do
testname=$(basename "$i" .c)
echo -ne "(PRELOAD) $testname... "
if gcc -o "tests/$testname" -pthread -Iinclude "$i" &&
-   [ "$(timeout 1 ./lockdep "./tests/$testname" 2>&1 | wc -l)" -gt 
0 ]; then
+   timeout 1 ./lockdep "tests/$testname" 2>&1 |
+   "tests/${testname}.sh"; then
echo "PASSED!"
else
echo "FAILED!"
diff --git a/tools/lib/lockdep/tests/AA.sh b/tools/lib/lockdep/tests/AA.sh
new file mode 100755
index ..f39b32865074
--- /dev/null
+++ b/tools/lib/lockdep/tests/AA.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+grep -q 'WARNING: possible recursive locking detected'
diff --git a/tools/lib/lockdep/tests/ABA.sh b/tools/lib/lockdep/tests/ABA.sh
new file mode 100755
index ..f39b32865074
--- /dev/null
+++ b/tools/lib/lockdep/tests/ABA.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+grep -q 'WARNING: possible recursive locking detected'
diff --git a/tools/lib/lockdep/tests/ABBA.sh b/tools/lib/lockdep/tests/ABBA.sh
new file mode 100755
index ..fc31c607a5a8
--- /dev/null
+++ b/tools/lib/lockdep/tests/ABBA.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+grep -q 'WARNING: possible circular locking dependency detected'
diff --git a/tools/lib/lockdep/tests/ABBA_2threads.sh 
b/tools/lib/lockdep/tests/ABBA_2threads.sh
new file mode 100755
index ..fc31c607a5a8
--- /dev/null
+++ b/tools/lib/lockdep/tests/ABBA_2threads.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+grep -q 'WARNING: possible circular locking dependency detected'
diff --git a/tools/lib/lockdep/tests/ABBCCA.sh 
b/tools/lib/lockdep/tests/ABBCCA.sh
new file mode 100755
index ..fc31c607a5a8
--- /dev/null
+++ b/tools/lib/lockdep/tests/ABBCCA.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+grep -q 'WARNING: possible circular locking dependency detected'
diff --git a/tools/lib/lockdep/tests/ABBCCDDA.sh 
b/tools/lib/lockdep/tests/ABBCCDDA.sh
new file mode 100755
index ..fc31c607a5a8
--- /dev/null
+++ b/tools/lib/lockdep/tests/ABBCCDDA.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+grep -q 'WARNING: possible circular locking dependency detected'
diff --git a/tools/lib/lockdep/tests/ABCABC.sh 
b/tools/lib/lockdep/tests/ABCABC.sh
new file mode 100755
index ..fc31c607a5a8
--- /dev/null
+++ b/tools/lib/lockdep/tests/ABCABC.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+grep -q 'WARNING: possible circular locking dependency detected'
diff --git a/tools/lib/lockdep/tests/ABCDBCDA.sh 
b/tools/lib/lockdep/tests/ABCDBCDA.sh
new file mode 100755
index ..fc31c607a5a8
--- /dev/null
+++ b/tools/lib/lockdep/tests/ABCDBCDA.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+grep -q 'WARNING: possible circular locking dependency detected'
diff --git 

[PATCH v3 24/24] lockdep tests: Test dynamic key registration

2018-12-06 Thread Bart Van Assche
Make sure that the lockdep_register_key() and lockdep_unregister_key()
code is tested when running the lockdep tests.

Cc: Peter Zijlstra 
Cc: Waiman Long 
Cc: Johannes Berg 
Signed-off-by: Bart Van Assche 
---
 tools/lib/lockdep/include/liblockdep/common.h |  2 ++
 tools/lib/lockdep/include/liblockdep/mutex.h  | 11 ++-
 tools/lib/lockdep/tests/ABBA.c|  9 +
 3 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/tools/lib/lockdep/include/liblockdep/common.h 
b/tools/lib/lockdep/include/liblockdep/common.h
index d640a9761f09..a81d91d4fc78 100644
--- a/tools/lib/lockdep/include/liblockdep/common.h
+++ b/tools/lib/lockdep/include/liblockdep/common.h
@@ -45,6 +45,8 @@ void lock_acquire(struct lockdep_map *lock, unsigned int 
subclass,
 void lock_release(struct lockdep_map *lock, int nested,
unsigned long ip);
 void lockdep_reset_lock(struct lockdep_map *lock);
+void lockdep_register_key(struct lock_class_key *key);
+void lockdep_unregister_key(struct lock_class_key *key);
 extern void debug_check_no_locks_freed(const void *from, unsigned long len);
 
 #define STATIC_LOCKDEP_MAP_INIT(_name, _key) \
diff --git a/tools/lib/lockdep/include/liblockdep/mutex.h 
b/tools/lib/lockdep/include/liblockdep/mutex.h
index 2073d4e1f2f0..783dd0df06f9 100644
--- a/tools/lib/lockdep/include/liblockdep/mutex.h
+++ b/tools/lib/lockdep/include/liblockdep/mutex.h
@@ -7,6 +7,7 @@
 
 struct liblockdep_pthread_mutex {
pthread_mutex_t mutex;
+   struct lock_class_key key;
struct lockdep_map dep_map;
 };
 
@@ -27,11 +28,10 @@ static inline int __mutex_init(liblockdep_pthread_mutex_t 
*lock,
return pthread_mutex_init(>mutex, __mutexattr);
 }
 
-#define liblockdep_pthread_mutex_init(mutex, mutexattr)\
-({ \
-   static struct lock_class_key __key; \
-   \
-   __mutex_init((mutex), #mutex, &__key, (mutexattr)); \
+#define liblockdep_pthread_mutex_init(mutex, mutexattr)
\
+({ \
+   lockdep_register_key(&(mutex)->key);\
+   __mutex_init((mutex), #mutex, &(mutex)->key, (mutexattr));  \
 })
 
 static inline int liblockdep_pthread_mutex_lock(liblockdep_pthread_mutex_t 
*lock)
@@ -55,6 +55,7 @@ static inline int 
liblockdep_pthread_mutex_trylock(liblockdep_pthread_mutex_t *l
 static inline int liblockdep_pthread_mutex_destroy(liblockdep_pthread_mutex_t 
*lock)
 {
lockdep_reset_lock(>dep_map);
+   lockdep_unregister_key(>key);
return pthread_mutex_destroy(>mutex);
 }
 
diff --git a/tools/lib/lockdep/tests/ABBA.c b/tools/lib/lockdep/tests/ABBA.c
index 623313f54720..543789bc3e37 100644
--- a/tools/lib/lockdep/tests/ABBA.c
+++ b/tools/lib/lockdep/tests/ABBA.c
@@ -14,4 +14,13 @@ void main(void)
 
pthread_mutex_destroy();
pthread_mutex_destroy();
+
+   pthread_mutex_init(, NULL);
+   pthread_mutex_init(, NULL);
+
+   LOCK_UNLOCK_2(a, b);
+   LOCK_UNLOCK_2(b, a);
+
+   pthread_mutex_destroy();
+   pthread_mutex_destroy();
 }
-- 
2.20.0.rc2.403.gdbc3b29805-goog



[PATCH v3 23/24] kernel/workqueue: Use dynamic lockdep keys for workqueues

2018-12-06 Thread Bart Van Assche
Commit 87915adc3f0a ("workqueue: re-add lockdep dependencies for flushing")
improved deadlock checking in the workqueue implementation. Unfortunately
that patch also introduced a few false positive lockdep complaints. This
patch suppresses these false positives by allocating the workqueue mutex
lockdep key dynamically. An example of a false positive lockdep complaint
suppressed by this report can be found below. The root cause of the
lockdep complaint shown below is that the direct I/O code can call
alloc_workqueue() from inside a work item created by another
alloc_workqueue() call and that both workqueues share the same lockdep
key. This patch avoids that that lockdep complaint is triggered by
allocating the work queue lockdep keys dynamically. In other words, this
patch guarantees that a unique lockdep key is associated with each work
queue mutex.

==
WARNING: possible circular locking dependency detected
4.19.0-dbg+ #1 Not tainted
--
fio/4129 is trying to acquire lock:
a01cfe1a ((wq_completion)"dio/%s"sb->s_id){+.+.}, at: 
flush_workqueue+0xd0/0x970

but task is already holding lock:
a0acecf9 (>s_type->i_mutex_key#14){+.+.}, at: 
ext4_file_write_iter+0x154/0x710

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #2 (>s_type->i_mutex_key#14){+.+.}:
   down_write+0x3d/0x80
   __generic_file_fsync+0x77/0xf0
   ext4_sync_file+0x3c9/0x780
   vfs_fsync_range+0x66/0x100
   dio_complete+0x2f5/0x360
   dio_aio_complete_work+0x1c/0x20
   process_one_work+0x481/0x9f0
   worker_thread+0x63/0x5a0
   kthread+0x1cf/0x1f0
   ret_from_fork+0x24/0x30

-> #1 ((work_completion)(>complete_work)){+.+.}:
   process_one_work+0x447/0x9f0
   worker_thread+0x63/0x5a0
   kthread+0x1cf/0x1f0
   ret_from_fork+0x24/0x30

-> #0 ((wq_completion)"dio/%s"sb->s_id){+.+.}:
   lock_acquire+0xc5/0x200
   flush_workqueue+0xf3/0x970
   drain_workqueue+0xec/0x220
   destroy_workqueue+0x23/0x350
   sb_init_dio_done_wq+0x6a/0x80
   do_blockdev_direct_IO+0x1f33/0x4be0
   __blockdev_direct_IO+0x79/0x86
   ext4_direct_IO+0x5df/0xbb0
   generic_file_direct_write+0x119/0x220
   __generic_file_write_iter+0x131/0x2d0
   ext4_file_write_iter+0x3fa/0x710
   aio_write+0x235/0x330
   io_submit_one+0x510/0xeb0
   __x64_sys_io_submit+0x122/0x340
   do_syscall_64+0x71/0x220
   entry_SYSCALL_64_after_hwframe+0x49/0xbe

other info that might help us debug this:

Chain exists of:
  (wq_completion)"dio/%s"sb->s_id --> (work_completion)(>complete_work) 
--> >s_type->i_mutex_key#14

 Possible unsafe locking scenario:

   CPU0CPU1
   
  lock(>s_type->i_mutex_key#14);
   lock((work_completion)(>complete_work));
   lock(>s_type->i_mutex_key#14);
  lock((wq_completion)"dio/%s"sb->s_id);

 *** DEADLOCK ***

1 lock held by fio/4129:
 #0: a0acecf9 (>s_type->i_mutex_key#14){+.+.}, at: 
ext4_file_write_iter+0x154/0x710

stack backtrace:
CPU: 3 PID: 4129 Comm: fio Not tainted 4.19.0-dbg+ #1
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014
Call Trace:
 dump_stack+0x86/0xc5
 print_circular_bug.isra.32+0x20a/0x218
 __lock_acquire+0x1c68/0x1cf0
 lock_acquire+0xc5/0x200
 flush_workqueue+0xf3/0x970
 drain_workqueue+0xec/0x220
 destroy_workqueue+0x23/0x350
 sb_init_dio_done_wq+0x6a/0x80
 do_blockdev_direct_IO+0x1f33/0x4be0
 __blockdev_direct_IO+0x79/0x86
 ext4_direct_IO+0x5df/0xbb0
 generic_file_direct_write+0x119/0x220
 __generic_file_write_iter+0x131/0x2d0
 ext4_file_write_iter+0x3fa/0x710
 aio_write+0x235/0x330
 io_submit_one+0x510/0xeb0
 __x64_sys_io_submit+0x122/0x340
 do_syscall_64+0x71/0x220
 entry_SYSCALL_64_after_hwframe+0x49/0xbe

Cc: Ingo Molnar 
Cc: Peter Zijlstra 
Cc: Will Deacon 
Cc: Tejun Heo 
Cc: Waiman Long 
Cc: Johannes Berg 
Signed-off-by: Bart Van Assche 
---
 include/linux/workqueue.h | 28 +++---
 kernel/workqueue.c| 60 +--
 2 files changed, 55 insertions(+), 33 deletions(-)

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 60d673e15632..d9a1a480e920 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -390,43 +390,23 @@ extern struct workqueue_struct *system_freezable_wq;
 extern struct workqueue_struct *system_power_efficient_wq;
 extern struct workqueue_struct *system_freezable_power_efficient_wq;
 
-extern struct workqueue_struct *
-__alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active,
-   struct lock_class_key *key, const char *lock_name, ...) __printf(1, 6);
-
 /**
  * alloc_workqueue - allocate a workqueue
  * @fmt: printf format for the name of the workqueue
  * @flags: WQ_* flags
  * 

[PATCH v3 15/24] locking/lockdep: Reorder struct lock_class members

2018-12-06 Thread Bart Van Assche
This patch does not change any functionality but makes the patch that
frees lock classes that are no longer in use easier to read.

Cc: Peter Zijlstra 
Cc: Waiman Long 
Cc: Johannes Berg 
Signed-off-by: Bart Van Assche 
---
 include/linux/lockdep.h | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 6d0f8d1c2bee..9421f028c26c 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -76,6 +76,13 @@ struct lock_class {
 */
struct list_head    lock_entry;
 
+   /*
+* These fields represent a directed graph of lock dependencies,
+* to every node we attach a list of "forward" and a list of
+* "backward" graph nodes.
+*/
+   struct list_head    locks_after, locks_before;
+
struct lockdep_subclass_key *key;
unsigned int    subclass;
unsigned int    dep_gen_id;
@@ -86,13 +93,6 @@ struct lock_class {
unsigned long   usage_mask;
struct stack_trace  usage_traces[XXX_LOCK_USAGE_STATES];
 
-   /*
-* These fields represent a directed graph of lock dependencies,
-* to every node we attach a list of "forward" and a list of
-* "backward" graph nodes.
-*/
-   struct list_head    locks_after, locks_before;
-
/*
 * Generation counter, when doing certain classes of graph walking,
 * to ensure that we check one node only once:
-- 
2.20.0.rc2.403.gdbc3b29805-goog



[PATCH v3 14/24] locking/lockdep: Make zap_class() remove all matching lock order entries

2018-12-06 Thread Bart Van Assche
Make sure that all lock order entries that refer to a class are removed
from the list_entries[] array when a kernel module is unloaded.
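
A single dependency prev -> next added by check_prev_add() is recorded in
two lists, which is why zap_class() has to match on either field. As a
sketch (illustration only, not code from this patch):

	/* forward entry, on hlock_class(prev)->locks_after */
	entry->class    = hlock_class(next);
	entry->links_to = hlock_class(prev);

	/* backward entry, on hlock_class(next)->locks_before */
	entry->class    = hlock_class(prev);
	entry->links_to = hlock_class(next);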

Cc: Peter Zijlstra 
Cc: Waiman Long 
Cc: Johannes Berg 
Signed-off-by: Bart Van Assche 
---
 include/linux/lockdep.h  |  1 +
 kernel/locking/lockdep.c | 17 +++--
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 1fd82ff99c65..6d0f8d1c2bee 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -180,6 +180,7 @@ static inline void lockdep_copy_map(struct lockdep_map *to,
 struct lock_list {
struct list_head    entry;
struct lock_class   *class;
+   struct lock_class   *links_to;
struct stack_trace  trace;
int distance;
 
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 5c837a537273..ecd92969674c 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -859,7 +859,8 @@ static struct lock_list *alloc_list_entry(void)
 /*
  * Add a new dependency to the head of the list:
  */
-static int add_lock_to_list(struct lock_class *this, struct list_head *head,
+static int add_lock_to_list(struct lock_class *this,
+   struct lock_class *links_to, struct list_head *head,
unsigned long ip, int distance,
struct stack_trace *trace)
 {
@@ -872,7 +873,9 @@ static int add_lock_to_list(struct lock_class *this, struct list_head *head,
if (!entry)
return 0;
 
+   WARN_ON_ONCE(this == links_to);
entry->class = this;
+   entry->links_to = links_to;
entry->distance = distance;
entry->trace = *trace;
/*
@@ -1918,14 +1921,14 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
 * Ok, all validations passed, add the new lock
 * to the previous lock's dependency list:
 */
-   ret = add_lock_to_list(hlock_class(next),
+   ret = add_lock_to_list(hlock_class(next), hlock_class(prev),
   &hlock_class(prev)->locks_after,
   next->acquire_ip, distance, trace);
 
if (!ret)
return 0;
 
-   ret = add_lock_to_list(hlock_class(prev),
+   ret = add_lock_to_list(hlock_class(prev), hlock_class(next),
   &hlock_class(next)->locks_before,
   next->acquire_ip, distance, trace);
if (!ret)
@@ -4128,15 +4131,17 @@ void lockdep_reset(void)
  */
 static void zap_class(struct lock_class *class)
 {
+   struct lock_list *entry;
int i;
 
/*
 * Remove all dependencies this lock is
 * involved in:
 */
-   for (i = 0; i < nr_list_entries; i++) {
-   if (list_entries[i].class == class)
-   list_del_rcu(&list_entries[i].entry);
+   for (i = 0, entry = list_entries; i < nr_list_entries; i++, entry++) {
+   if (entry->class != class && entry->links_to != class)
+   continue;
+   list_del_rcu(&entry->entry);
}
/*
 * Unhash the class and remove it from the all_lock_classes list:
-- 
2.20.0.rc2.403.gdbc3b29805-goog



[PATCH v3 04/24] lockdep tests: Run lockdep tests a second time under Valgrind

2018-12-06 Thread Bart Van Assche
This improves test coverage.

Cc: Peter Zijlstra 
Cc: Waiman Long 
Cc: Johannes Berg 
Signed-off-by: Bart Van Assche 
---
 tools/lib/lockdep/run_tests.sh | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/tools/lib/lockdep/run_tests.sh b/tools/lib/lockdep/run_tests.sh
index bc36178329a8..c8fbd0306960 100755
--- a/tools/lib/lockdep/run_tests.sh
+++ b/tools/lib/lockdep/run_tests.sh
@@ -31,3 +31,17 @@ find tests -name '*.c' | sort | while read -r i; do
fi
rm -f "tests/$testname"
 done
+
+find tests -name '*.c' | sort | while read -r i; do
+   testname=$(basename "$i" .c)
+   echo -ne "(PRELOAD + Valgrind) $testname... "
+   if gcc -o "tests/$testname" -pthread -Iinclude "$i" &&
+   { timeout 10 valgrind --read-var-info=yes ./lockdep "./tests/$testname" >& "tests/${testname}.vg.out"; true; } &&
+   "tests/${testname}.sh" < "tests/${testname}.vg.out" &&
+   ! grep -Eq '(^==[0-9]*== (Invalid |Uninitialised ))|Mismatched free|Source and destination overlap| UME ' "tests/${testname}.vg.out"; then
+   echo "PASSED!"
+   else
+   echo "FAILED!"
+   fi
+   rm -f "tests/$testname"
+done
-- 
2.20.0.rc2.403.gdbc3b29805-goog



[PATCH v3 01/24] lockdep tests: Display compiler warning and error messages

2018-12-06 Thread Bart Van Assche
If compilation of liblockdep fails, display an error message and exit
immediately. Display compiler warning and error messages that are
generated while building a test. Only run a test if compilation of it
succeeded.

Cc: Peter Zijlstra 
Cc: Waiman Long 
Cc: Johannes Berg 
Signed-off-by: Bart Van Assche 
---
 tools/lib/lockdep/run_tests.sh | 14 +-
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/tools/lib/lockdep/run_tests.sh b/tools/lib/lockdep/run_tests.sh
index 2e570a188f16..9f31f84e7fac 100755
--- a/tools/lib/lockdep/run_tests.sh
+++ b/tools/lib/lockdep/run_tests.sh
@@ -1,13 +1,17 @@
 #! /bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
-make &> /dev/null
+if ! make >/dev/null; then
+echo "Building liblockdep failed."
+echo "FAILED!"
+exit 1
+fi
 
 for i in `ls tests/*.c`; do
testname=$(basename "$i" .c)
-   gcc -o tests/$testname -pthread $i liblockdep.a -Iinclude -D__USE_LIBLOCKDEP &> /dev/null
echo -ne "$testname... "
-   if [ $(timeout 1 ./tests/$testname 2>&1 | wc -l) -gt 0 ]; then
+   if gcc -o "tests/$testname" -pthread "$i" liblockdep.a -Iinclude -D__USE_LIBLOCKDEP &&
+   [ "$(timeout 1 "./tests/$testname" 2>&1 | wc -l)" -gt 0 ]; then
echo "PASSED!"
else
echo "FAILED!"
@@ -19,9 +23,9 @@ done
 
 for i in `ls tests/*.c`; do
testname=$(basename "$i" .c)
-   gcc -o tests/$testname -pthread -Iinclude $i &> /dev/null
echo -ne "(PRELOAD) $testname... "
-   if [ $(timeout 1 ./lockdep ./tests/$testname 2>&1 | wc -l) -gt 0 ]; then
+   if gcc -o "tests/$testname" -pthread -Iinclude "$i" &&
+   [ "$(timeout 1 ./lockdep "./tests/$testname" 2>&1 | wc -l)" -gt 
0 ]; then
echo "PASSED!"
else
echo "FAILED!"
-- 
2.20.0.rc2.403.gdbc3b29805-goog



[PATCH v3 07/24] lockdep tests: Test the lockdep_reset_lock() implementation

2018-12-06 Thread Bart Van Assche
This patch makes sure that the lockdep_reset_lock() function gets
tested.

Cc: Peter Zijlstra 
Cc: Waiman Long 
Cc: Johannes Berg 
Signed-off-by: Bart Van Assche 
---
 tools/lib/lockdep/include/liblockdep/common.h | 1 +
 tools/lib/lockdep/include/liblockdep/mutex.h  | 1 +
 tools/lib/lockdep/tests/ABBA.c| 3 +++
 tools/lib/lockdep/tests/ABBCCA.c  | 4 
 tools/lib/lockdep/tests/ABBCCDDA.c| 5 +
 tools/lib/lockdep/tests/ABCABC.c  | 4 
 tools/lib/lockdep/tests/ABCDBCDA.c| 5 +
 tools/lib/lockdep/tests/ABCDBDDA.c| 5 +
 tools/lib/lockdep/tests/unlock_balance.c  | 2 ++
 9 files changed, 30 insertions(+)

diff --git a/tools/lib/lockdep/include/liblockdep/common.h b/tools/lib/lockdep/include/liblockdep/common.h
index 8862da80995a..d640a9761f09 100644
--- a/tools/lib/lockdep/include/liblockdep/common.h
+++ b/tools/lib/lockdep/include/liblockdep/common.h
@@ -44,6 +44,7 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
struct lockdep_map *nest_lock, unsigned long ip);
 void lock_release(struct lockdep_map *lock, int nested,
unsigned long ip);
+void lockdep_reset_lock(struct lockdep_map *lock);
 extern void debug_check_no_locks_freed(const void *from, unsigned long len);
 
 #define STATIC_LOCKDEP_MAP_INIT(_name, _key) \
diff --git a/tools/lib/lockdep/include/liblockdep/mutex.h b/tools/lib/lockdep/include/liblockdep/mutex.h
index a80ac39f966e..2073d4e1f2f0 100644
--- a/tools/lib/lockdep/include/liblockdep/mutex.h
+++ b/tools/lib/lockdep/include/liblockdep/mutex.h
@@ -54,6 +54,7 @@ static inline int liblockdep_pthread_mutex_trylock(liblockdep_pthread_mutex_t *l
 
static inline int liblockdep_pthread_mutex_destroy(liblockdep_pthread_mutex_t *lock)
 {
+   lockdep_reset_lock(&lock->dep_map);
return pthread_mutex_destroy(&lock->mutex);
 }
 
diff --git a/tools/lib/lockdep/tests/ABBA.c b/tools/lib/lockdep/tests/ABBA.c
index 1460afd33d71..623313f54720 100644
--- a/tools/lib/lockdep/tests/ABBA.c
+++ b/tools/lib/lockdep/tests/ABBA.c
@@ -11,4 +11,7 @@ void main(void)
 
LOCK_UNLOCK_2(a, b);
LOCK_UNLOCK_2(b, a);
+
+   pthread_mutex_destroy(&b);
+   pthread_mutex_destroy(&a);
 }
diff --git a/tools/lib/lockdep/tests/ABBCCA.c b/tools/lib/lockdep/tests/ABBCCA.c
index a54c1b2af118..48446129d496 100644
--- a/tools/lib/lockdep/tests/ABBCCA.c
+++ b/tools/lib/lockdep/tests/ABBCCA.c
@@ -13,4 +13,8 @@ void main(void)
LOCK_UNLOCK_2(a, b);
LOCK_UNLOCK_2(b, c);
LOCK_UNLOCK_2(c, a);
+
+   pthread_mutex_destroy(&c);
+   pthread_mutex_destroy(&b);
+   pthread_mutex_destroy(&a);
 }
diff --git a/tools/lib/lockdep/tests/ABBCCDDA.c b/tools/lib/lockdep/tests/ABBCCDDA.c
index aa5d194e8869..3570bf7b3804 100644
--- a/tools/lib/lockdep/tests/ABBCCDDA.c
+++ b/tools/lib/lockdep/tests/ABBCCDDA.c
@@ -15,4 +15,9 @@ void main(void)
LOCK_UNLOCK_2(b, c);
LOCK_UNLOCK_2(c, d);
LOCK_UNLOCK_2(d, a);
+
+   pthread_mutex_destroy(&d);
+   pthread_mutex_destroy(&c);
+   pthread_mutex_destroy(&b);
+   pthread_mutex_destroy(&a);
 }
diff --git a/tools/lib/lockdep/tests/ABCABC.c b/tools/lib/lockdep/tests/ABCABC.c
index b54a08e60416..a1c4659894cd 100644
--- a/tools/lib/lockdep/tests/ABCABC.c
+++ b/tools/lib/lockdep/tests/ABCABC.c
@@ -13,4 +13,8 @@ void main(void)
LOCK_UNLOCK_2(a, b);
LOCK_UNLOCK_2(c, a);
LOCK_UNLOCK_2(b, c);
+
+   pthread_mutex_destroy(&c);
+   pthread_mutex_destroy(&b);
+   pthread_mutex_destroy(&a);
 }
diff --git a/tools/lib/lockdep/tests/ABCDBCDA.c b/tools/lib/lockdep/tests/ABCDBCDA.c
index a56742250d86..335af1c90ab5 100644
--- a/tools/lib/lockdep/tests/ABCDBCDA.c
+++ b/tools/lib/lockdep/tests/ABCDBCDA.c
@@ -15,4 +15,9 @@ void main(void)
LOCK_UNLOCK_2(c, d);
LOCK_UNLOCK_2(b, c);
LOCK_UNLOCK_2(d, a);
+
+   pthread_mutex_destroy(&d);
+   pthread_mutex_destroy(&c);
+   pthread_mutex_destroy(&b);
+   pthread_mutex_destroy(&a);
 }
diff --git a/tools/lib/lockdep/tests/ABCDBDDA.c b/tools/lib/lockdep/tests/ABCDBDDA.c
index 238a3353f3c3..3c5972863049 100644
--- a/tools/lib/lockdep/tests/ABCDBDDA.c
+++ b/tools/lib/lockdep/tests/ABCDBDDA.c
@@ -15,4 +15,9 @@ void main(void)
LOCK_UNLOCK_2(c, d);
LOCK_UNLOCK_2(b, d);
LOCK_UNLOCK_2(d, a);
+
+   pthread_mutex_destroy(&d);
+   pthread_mutex_destroy(&c);
+   pthread_mutex_destroy(&b);
+   pthread_mutex_destroy(&a);
 }
diff --git a/tools/lib/lockdep/tests/unlock_balance.c b/tools/lib/lockdep/tests/unlock_balance.c
index 34cf32f689de..dba25064b50a 100644
--- a/tools/lib/lockdep/tests/unlock_balance.c
+++ b/tools/lib/lockdep/tests/unlock_balance.c
@@ -10,4 +10,6 @@ void main(void)
pthread_mutex_lock(&a);
pthread_mutex_unlock(&a);
pthread_mutex_unlock(&a);
+
+   pthread_mutex_destroy(&a);
 }
-- 
2.20.0.rc2.403.gdbc3b29805-goog



[PATCH v3 21/24] locking/lockdep: Verify whether lock objects are small enough to be used as class keys

2018-12-06 Thread Bart Van Assche
Cc: Peter Zijlstra 
Cc: Waiman Long 
Cc: Johannes Berg 
Signed-off-by: Bart Van Assche 
---
 kernel/locking/lockdep.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 8c69516b1283..a47357d3dca7 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -727,6 +727,15 @@ static bool assign_lock_key(struct lockdep_map *lock)
 {
unsigned long can_addr, addr = (unsigned long)lock;
 
+   /*
+* lockdep_free_key_range() assumes that struct lock_class_key
+* objects do not overlap. Since we use the address of lock
+* objects as class key for static objects, check whether the
+* size of lock_class_key objects does not exceed the size of
+* the smallest lock object.
+*/
+   BUILD_BUG_ON(sizeof(struct lock_class_key) > sizeof(raw_spinlock_t));
+
if (__is_kernel_percpu_address(addr, &can_addr))
lock->key = (void *)can_addr;
else if (__is_module_percpu_address(addr, &can_addr))
-- 
2.20.0.rc2.403.gdbc3b29805-goog



[PATCH v3 22/24] locking/lockdep: Add support for dynamic keys

2018-12-06 Thread Bart Van Assche
A shortcoming of the current lockdep implementation is that it requires
lock keys to be allocated statically. That forces certain lock objects
to share lock keys. Since lock dependency analysis groups lock objects
per key sharing lock keys can cause false positive lockdep reports.
Make it possible to avoid such false positive reports by allowing lock
keys to be allocated dynamically. Require that dynamically allocated
lock keys are registered before use by calling lockdep_register_key().
Complain about attempts to register the same lock key pointer twice
without calling lockdep_unregister_key() between successive
registration calls.

The purpose of the new lock_keys_hash[] data structure that keeps
track of all dynamic keys is twofold:
- Verify whether the lockdep_register_key() and  lockdep_unregister_key()
  functions are used correctly.
- Avoid that lockdep_init_map() complains when encountering a dynamically
  allocated key.
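
As a minimal usage sketch (struct foo, foo_alloc() and foo_free() are made
up for illustration; lockdep_register_key(), lockdep_unregister_key() and
lockdep_set_class() are the interfaces involved):

	#include <linux/lockdep.h>
	#include <linux/slab.h>
	#include <linux/spinlock.h>

	struct foo {				/* hypothetical object */
		spinlock_t lock;
		struct lock_class_key key;	/* dynamically allocated key */
	};

	static struct foo *foo_alloc(void)
	{
		struct foo *f = kzalloc(sizeof(*f), GFP_KERNEL);

		if (!f)
			return NULL;
		/* Register the key before any lock starts using it. */
		lockdep_register_key(&f->key);
		spin_lock_init(&f->lock);
		lockdep_set_class(&f->lock, &f->key);
		return f;
	}

	static void foo_free(struct foo *f)
	{
		/* Unregister the key before its memory is freed. */
		lockdep_unregister_key(&f->key);
		kfree(f);
	}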

Cc: Peter Zijlstra 
Cc: Waiman Long 
Cc: Johannes Berg 
Signed-off-by: Bart Van Assche 
---
 include/linux/lockdep.h  |  13 -
 kernel/locking/lockdep.c | 115 ---
 2 files changed, 117 insertions(+), 11 deletions(-)

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 72b4d5bb409f..52853baa591d 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -46,15 +46,19 @@ extern int lock_stat;
 #define NR_LOCKDEP_CACHING_CLASSES 2
 
 /*
- * Lock-classes are keyed via unique addresses, by embedding the
- * lockclass-key into the kernel (or module) .data section. (For
- * static locks we use the lock address itself as the key.)
+ * A lockdep key is associated with each lock object. For static locks we use
+ * the lock address itself as the key. Dynamically allocated lock objects can
+ * have a statically or dynamically allocated key. Dynamically allocated lock
+ * keys must be registered before being used and must be unregistered before
+ * the key memory is freed.
  */
 struct lockdep_subclass_key {
char __one_byte;
 } __attribute__ ((__packed__));
 
+/* hash_entry is used to keep track of dynamically allocated keys. */
 struct lock_class_key {
+   struct hlist_node   hash_entry;
struct lockdep_subclass_key subkeys[MAX_LOCKDEP_SUBCLASSES];
 };
 
@@ -274,6 +278,9 @@ extern asmlinkage void lockdep_sys_exit(void);
 extern void lockdep_off(void);
 extern void lockdep_on(void);
 
+extern void lockdep_register_key(struct lock_class_key *key);
+extern void lockdep_unregister_key(struct lock_class_key *key);
+
 /*
  * These methods are used by specific locking variants (spinlocks,
  * rwlocks, mutexes and rwsems) to pass init/acquire/release events
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index a47357d3dca7..a84c9ef13afb 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -141,6 +141,9 @@ static DECLARE_BITMAP(list_entries_being_freed, MAX_LOCKDEP_ENTRIES);
  * nr_lock_classes is the number of elements of lock_classes[] that is
  * in use.
  */
+#define KEYHASH_BITS   (MAX_LOCKDEP_KEYS_BITS - 1)
+#define KEYHASH_SIZE   (1UL << KEYHASH_BITS)
+static struct hlist_head lock_keys_hash[KEYHASH_SIZE];
 unsigned long nr_lock_classes;
 #ifndef CONFIG_DEBUG_LOCKDEP
 static
@@ -610,7 +613,7 @@ static int very_verbose(struct lock_class *class)
  * Is this the address of a static object:
  */
 #ifdef __KERNEL__
-static int static_obj(void *obj)
+static int static_obj(const void *obj)
 {
unsigned long start = (unsigned long) &_stext,
  end   = (unsigned long) &_end,
@@ -912,6 +915,68 @@ static void init_data_structures(void)
}
 }
 
+static inline struct hlist_head *keyhashentry(const struct lock_class_key *key)
+{
+   unsigned long hash = hash_long((uintptr_t)key, KEYHASH_BITS);
+
+   return lock_keys_hash + hash;
+}
+
+/*
+ * Register a dynamically allocated key.
+ */
+void lockdep_register_key(struct lock_class_key *key)
+{
+   struct hlist_head *hash_head;
+   struct lock_class_key *k;
+   unsigned long flags;
+   int locked;
+
+   if (WARN_ON_ONCE(static_obj(key)))
+   return;
+   hash_head = keyhashentry(key);
+
+   raw_local_irq_save(flags);
+   locked = graph_lock();
+   hlist_for_each_entry_rcu(k, hash_head, hash_entry) {
+   if (WARN_ON_ONCE(k == key))
+   goto out_unlock;
+   }
+   hlist_add_head_rcu(&key->hash_entry, hash_head);
+out_unlock:
+   if (locked)
+   graph_unlock();
+   raw_local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(lockdep_register_key);
+
+/*
+ * Check whether a key has been registered as a dynamic key. Must not be called
+ * from interrupt context.
+ */
+static bool is_dynamic_key(const struct lock_class_key *key)
+{
+   struct hlist_head *hash_head;
+   struct lock_class_key *k;
+   bool found = false;
+
+   if (WARN_ON_ONCE(static_obj(key)))
+  

[PATCH v3 10/24] locking/lockdep: Introduce lock_class_cache_is_registered()

2018-12-06 Thread Bart Van Assche
This patch does not change any functionality but makes the
lockdep_reset_lock() function easier to read.

Cc: Peter Zijlstra 
Cc: Waiman Long 
Cc: Johannes Berg 
Signed-off-by: Bart Van Assche 
---
 kernel/locking/lockdep.c | 50 
 1 file changed, 30 insertions(+), 20 deletions(-)

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index b5c8fcb6c070..81388d028ac7 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -4201,13 +4201,33 @@ void lockdep_free_key_range(void *start, unsigned long size)
 */
 }
 
-void lockdep_reset_lock(struct lockdep_map *lock)
+/*
+ * Check whether any element of the @lock->class_cache[] array refers to a
+ * registered lock class. The caller must hold either the graph lock or the
+ * RCU read lock.
+ */
+static bool lock_class_cache_is_registered(struct lockdep_map *lock)
 {
struct lock_class *class;
struct hlist_head *head;
-   unsigned long flags;
int i, j;
-   int locked;
+
+   for (i = 0; i < CLASSHASH_SIZE; i++) {
+   head = classhash_table + i;
+   hlist_for_each_entry_rcu(class, head, hash_entry) {
+   for (j = 0; j < NR_LOCKDEP_CACHING_CLASSES; j++)
+   if (lock->class_cache[j] == class)
+   return true;
+   }
+   }
+   return false;
+}
+
+void lockdep_reset_lock(struct lockdep_map *lock)
+{
+   struct lock_class *class;
+   unsigned long flags;
+   int j, locked;
 
raw_local_irq_save(flags);
 
@@ -4227,24 +4247,14 @@ void lockdep_reset_lock(struct lockdep_map *lock)
 * be gone.
 */
locked = graph_lock();
-   for (i = 0; i < CLASSHASH_SIZE; i++) {
-   head = classhash_table + i;
-   hlist_for_each_entry_rcu(class, head, hash_entry) {
-   int match = 0;
-
-   for (j = 0; j < NR_LOCKDEP_CACHING_CLASSES; j++)
-   match |= class == lock->class_cache[j];
-
-   if (unlikely(match)) {
-   if (debug_locks_off_graph_unlock()) {
-   /*
-* We all just reset everything, how did it match?
-*/
-   WARN_ON(1);
-   }
-   goto out_restore;
-   }
+   if (unlikely(lock_class_cache_is_registered(lock))) {
+   if (debug_locks_off_graph_unlock()) {
+   /*
+* We all just reset everything, how did it match?
+*/
+   WARN_ON(1);
}
+   goto out_restore;
}
if (locked)
graph_unlock();
-- 
2.20.0.rc2.403.gdbc3b29805-goog



[PATCH v3 08/24] locking/lockdep: Declare local symbols static

2018-12-06 Thread Bart Van Assche
This patch avoids that sparse complains about a missing declaration for
the lock_classes array when building with CONFIG_DEBUG_LOCKDEP=n.

Cc: Peter Zijlstra 
Cc: Waiman Long 
Cc: Johannes Berg 
Signed-off-by: Bart Van Assche 
---
 kernel/locking/lockdep.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 1efada2dd9dd..7434a00b2b2f 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -138,6 +138,9 @@ static struct lock_list list_entries[MAX_LOCKDEP_ENTRIES];
  * get freed - this significantly simplifies the debugging code.
  */
 unsigned long nr_lock_classes;
+#ifndef CONFIG_DEBUG_LOCKDEP
+static
+#endif
 struct lock_class lock_classes[MAX_LOCKDEP_KEYS];
 
 static inline struct lock_class *hlock_class(struct held_lock *hlock)
-- 
2.20.0.rc2.403.gdbc3b29805-goog



Re: [PATCH] riscv: remove unused variable in ftrace

2018-12-06 Thread Steven Rostedt
On Thu, 6 Dec 2018 11:20:31 -0800
Olof Johansson  wrote:

> On Thu, Dec 6, 2018 at 2:26 AM David Abdurachmanov
>  wrote:
> >
> > Noticed while building kernel-4.20.0-0.rc5.git2.1.fc30 for
> > Fedora 30/RISCV.
> >
> > [..]
> > BUILDSTDERR: arch/riscv/kernel/ftrace.c: In function 
> > 'prepare_ftrace_return':
> > BUILDSTDERR: arch/riscv/kernel/ftrace.c:135:6: warning: unused variable 
> > 'err' [-Wunused-variable]
> > BUILDSTDERR:   int err;
> > BUILDSTDERR:   ^~~

Bah. I could have sworn I checked for all the error messages when I did
my cross-compiling of the architectures. I fixed this issue in other
places, not sure how I missed riscv.

Thanks for fixing it.

Acked-by: Steven Rostedt (VMware) 

-- Steve

 
> > [..]
> >
> > Signed-off-by: David Abdurachmanov   
> 
> Please add a:
> Fixes: e949b6db51dc1 ("riscv/function_graph: Simplify with
> function_graph_enter()")
> Reviewed-by: Olof Johansson 



[PATCH] thermal: uniphier: Convert to SPDX identifier

2018-12-06 Thread Kunihiko Hayashi
This converts license boilerplate to SPDX identifier, and removes
unnecessary lines.

Signed-off-by: Kunihiko Hayashi 
---
 drivers/thermal/uniphier_thermal.c | 13 +
 1 file changed, 1 insertion(+), 12 deletions(-)

diff --git a/drivers/thermal/uniphier_thermal.c b/drivers/thermal/uniphier_thermal.c
index 55477d7..bba2284 100644
--- a/drivers/thermal/uniphier_thermal.c
+++ b/drivers/thermal/uniphier_thermal.c
@@ -1,21 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
 /**
  * uniphier_thermal.c - Socionext UniPhier thermal driver
- *
  * Copyright 2014  Panasonic Corporation
  * Copyright 2016-2017 Socionext Inc.
- * All rights reserved.
- *
  * Author:
  * Kunihiko Hayashi 
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2  of
- * the License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include 
-- 
2.7.4



Re: [RFC v3 11/19] kunit: add Python libraries for handing KUnit config and kernel

2018-12-06 Thread Luis Chamberlain
On Thu, Dec 06, 2018 at 12:32:47PM +, Kieran Bingham wrote:
> My main initial idea for a libumlinux is to provide infrastructure such
> as our linked-lists and other kernel formatting so that we can take
> kernel code directly to userspace for test and debug (assuming that
> there are no hardware dependencies or things that we can't mock out)

The tools/ directory already does this for tons of things. It's where
I ended up placing some API I tested a long time ago when I wanted to
test it in userspace, and provide the unit test in userspace (for my
linker table patches).

> Now we just have to start the race to see who can tweak the kernel build
> system to produce an output library first :)

Should be relatively easy if the tools directory is used. Yes, there is
an inherent risk of duplication, but that was decided long ago.

  Luis
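
As a small, concrete example of that kind of reuse: the list implementation
already has a userspace copy under tools/include/ (which liblockdep builds
against), so code like the sketch below can be compiled outside the kernel.
The build line and include paths are assumptions, not an existing test:

	/* e.g.: gcc -I tools/include -o listdemo listdemo.c (paths may vary) */
	#include <stdio.h>
	#include <linux/list.h>		/* the copy under tools/include */

	struct item {
		int val;
		struct list_head node;
	};

	int main(void)
	{
		LIST_HEAD(items);
		struct item a = { .val = 1 }, b = { .val = 2 };
		struct item *it;

		list_add_tail(&a.node, &items);
		list_add_tail(&b.node, &items);
		list_for_each_entry(it, &items, node)
			printf("%d\n", it->val);
		return 0;
	}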


Re: [PATCH v4] signal: add taskfd_send_signal() syscall

2018-12-06 Thread Serge E. Hallyn
On Thu, Dec 06, 2018 at 04:34:54PM -0800, Daniel Colascione wrote:
> On Thu, Dec 6, 2018 at 4:31 PM Serge E. Hallyn  wrote:
> >
> > On Fri, Dec 07, 2018 at 12:17:45AM +0100, Christian Brauner wrote:
> > > On Thu, Dec 06, 2018 at 11:39:48PM +0100, Christian Brauner wrote:
> > > > On Thu, Dec 06, 2018 at 03:46:53PM -0600, Eric W. Biederman wrote:
> > > > > Christian Brauner  writes:
> > > > >
> > > > > >> Your intention is to add the thread case to support pthreads once 
> > > > > >> the
> > > > > >> process case is sorted out.  So this is something that needs to be 
> > > > > >> made
> > > > > >> clear.  Did I miss how you plan to handle threads?
> > > > > >
> > > > > > Yeah, maybe you missed it in the commit message [2] which is based 
> > > > > > on a
> > > > > > discussion with Andy [3] and Arnd [4]:
> > > > >
> > > > > Looking at your references I haven't missed it.  You are not deciding
> > > > > anything as of yet to keep it simple.  Except you are returning
> > > > > EOPNOTSUPP.  You are very much intending to do something.
> > > >
> > > > That was clear all along and was pointed at every occasion in the
> > > > threads. I even went through the hassle to give you all of the
> > > > references when there's lore.kernel.org.
> > > >
> > > > >
> > > > > Decide.  Do you use the flags parameter or is the width of the
> > > > > target depending on the flags.
> > >
> > > Ok, let's try to be constructive. I understand the general concern for
> > > the future so let's put a contract into the commit message stating that
> > > the width of the target aka *what is signaled* will be based on a flag
> > > parameter if we ever extend it:
> > >
> > > taskfd_send_signal(fd, SIGSTOP, NULL, TASKFD_PGID);
> > > taskfd_send_signal(fd, SIGSTOP, NULL, TASKFD_TID);
> > >
> > > with the current default being
> > >
> > > taskfd_send_signal(fd, SIGSTOP, NULL, TASKFD_PID);
> > >
> > > This seems to me the cleanest solution as we only use one type of file
> > > descriptor. Can everyone be on board with this? If so I'm going to send
> > > out a new version of the patch.
> > >
> > > Christian
> >
> > I'm on board with this, but I think you need to also clarify what exactly
> > the fd stands for.  I think that (a) userspace should not have to care
> > about the struct pid implementation, and so (b) the procfd should stand
> > for all the pids.  So when taskfd_send_signal(fd, SIGSTOP, NULL, 
> > TASKFD_PGID)
> > becomes implemented, then open(/proc/5) will pin all three pids, as will
> > open(/proc/5/task/6).
> 
> This change doesn't "pin" any PID, and it makes no sense to make a
> process FD stand for all its threads. What does that even mean?

Currently the patch relies on the procfd inode saving a copy to the PIDTYPE_PID
pid.  I'm not sure offhand, can it go to the PIDTYPE_PGID from that after the
task has died, or not?   I didn't think so.  If it can then great.

The point is (a) these are details which should not have to bother userspace,
and (b) how to decide who we're sending the signal to (tid/pid/pgid) should
be specified in precisely one way.  So either a flag, or coming from the type
of fd that was opened.

-serge
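
To make the flag-based variant concrete, a hypothetical userspace sketch
(the syscall number and the TASKFD_* constant below are placeholders; none
of this is merged ABI):

	#include <fcntl.h>
	#include <signal.h>
	#include <stdio.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	#define __NR_taskfd_send_signal	335	/* placeholder number */
	#define TASKFD_PID		0	/* placeholder: signal the whole process */

	int main(void)
	{
		int fd = open("/proc/1234", O_RDONLY | O_DIRECTORY);	/* example pid */

		if (fd < 0)
			return 1;
		/* taskfd_send_signal(fd, sig, info, flags) as proposed above */
		if (syscall(__NR_taskfd_send_signal, fd, SIGTERM, NULL, TASKFD_PID) < 0)
			perror("taskfd_send_signal");
		close(fd);
		return 0;
	}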


linux-next: manual merge of the jc_docs tree with the fscrypt tree

2018-12-06 Thread Stephen Rothwell
Hi all,

Today's linux-next merge of the jc_docs tree got a conflict in:

  Documentation/filesystems/index.rst

between commit:

  1b71a6809f96 ("fs-verity: add a documentation file")

from the fscrypt tree and commit:

  7bbfd9ad8eb2 ("Documentation: convert path-lookup from markdown to 
resturctured text")

from the jc_docs tree.

I fixed it up (see below) and can carry the fix as necessary. This
is now fixed as far as linux-next is concerned, but any non trivial
conflicts should be mentioned to your upstream maintainer when your tree
is submitted for merging.  You may also want to consider cooperating
with the maintainer of the conflicting tree to minimise any particularly
complex conflicts.

-- 
Cheers,
Stephen Rothwell

diff --cc Documentation/filesystems/index.rst
index 818390c32be6,ba921bdd5b06..
--- a/Documentation/filesystems/index.rst
+++ b/Documentation/filesystems/index.rst
@@@ -360,13 -360,13 +360,24 @@@ encryption of files and directories
  
  fscrypt
  
 +Verity API
 +==
 +
 +A library which filesystems can hook into to support transparent
 +authentication of read-only files.
 +
 +.. toctree::
 +:maxdepth: 2
 +
 +fsverity
++
+ Pathname lookup
+ ===
+ 
+ Pathname lookup in Linux is a complex beast; the document linked below
+ provides a comprehensive summary for those looking for the details.
+ 
+ .. toctree::
+:maxdepth: 2
+ 
+path-lookup.rst


pgpQNzTArkqTc.pgp
Description: OpenPGP digital signature


[PATCH v5 1/2] dt-bindings: PCI: Add UniPhier PCIe host controller description

2018-12-06 Thread Kunihiko Hayashi
Add DT bindings for PCIe controller implemented in UniPhier SoCs when
configured in Root Complex (host) mode. This controller is based on
the DesignWare PCIe core.

Signed-off-by: Kunihiko Hayashi 
Reviewed-by: Rob Herring 
---
 .../devicetree/bindings/pci/uniphier-pcie.txt  | 81 ++
 1 file changed, 81 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/pci/uniphier-pcie.txt

diff --git a/Documentation/devicetree/bindings/pci/uniphier-pcie.txt b/Documentation/devicetree/bindings/pci/uniphier-pcie.txt
new file mode 100644
index 000..1fa2c59
--- /dev/null
+++ b/Documentation/devicetree/bindings/pci/uniphier-pcie.txt
@@ -0,0 +1,81 @@
+Socionext UniPhier PCIe host controller bindings
+
+This describes the devicetree bindings for PCIe host controller implemented
+on Socionext UniPhier SoCs.
+
+UniPhier PCIe host controller is based on the Synopsys DesignWare PCI core.
+It shares common functions with the PCIe DesignWare core driver and inherits
+common properties defined in
+Documentation/devicetree/bindings/pci/designware-pcie.txt.
+
+Required properties:
+- compatible: Should be "socionext,uniphier-pcie".
+- reg: Specifies offset and length of the register set for the device.
+   According to the reg-names, appropriate register sets are required.
+- reg-names: Must include the following entries:
+"dbi"- controller configuration registers
+"link"   - SoC-specific glue layer registers
+"config" - PCIe configuration space
+- clocks: A phandle to the clock gate for PCIe glue layer including
+   the host controller.
+- resets: A phandle to the reset line for PCIe glue layer including
+   the host controller.
+- interrupts: A list of interrupt specifiers. According to the
+   interrupt-names, appropriate interrupts are required.
+- interrupt-names: Must include the following entries:
+"dma" - DMA interrupt
+"msi" - MSI interrupt
+
+Optional properties:
+- phys: A phandle to generic PCIe PHY. According to the phy-names, appropriate
+   phys are required.
+- phy-names: Must be "pcie-phy".
+
+Required sub-node:
+- legacy-interrupt-controller: Specifies interrupt controller for legacy PCI
+   interrupts.
+
+Required properties for legacy-interrupt-controller:
+- interrupt-controller: identifies the node as an interrupt controller.
+- #interrupt-cells: specifies the number of cells needed to encode an
+   interrupt source. The value must be 1.
+- interrupt-parent: Phandle to the parent interrupt controller.
+- interrupts: An interrupt specifier for legacy interrupt.
+
+Example:
+
+   pcie: pcie@6600 {
+   compatible = "socionext,uniphier-pcie", "snps,dw-pcie";
+   status = "disabled";
+   reg-names = "dbi", "link", "config";
+   reg = <0x6600 0x1000>, <0x6601 0x1>,
+ <0x2fff 0x1>;
+   #address-cells = <3>;
+   #size-cells = <2>;
+   clocks = <_clk 24>;
+   resets = <_rst 24>;
+   num-lanes = <1>;
+   num-viewport = <1>;
+   bus-range = <0x0 0xff>;
+   device_type = "pci";
+   ranges =
+   /* downstream I/O */
+   <0x8100 0 0x  0x2ffe  0 0x0001
+   /* non-prefetchable memory */
+0x8200 0 0x  0x2000  0 0x0ffe>;
+   #interrupt-cells = <1>;
+   interrupt-names = "dma", "msi";
+   interrupts = <0 224 4>, <0 225 4>;
+   interrupt-map-mask = <0 0 0  7>;
+   interrupt-map = <0 0 0  1  _intc 0>,   /* INTA */
+   <0 0 0  2  _intc 1>,   /* INTB */
+   <0 0 0  3  _intc 2>,   /* INTC */
+   <0 0 0  4  _intc 3>;   /* INTD */
+
+   pcie_intc: legacy-interrupt-controller {
+   interrupt-controller;
+   #interrupt-cells = <1>;
+   interrupt-parent = <>;
+   interrupts = <0 226 4>;
+   };
+   };
-- 
2.7.4



[PATCH v5 2/2] PCI: uniphier: Add UniPhier PCIe host controller support

2018-12-06 Thread Kunihiko Hayashi
This introduces a platform-specific glue layer for UniPhier SoCs to support
the PCIe host controller that is based on the DesignWare PCIe core. The
driver supports Root Complex (host) mode.

Signed-off-by: Kunihiko Hayashi 
---
 MAINTAINERS|   7 +
 drivers/pci/controller/dwc/Kconfig |  10 +
 drivers/pci/controller/dwc/Makefile|   1 +
 drivers/pci/controller/dwc/pcie-uniphier.c | 471 +
 4 files changed, 489 insertions(+)
 create mode 100644 drivers/pci/controller/dwc/pcie-uniphier.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 48a65c3..f735af2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11332,6 +11332,13 @@ S: Maintained
 F: Documentation/devicetree/bindings/pci/v3-v360epc-pci.txt
 F: drivers/pci/controller/pci-v3-semi.c
 
+PCIE DRIVER FOR SOCIONEXT UNIPHIER
+M: Kunihiko Hayashi 
+L: linux-...@vger.kernel.org
+S: Maintained
+F: Documentation/devicetree/bindings/pci/uniphier-pcie.txt
+F: drivers/pci/controller/dwc/pcie-uniphier.c
+
 PCIE DRIVER FOR ST SPEAR13XX
 M: Pratyush Anand 
 L: linux-...@vger.kernel.org
diff --git a/drivers/pci/controller/dwc/Kconfig 
b/drivers/pci/controller/dwc/Kconfig
index 91b0194..9550688 100644
--- a/drivers/pci/controller/dwc/Kconfig
+++ b/drivers/pci/controller/dwc/Kconfig
@@ -193,4 +193,14 @@ config PCIE_HISI_STB
help
   Say Y here if you want PCIe controller support on HiSilicon STB SoCs
 
+config PCIE_UNIPHIER
+   bool "Socionext UniPhier PCIe controllers"
+   depends on ARCH_UNIPHIER || COMPILE_TEST
+   depends on OF && HAS_IOMEM
+   depends on PCI_MSI_IRQ_DOMAIN
+   select PCIE_DW_HOST
+   help
+ Say Y here if you want PCIe controller support on UniPhier SoCs.
+ This driver supports LD20 and PXs3 SoCs.
+
 endmenu
diff --git a/drivers/pci/controller/dwc/Makefile 
b/drivers/pci/controller/dwc/Makefile
index 5d2ce72..cbde733 100644
--- a/drivers/pci/controller/dwc/Makefile
+++ b/drivers/pci/controller/dwc/Makefile
@@ -14,6 +14,7 @@ obj-$(CONFIG_PCIE_ARMADA_8K) += pcie-armada8k.o
 obj-$(CONFIG_PCIE_ARTPEC6) += pcie-artpec6.o
 obj-$(CONFIG_PCIE_KIRIN) += pcie-kirin.o
 obj-$(CONFIG_PCIE_HISI_STB) += pcie-histb.o
+obj-$(CONFIG_PCIE_UNIPHIER) += pcie-uniphier.o
 
 # The following drivers are for devices that use the generic ACPI
 # pci_root.c driver but don't support standard ECAM config access.
diff --git a/drivers/pci/controller/dwc/pcie-uniphier.c 
b/drivers/pci/controller/dwc/pcie-uniphier.c
new file mode 100644
index 000..d5dc402
--- /dev/null
+++ b/drivers/pci/controller/dwc/pcie-uniphier.c
@@ -0,0 +1,471 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCIe host controller driver for UniPhier SoCs
+ * Copyright 2018 Socionext Inc.
+ * Author: Kunihiko Hayashi 
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "pcie-designware.h"
+
+#define PCL_PINCTRL0   0x002c
+#define PCL_PERST_PLDN_REGEN   BIT(12)
+#define PCL_PERST_NOE_REGENBIT(11)
+#define PCL_PERST_OUT_REGENBIT(8)
+#define PCL_PERST_PLDN_REGVAL  BIT(4)
+#define PCL_PERST_NOE_REGVAL   BIT(3)
+#define PCL_PERST_OUT_REGVAL   BIT(0)
+
+#define PCL_PIPEMON0x0044
+#define PCL_PCLK_ALIVE BIT(15)
+
+#define PCL_APP_READY_CTRL 0x8008
+#define PCL_APP_LTSSM_ENABLE   BIT(0)
+
+#define PCL_APP_PM00x8078
+#define PCL_SYS_AUX_PWR_DETBIT(8)
+
+#define PCL_RCV_INT0x8108
+#define PCL_RCV_INT_ALL_ENABLE GENMASK(20, 17)
+#define PCL_CFG_BW_MGT_STATUS  BIT(4)
+#define PCL_CFG_LINK_AUTO_BW_STATUSBIT(3)
+#define PCL_CFG_AER_RC_ERR_MSI_STATUS  BIT(2)
+#define PCL_CFG_PME_MSI_STATUS BIT(1)
+
+#define PCL_RCV_INTX   0x810c
+#define PCL_RCV_INTX_ALL_ENABLEGENMASK(19, 16)
+#define PCL_RCV_INTX_ALL_MASK  GENMASK(11, 8)
+#define PCL_RCV_INTX_MASK_SHIFT8
+#define PCL_RCV_INTX_ALL_STATUSGENMASK(3, 0)
+#define PCL_RCV_INTX_STATUS_SHIFT  0
+
+#define PCL_STATUS_LINK0x8140
+#define PCL_RDLH_LINK_UP   BIT(1)
+#define PCL_XMLH_LINK_UP   BIT(0)
+
+struct uniphier_pcie_priv {
+   void __iomem *base;
+   struct dw_pcie pci;
+   struct clk *clk;
+   struct reset_control *rst;
+   struct phy *phy;
+   struct irq_domain *legacy_irq_domain;
+};
+
+#define to_uniphier_pcie(x)dev_get_drvdata((x)->dev)
+
+static void uniphier_pcie_ltssm_enable(struct uniphier_pcie_priv *priv,
+  bool enable)
+{
+   u32 val;
+
+   val = readl(priv->base + PCL_APP_READY_CTRL);
+   if (enable)
+   val |= PCL_APP_LTSSM_ENABLE;
+   else
+   val &= 


[PATCH v5 0/2] Add new UniPhier PCIe host driver

2018-12-06 Thread Kunihiko Hayashi
This series adds a PCIe host controller driver for Socionext UniPhier SoCs.
The controller is based on the DesignWare PCIe core. The driver
supports the LD20 and PXs3 SoCs.

v4: https://www.spinics.net/lists/linux-pci/msg78278.html

Regarding legacy IRQs, it might be necessary to share a common view with the
keystone driver that has been cleaned up [1].

[1] https://lore.kernel.org/patchwork/patch/989541/

Changes since v4:
- fix an issue where the wrong value was used to mask the IRQ mask register

Changes since v3:
- dt-bindings: fix INTX numbering of legacy interrupt map
- change interrupts to level ones
- remove .xlate function
- merge uniphier_pcie_ltssm_disable() into uniphier_pcie_ltssm_enable()
- remove an error message on uniphier_pcie_establish_link()
- change the order between irq_domain_add_linear() and
  irq_set_chained_handler_and_data()
- replace dummy_irq_chip with uniphier_pcie_irq_chip and its functions
- add dependency on CONFIG_HAS_IOMEM
- MAINTAINERS: add pcie-uniphier entry

Changes since v2:
- dt-bindings: remove a comment that the node name isn't important
- dt-bindings: remove "intx" interrupt
- dt-bindings: define 'legacy-interrupt-controller' node and its properties
- return an error value when link up fails
- remove devm_request_irq() and a handler for MSI IRQ
- use chained interrupt instead of devm_request_irq() for legacy IRQ
- add uniphier_pcie_config_legacy_irq() to get the legacy IRQ from the
  'legacy-interrupt-controller' node
- replace 4 statements that handle INTX with for_each_set_bit() (see the
  sketch after the changelog below)
- remove initialization of pp->root_bus_nr
- remove individual interrupt enable bit definitions
- rename 'irq_domain' member to 'legacy_irq_domain' in private structure
- use pci_irqd_intx_xlate() for irq_domain_ops.xlate function

Changes since v1:
- follow capitalization conventions in the descriptions
- use C style comments except for the SPDX line
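
For reference, an illustrative sketch (not code from the posted driver) of the
for_each_set_bit() pattern referred to in the "Changes since v2" list above.
The function name is invented; it assumes a 4-bit INTA..INTD status value has
already been read from the glue-layer register and that the driver has created
a legacy IRQ domain:

#include <linux/bitops.h>
#include <linux/irqdesc.h>
#include <linux/irqdomain.h>
#include <linux/pci.h>

/* Dispatch each pending INTx line to its mapping in the legacy IRQ domain. */
static void example_handle_intx(struct irq_domain *legacy_irq_domain,
				unsigned long status)
{
	unsigned long bit;

	/* status carries INTA..INTD in bits 0..3 */
	for_each_set_bit(bit, &status, PCI_NUM_INTX)
		generic_handle_irq(irq_find_mapping(legacy_irq_domain, bit));
}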

Kunihiko Hayashi (2):
  dt-bindings: PCI: Add UniPhier PCIe host controller description
  PCI: uniphier: Add UniPhier PCIe host controller support

 .../devicetree/bindings/pci/uniphier-pcie.txt  |  81 
 MAINTAINERS|   7 +
 drivers/pci/controller/dwc/Kconfig |  10 +
 drivers/pci/controller/dwc/Makefile|   1 +
 drivers/pci/controller/dwc/pcie-uniphier.c | 471 +
 5 files changed, 570 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/pci/uniphier-pcie.txt
 create mode 100644 drivers/pci/controller/dwc/pcie-uniphier.c

-- 
2.7.4



[PATCH v2] tps65218: Use devm_regmap_add_irq_chip and clean up error path in probe

2018-12-06 Thread Keerthy
Use devm_regmap_add_irq_chip() so that the IRQ chip is released
automatically, and clean up the probe error path and the remove function
accordingly.

Reported-by: Christian Hohnstaedt 
Signed-off-by: Keerthy 
---

Changes in v2:

  * Cleaned up remove function as well.

 drivers/mfd/tps65218.c | 18 +++---
 1 file changed, 3 insertions(+), 15 deletions(-)

diff --git a/drivers/mfd/tps65218.c b/drivers/mfd/tps65218.c
index 910f569..d2d4e3a 100644
--- a/drivers/mfd/tps65218.c
+++ b/drivers/mfd/tps65218.c
@@ -235,9 +235,9 @@ static int tps65218_probe(struct i2c_client *client,
 
mutex_init(>tps_lock);
 
-   ret = regmap_add_irq_chip(tps->regmap, tps->irq,
-   IRQF_ONESHOT, 0, _irq_chip,
-   >irq_data);
+   ret = devm_regmap_add_irq_chip(>dev, tps->regmap, tps->irq,
+  IRQF_ONESHOT, 0, _irq_chip,
+  >irq_data);
if (ret < 0)
return ret;
 
@@ -253,23 +253,11 @@ static int tps65218_probe(struct i2c_client *client,
  ARRAY_SIZE(tps65218_cells), NULL, 0,
  regmap_irq_get_domain(tps->irq_data));
 
-   if (ret < 0)
-   goto err_irq;
-
-   return 0;
-
-err_irq:
-   regmap_del_irq_chip(tps->irq, tps->irq_data);
-
return ret;
 }
 
 static int tps65218_remove(struct i2c_client *client)
 {
-   struct tps65218 *tps = i2c_get_clientdata(client);
-
-   regmap_del_irq_chip(tps->irq, tps->irq_data);
-
return 0;
 }
 
-- 
1.9.1




Re: [PATCH] dt-bindings: sifive: describe sifive-blocks versioning

2018-12-06 Thread Paul Walmsley



On Thu, 6 Dec 2018, Rob Herring wrote:

> On Wed, Nov 21, 2018 at 05:06:56PM -0800, Paul Walmsley wrote:
> > 
> > For IP blocks that are generated from the public, open-source
> > sifive-blocks repository, describe the version numbering policy
> > that its maintainers intend to use, upon request from Rob
> > Herring .
> > 
> > Cc: Rob Herring 
> > Cc: Palmer Dabbelt 
> > Cc: Megan Wachs 
> > Cc: Wesley Terpstra 
> > Cc: Mark Rutland 
> > Cc: devicet...@vger.kernel.org
> > Cc: linux-ri...@lists.infradead.org
> > Cc: linux-kernel@vger.kernel.org
> > Signed-off-by: Paul Walmsley 
> > Signed-off-by: Paul Walmsley 
> > ---
> > 
> > Hi Rob, please let me know if this document works with your
> > requirements, or if some changes are needed.  - Paul
> 
> Looks pretty good to me.
> 
> >  .../sifive/sifive-blocks-ip-versioning.txt| 38 +++
> >  1 file changed, 38 insertions(+)
> >  create mode 100644 
> > Documentation/devicetree/bindings/sifive/sifive-blocks-ip-versioning.txt
> 
> Use the path that was suggested.

Could you remind me which one that is?

> > +
> > +An example is "sifive,uart0" from:
> > +
> > +https://github.com/sifive/sifive-blocks/blob/master/src/main/scala/devices/uart/UART.scala#L43
> 
> That's nice, but will be out of date as soon as someone adds or removes 
> a line above it. Can you point to a tagged version?

Will do

- Paul



Re: [RFC v3 17/19] of: unittest: migrate tests to run on KUnit

2018-12-06 Thread Rob Herring
On Wed, Dec 5, 2018 at 5:43 PM Brendan Higgins
 wrote:
>
> On Tue, Dec 4, 2018 at 5:41 AM Rob Herring  wrote:
> >
> > On Mon, Dec 3, 2018 at 6:14 PM Brendan Higgins
> >  wrote:
> > >
> > > On Thu, Nov 29, 2018 at 4:40 PM Randy Dunlap  
> > > wrote:
> > > >
> > > > On 11/28/18 12:56 PM, Rob Herring wrote:
> > > > >> diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig
> > > > >> index ad3fcad4d75b8..f309399deac20 100644
> > > > >> --- a/drivers/of/Kconfig
> > > > >> +++ b/drivers/of/Kconfig
> > > > >> @@ -15,6 +15,7 @@ if OF
> > > > >>  config OF_UNITTEST
> > > > >> bool "Device Tree runtime unit tests"
> > > > >> depends on !SPARC
> > > > >> +   depends on KUNIT
> > > > > Unless KUNIT has depends, better to be a select here.
> > > >
> > > > That's just style or taste.  I would prefer to use depends
> > > > instead of select, but that's also just my preference.
> > >
> > > I prefer depends too, but Rob is the maintainer here.
> >
> > Well, we should be consistent, not the follow the whims of each maintainer.
>
> Sorry, I don't think that came out the way I meant it. I don't really
> think we are consistent on this point across the kernel, and I don't
> feel very strongly about the point, so I was just looking to follow
> the path of least resistance. (I also just assumed Rob would keep us
> consistent within drivers/of/.)

I meant that across unittests we should be consistent: all unittests do
either "depends on KUNIT" or "select KUNIT". The question I would ask
is: does KUNIT need to be user visible, or is it useful to enable without
any unittests enabled? With depends, a user has two options to go enable
vs. one with select.

But if you want a global kill switch to turn off all unittests, then
depends works better.

> I figure if we are running unit tests from the test runner script or
> from an automated system, you won't be hunting for dependencies for a
> single test every time you want to run a test, so select doesn't make
> it easier to configure in most imagined use cases.
>
> KUNIT hypothetically should not depend on anything, so select should
> be safe to use.
>
> On the other hand, if we end up being wrong on this point and KUnit
> gains widespread adoption, I would prefer not to be in a position
> where I have to change a bunch of configs all over the kernel because
> this example got copied and pasted.

You'll be so happy that 100s of tests have been created using kunit,
it won't be a big deal. :)

In any case, I wouldn't spend more time on this.

Rob


Re: [PATCH v4] signal: add taskfd_send_signal() syscall

2018-12-06 Thread Daniel Colascione
On Thu, Dec 6, 2018 at 4:31 PM Serge E. Hallyn  wrote:
>
> On Fri, Dec 07, 2018 at 12:17:45AM +0100, Christian Brauner wrote:
> > On Thu, Dec 06, 2018 at 11:39:48PM +0100, Christian Brauner wrote:
> > > On Thu, Dec 06, 2018 at 03:46:53PM -0600, Eric W. Biederman wrote:
> > > > Christian Brauner  writes:
> > > >
> > > > >> Your intention is to add the thread case to support pthreads once the
> > > > >> process case is sorted out.  So this is something that needs to be 
> > > > >> made
> > > > >> clear.  Did I miss how you plan to handle threads?
> > > > >
> > > > > Yeah, maybe you missed it in the commit message [2] which is based on 
> > > > > a
> > > > > discussion with Andy [3] and Arnd [4]:
> > > >
> > > > Looking at your references I haven't missed it.  You are not deciding
> > > > anything as of yet to keep it simple.  Except you are returning
> > > > EOPNOTSUPP.  You are very much intending to do something.
> > >
> > > That was clear all along and was pointed at every occassion in the
> > > threads. I even went through the hazzle to give you all of the
> > > references when there's lore.kernel.org.
> > >
> > > >
> > > > Decide.  Do you use the flags parameter or is the width of the
> > > > target depending on the flags.
> >
> > Ok, let's try to be constructive. I understand the general concern for
> > the future so let's put a contract into the commit message stating that
> > the width of the target aka *what is signaled* will be based on a flag
> > parameter if we ever extend it:
> >
> > taskfd_send_signal(fd, SIGSTOP, NULL, TASKFD_PGID);
> > taskfd_send_signal(fd, SIGSTOP, NULL, TASKFD_TID);
> >
> > with the current default being
> >
> > taskfd_send_signal(fd, SIGSTOP, NULL, TASKFD_PID);
> >
> > This seems to me the cleanest solution as we only use one type of file
> > descriptor. Can everyone be on board with this? If so I'm going to send
> > out a new version of the patch.
> >
> > Christian
>
> I'm on board with this, but I think you need to also clarify what exactly
> the fd stands for.  I think that (a) userspace should not have to care
> about the struct pid implementation, and so (b) the procfd should stand
> for all the pids.  So when taskfd_send_signal(fd, SIGSTOP, NULL, TASKFD_PGID)
> becomes implemented, then open(/proc/5) will pin all three pids, as will
> open(/proc/5/task/6).

This change doesn't "pin" any PID, and it makes no sense to make a
process FD stand for all its threads. What does that even mean?
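
For concreteness, a minimal userspace sketch of the flag-based scoping quoted
above, written as a thin syscall wrapper. The syscall number and the TASKFD_*
flag values are placeholders invented for the example, not values taken from
any posted patch:

#define _GNU_SOURCE
#include <signal.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Placeholder values for illustration only. */
#define __NR_taskfd_send_signal_example	499	/* not an allocated number */
#define TASKFD_PID	0x0	/* default: signal the whole process */
#define TASKFD_TID	0x1	/* future: signal a single thread */
#define TASKFD_PGID	0x2	/* future: signal the process group */

static inline int taskfd_send_signal(int fd, int sig, siginfo_t *info,
				     unsigned int flags)
{
	return syscall(__NR_taskfd_send_signal_example, fd, sig, info, flags);
}

/* Usage as in the proposal: taskfd_send_signal(fd, SIGSTOP, NULL, TASKFD_PID); */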



Re: [PATCH v4] signal: add taskfd_send_signal() syscall

2018-12-06 Thread Serge E. Hallyn
On Fri, Dec 07, 2018 at 12:17:45AM +0100, Christian Brauner wrote:
> On Thu, Dec 06, 2018 at 11:39:48PM +0100, Christian Brauner wrote:
> > On Thu, Dec 06, 2018 at 03:46:53PM -0600, Eric W. Biederman wrote:
> > > Christian Brauner  writes:
> > > 
> > > >> Your intention is to add the thread case to support pthreads once the
> > > >> process case is sorted out.  So this is something that needs to be made
> > > >> clear.  Did I miss how you plan to handle threads?
> > > >
> > > > Yeah, maybe you missed it in the commit message [2] which is based on a
> > > > discussion with Andy [3] and Arnd [4]:
> > > 
> > > Looking at your references I haven't missed it.  You are not deciding
> > > anything as of yet to keep it simple.  Except you are returning
> > > EOPNOTSUPP.  You are very much intending to do something.
> > 
> > That was clear all along and was pointed at every occassion in the
> > threads. I even went through the hazzle to give you all of the
> > references when there's lore.kernel.org.
> > 
> > > 
> > > Decide.  Do you use the flags parameter or is the width of the
> > > target depending on the flags.
> 
> Ok, let's try to be constructive. I understand the general concern for
> the future so let's put a contract into the commit message stating that
> the width of the target aka *what is signaled* will be based on a flag
> parameter if we ever extend it:
> 
> taskfd_send_signal(fd, SIGSTOP, NULL, TASKFD_PGID);
> taskfd_send_signal(fd, SIGSTOP, NULL, TASKFD_TID);
> 
> with the current default being
> 
> taskfd_send_signal(fd, SIGSTOP, NULL, TASKFD_PID);
> 
> This seems to me the cleanest solution as we only use one type of file
> descriptor. Can everyone be on board with this? If so I'm going to send
> out a new version of the patch.
> 
> Christian

I'm on board with this, but I think you need to also clarify what exactly
the fd stands for.  I think that (a) userspace should not have to care
about the struct pid implementation, and so (b) the procfd should stand
for all the pids.  So when taskfd_send_signal(fd, SIGSTOP, NULL, TASKFD_PGID)
becomes implemented, then open(/proc/5) will pin all three pids, as will
open(/proc/5/task/6).

-serge



Re: [PATCH] mfd: tps65218: Use devm_regmap_add_irq_chip and clean up error path in probe

2018-12-06 Thread J, KEERTHY




On 12/7/2018 2:21 AM, Sebastian Reichel wrote:

Hi,

On Thu, Dec 06, 2018 at 11:07:44PM +0530, Keerthy wrote:

Use devm_regmap_add_irq_chip and clean up error path in probe.

Reported-by: Christian Hohnstaedt 
Signed-off-by: Keerthy 
---

Boot tested on am437x-gp-evm.


This is missing cleanup of remove path?


Yes! Thanks for catching it. I will send v2 in a bit.



-- Sebastian


  drivers/mfd/tps65218.c | 14 +++---
  1 file changed, 3 insertions(+), 11 deletions(-)

diff --git a/drivers/mfd/tps65218.c b/drivers/mfd/tps65218.c
index 910f569..2383621 100644
--- a/drivers/mfd/tps65218.c
+++ b/drivers/mfd/tps65218.c
@@ -235,9 +235,9 @@ static int tps65218_probe(struct i2c_client *client,
  
  	mutex_init(>tps_lock);
  
-	ret = regmap_add_irq_chip(tps->regmap, tps->irq,

-   IRQF_ONESHOT, 0, _irq_chip,
-   >irq_data);
+   ret = devm_regmap_add_irq_chip(>dev, tps->regmap, tps->irq,
+  IRQF_ONESHOT, 0, _irq_chip,
+  >irq_data);
if (ret < 0)
return ret;
  
@@ -253,14 +253,6 @@ static int tps65218_probe(struct i2c_client *client,

  ARRAY_SIZE(tps65218_cells), NULL, 0,
  regmap_irq_get_domain(tps->irq_data));
  
-	if (ret < 0)

-   goto err_irq;
-
-   return 0;
-
-err_irq:
-   regmap_del_irq_chip(tps->irq, tps->irq_data);
-
return ret;
  }
  
--

1.9.1




Re: [RFC PATCH 00/14] Heterogeneous Memory System (HMS) and hbind()

2018-12-06 Thread Jerome Glisse
On Thu, Dec 06, 2018 at 04:48:57PM -0700, Logan Gunthorpe wrote:
> 
> 
> On 2018-12-06 4:38 p.m., Dave Hansen wrote:
> > On 12/6/18 3:28 PM, Logan Gunthorpe wrote:
> >> I didn't think this was meant to describe actual real world performance
> >> between all of the links. If that's the case all of this seems like a
> >> pipe dream to me.
> > 
> > The HMAT discussions (that I was a part of at least) settled on just
> > trying to describe what we called "sticker speed".  Nobody had an
> > expectation that you *really* had to measure everything.
> > 
> > The best we can do for any of these approaches is approximate things.
> 
> Yes, though there's a lot of caveats in this assumption alone.
> Specifically with PCI: the bus may run at however many GB/s but P2P
> through a CPU's root complexes can slow down significantly (like down to
> MB/s).
> 
> I've seen similar things across QPI: I can sometimes do P2P from
> PCI->QPI->PCI but the performance doesn't even come close to the sticker
> speed of any of those buses.
> 
> I'm not sure how anyone is going to deal with those issues, but it does
> firmly place us in world view #2 instead of #1. But, yes, I agree
> exposing information like in #2 full out to userspace, especially
> through sysfs, seems like a nightmare and I don't see anything in HMS to
> help with that. Providing an API to ask for memory (or another resource)
> that's accessible by a set of initiators and with a set of requirements
> for capabilities seems more manageable.

Note that in #1 you have bridges that fully allow expressing those path
limitations. So what you just describe can be fully reported to userspace.

I have explained and given examples of how programs adapt their computation
to the system topology; this exists today, and people are even developing new
programming languages with some of those ideas baked in.

So there are people out there who already rely on such information; they
just do not get it from the kernel but from a mix of various device-specific
APIs, and they have to stitch everything together themselves and develop a
database of quirks and gotchas. My proposal is to provide a coherent kernel
API where we can sanitize that information and report it to userspace in a
single and coherent description.

Cheers,
Jérôme



Re: [PATCH] nvme-rdma: complete requests from ->timeout

2018-12-06 Thread Jaesoo Lee
Could you please take a look at this bug and review the code?

We are seeing more instances of this bug and found that reconnect_work
could hang as well, as can be seen from the stack trace below.

 Workqueue: nvme-wq nvme_rdma_reconnect_ctrl_work [nvme_rdma]
 Call Trace:
 __schedule+0x2ab/0x880
 schedule+0x36/0x80
 schedule_timeout+0x161/0x300
 ? __next_timer_interrupt+0xe0/0xe0
 io_schedule_timeout+0x1e/0x50
 wait_for_completion_io_timeout+0x130/0x1a0
 ? wake_up_q+0x80/0x80
 blk_execute_rq+0x6e/0xa0
 __nvme_submit_sync_cmd+0x6e/0xe0
 nvmf_connect_admin_queue+0x128/0x190 [nvme_fabrics]
 ? wait_for_completion_interruptible_timeout+0x157/0x1b0
 nvme_rdma_start_queue+0x5e/0x90 [nvme_rdma]
 nvme_rdma_setup_ctrl+0x1b4/0x730 [nvme_rdma]
 nvme_rdma_reconnect_ctrl_work+0x27/0x70 [nvme_rdma]
 process_one_work+0x179/0x390
 worker_thread+0x4f/0x3e0
 kthread+0x105/0x140
 ? max_active_store+0x80/0x80
 ? kthread_bind+0x20/0x20

This bug is reproduced by setting the MTU of the RoCE interface to 568 for
testing while running I/O traffic.

Thanks,

Jaesoo Lee.

On Thu, Nov 29, 2018 at 5:54 PM Jaesoo Lee  wrote:
>
> Not the queue, but the RDMA connections.
>
> Let me describe the scenario.
>
> 1. connected nvme-rdma target with 500 namespaces
> : this will make the nvme_remove_namespaces() took a long time to
> complete and open the window vulnerable to this bug
> 2. host will take below code path for nvme_delete_ctrl_work and send
> normal shutdown in nvme_shutdown_ctrl()
> - nvme_stop_ctrl
>   - nvme_stop_keep_alive --> stopped keep alive
> - nvme_remove_namespaces --> took too long time, over 10~15s
> - nvme_rdma_shutdown_ctrl
>   - nvme_rdma_teardown_io_queues
>   - nvme_shutdown_ctrl
> - nvmf_reg_write32
>   -__nvme_submit_sync_cmd --> nvme_delete_ctrl_work blocked here
>   - nvme_rdma_teardown_admin_queue
> - nvme_uninit_ctrl
> - nvme_put_ctrl
> 3. the rdma connection is disconnected by the nvme-rdma target
> : in our case, this is triggered by the target side timeout mechanism
> : I did not try, but I think this could happen if we lost the RoCE link, too.
> 4. the shutdown notification command timed out and the work stuck
> while leaving the controller in NVME_CTRL_DELETING state
>
> Thanks,
>
> Jaesoo Lee.
>
>
> On Thu, Nov 29, 2018 at 5:30 PM Sagi Grimberg  wrote:
> >
> >
> > > This does not hold at least for NVMe RDMA host driver. An example scenario
> > > is when the RDMA connection is gone while the controller is being deleted.
> > > In this case, the nvmf_reg_write32() for sending shutdown admin command by
> > > the delete_work could be hung forever if the command is not completed by
> > > the timeout handler.
> >
> > If the queue is gone, this means that the queue has already flushed and
> > any commands that were inflight has completed with a flush error
> > completion...
> >
> > Can you describe the scenario that caused this hang? When has the
> > queue became "gone" and when did the shutdown command execute?



Re: [RFC PATCH 00/14] Heterogeneous Memory System (HMS) and hbind()

2018-12-06 Thread Jerome Glisse
On Thu, Dec 06, 2018 at 03:09:21PM -0800, Dave Hansen wrote:
> On 12/6/18 2:39 PM, Jerome Glisse wrote:
> > No if the 4 sockets are connect in a ring fashion ie:
> > Socket0 - Socket1
> >| |
> > Socket3 - Socket2
> > 
> > Then you have 4 links:
> > link0: socket0 socket1
> > link1: socket1 socket2
> > link3: socket2 socket3
> > link4: socket3 socket0
> > 
> > I do not see how their can be an explosion of link directory, worse
> > case is as many link directories as they are bus for a CPU/device/
> > target.
> 
> This looks great.  But, we don't _have_ this kind of information for any
> system that I know about or any system available in the near future.

We do not have it in any standard way; it is out there in device
driver databases, application databases, or special platform
OEM blobs buried somewhere in the firmware ...

I want to solve the kernel side of the problem, i.e. how to expose
this to userspace. How the kernel gets that information is an
orthogonal problem. For now my intention is to have device drivers
register and create the links and bridges that are not enumerated
by standard firmware.

> 
> We basically have two different world views:
> 1. The system is described point-to-point.  A connects to B @
>100GB/s.  B connects to C at 50GB/s.  Thus, C->A should be
>50GB/s.
>* Less information to convey
>* Potentially less precise if the properties are not perfectly
>  additive.  If A->B=10ns and B->C=20ns, A->C might be >30ns.
>* Costs must be calculated instead of being explicitly specified
> 2. The system is described endpoint-to-endpoint.  A->B @ 100GB/s
>B->C @ 50GB/s, A->C @ 50GB/s.
>* A *lot* more information to convey O(N^2)?
>* Potentially more precise.
>* Costs are explicitly specified, not calculated
> 
> These patches are really tied to world view #1.  But, the HMAT is really
> tied to world view #1.
  ^#2

Note that there are also bridge objects in my proposal. So in my
proposal, in #1 you have:
link0: A <-> B with 100GB/s and 10ns latency
link1: B <-> C with 50GB/s and 20ns latency

Now if A can reach C through B then you have bridges (bridges are uni-
directional, unlike links which are bi-directional, though that finer
point can be discussed; this is what allows any kind of directed graph
to be represented):
bridge2: link0 -> link1
bridge3: link1 -> link0

You can also associate properties with a bridge (but it is not mandatory).
So you can say that bridge2 and bridge3 have a latency of 50ns, and if
the addition of latencies is enough then you do not specify it in the bridge.
The rule is that a path's latency is the sum of its individual link
latencies. For bandwidth it is the minimum bandwidth, i.e. whatever is the
bottleneck for the path.
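
As a minimal illustration (not code from this patch set), the two rules above
could be applied like this when walking a path made of links. The struct and
field names are invented for the example; the real proposal describes links
and bridges through sysfs objects instead:

/* One hop of a path, with the sticker properties discussed above. */
struct example_link {
	unsigned int bandwidth;		/* e.g. link0 = 100 (GB/s) */
	unsigned int latency;		/* e.g. link0 = 10 (ns) */
};

static void example_path_properties(const struct example_link *links,
				    unsigned int nr_links,
				    unsigned int *bandwidth,
				    unsigned int *latency)
{
	unsigned int i, bw = ~0U, lat = 0;

	for (i = 0; i < nr_links; i++) {
		/* path latency is the sum of its link latencies */
		lat += links[i].latency;
		/* path bandwidth is its bottleneck link */
		if (links[i].bandwidth < bw)
			bw = links[i].bandwidth;
	}

	*bandwidth = nr_links ? bw : 0;
	*latency = lat;
}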


> I know you're not a fan of the HMAT.  But it is the firmware reality
> that we are stuck with, until something better shows up.  I just don't
> see a way to convert it into what you have described here.

Like I said, I am not targeting HMAT systems; I am targeting systems that
today rely on databases spread between drivers and applications. I want to
move that knowledge into the drivers first so that they can teach the core
kernel and register things in the core. Providing a standard firmware
way to provide this information is a different problem (there are some
loose standards on non-ACPI platforms AFAIK).

> I'm starting to think that, no matter if the HMAT or some other approach
> gets adopted, we shouldn't be exposing this level of gunk to userspace
> at *all* since it requires adopting one of the world views.

I do not see this as exclusive. Yes, there are HMAT systems "soon" to arrive,
but we already have the more extended view, which is just buried under a
pile of different pieces. I do not see any exclusion between the two. If
HMAT is good enough for a whole class of systems, fine, but there is also
a whole class of systems and users that do not fit in that paradigm, hence
my proposal.

Cheers,
Jérôme


Re: [PATCH v2] mwifiex: debugfs: correct histogram spacing, formatting

2018-12-06 Thread Brian Norris
On Tue, Dec 04, 2018 at 08:37:30AM +0200, Kalle Valo wrote:
> Brian Norris  writes:
> 
> > Here's a v2 that combines both sets of strings in that way. I'm not
> > resending the other patches, since they were only related in concept
> > (since I was referring to debugfs for implementing the nl80211 stuff),
> > but have no real dependency.
> >
> > @Kalle: I can resend the others as a new series if that helps.
> 
> Yeah, it does. Trying to pick patches from different places in patchwork
> is just too much of a hassle for me. So please do resend the whole
> series.

I've split them up and resent the other 2 unmodified now (as 'RFC PATCH
v2 X/2'), as this patch is trivial and not related to the others
directly. They can be applied in any order. Hopefully that's sufficient.

Brian


[RFC PATCH v2 2/2] mwifiex: add NL80211_STA_INFO_RX_BITRATE support

2018-12-06 Thread Brian Norris
Comparing the existing TX_BITRATE parsing code (in
mwifiex_parse_htinfo()) with the RX bitrate histograms in debugfs.c, it
appears that the rxpd_rate and rxpd_htinfo fields have the same format.
At least, they give reasonable results when I parse them this way.

So this patch adds support for RX_BITRATE to our station info dump.

Along the way, I add legacy bitrate parsing into the same function,
using the debugfs code (mwifiex_histogram_read() and
mwifiex_adjust_data_rate()) as reference.

Additionally, to satisfy the requirements of
NL80211_STA_INFO_RX_BITRATE, I skip logging the bitrate of multicast
packets. This shouldn't add a lot of overhead to the RX path, as there
are already several similar 802.3 header checks in this same codepath.
We can also bias the branch behavior to favor unicast, as that's the
common performance-sensitive case.

I'd consider this support somewhat experimental, as I have zero
documentation from Marvell. But the existing driver code gives me good
reason to think this is correct.

I've tested this on a few different 802.11{a,b,g,n,ac} networks, and the
reported bitrates look good to me.

Signed-off-by: Brian Norris 
---
RFC: I'd appreciate it if someone from Marvell could double check my work
here.

v2:
 * no change - just split unrelated (debugfs) patch to its own series
---
 .../net/wireless/marvell/mwifiex/cfg80211.c   | 26 +++
 drivers/net/wireless/marvell/mwifiex/sta_rx.c | 13 ++
 2 files changed, 34 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c 
b/drivers/net/wireless/marvell/mwifiex/cfg80211.c
index 02b80ea232a7..1467af22e394 100644
--- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c
+++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c
@@ -1333,6 +1333,28 @@ mwifiex_parse_htinfo(struct mwifiex_private *priv, u8 
rateinfo, u8 htinfo,
rate->flags |= RATE_INFO_FLAGS_SHORT_GI;
}
}
+
+   /* Decode legacy rates for non-HT. */
+   if (!(htinfo & (BIT(0) | BIT(1 {
+   /* Bitrates in multiples of 100kb/s. */
+   static const int legacy_rates[] = {
+   [0] = 10,
+   [1] = 20,
+   [2] = 55,
+   [3] = 110,
+   [4] = 60, /* MWIFIEX_RATE_INDEX_OFDM0 */
+   [5] = 60,
+   [6] = 90,
+   [7] = 120,
+   [8] = 180,
+   [9] = 240,
+   [10] = 360,
+   [11] = 480,
+   [12] = 540,
+   };
+   if (rateinfo < ARRAY_SIZE(legacy_rates))
+   rate->legacy = legacy_rates[rateinfo];
+   }
 }
 
 /*
@@ -1414,6 +1436,10 @@ mwifiex_dump_station_info(struct mwifiex_private *priv,
/* bit rate is in 500 kb/s units. Convert it to 100kb/s units */
sinfo->txrate.legacy = rate * 5;
 
+   sinfo->filled |= BIT(NL80211_STA_INFO_RX_BITRATE);
+   mwifiex_parse_htinfo(priv, priv->rxpd_rate, priv->rxpd_htinfo,
+>rxrate);
+
if (priv->bss_mode == NL80211_IFTYPE_STATION) {
sinfo->filled |= BIT_ULL(NL80211_STA_INFO_BSS_PARAM);
sinfo->bss_param.flags = 0;
diff --git a/drivers/net/wireless/marvell/mwifiex/sta_rx.c 
b/drivers/net/wireless/marvell/mwifiex/sta_rx.c
index 00fcbda09349..fb28a5c7f441 100644
--- a/drivers/net/wireless/marvell/mwifiex/sta_rx.c
+++ b/drivers/net/wireless/marvell/mwifiex/sta_rx.c
@@ -152,14 +152,17 @@ int mwifiex_process_rx_packet(struct mwifiex_private 
*priv,
mwifiex_process_tdls_action_frame(priv, offset, rx_pkt_len);
}
 
-   priv->rxpd_rate = local_rx_pd->rx_rate;
-
-   priv->rxpd_htinfo = local_rx_pd->ht_info;
+   /* Only stash RX bitrate for unicast packets. */
+   if (likely(!is_multicast_ether_addr(rx_pkt_hdr->eth803_hdr.h_dest))) {
+   priv->rxpd_rate = local_rx_pd->rx_rate;
+   priv->rxpd_htinfo = local_rx_pd->ht_info;
+   }
 
if (GET_BSS_ROLE(priv) == MWIFIEX_BSS_ROLE_STA ||
GET_BSS_ROLE(priv) == MWIFIEX_BSS_ROLE_UAP) {
-   adj_rx_rate = mwifiex_adjust_data_rate(priv, priv->rxpd_rate,
-  priv->rxpd_htinfo);
+   adj_rx_rate = mwifiex_adjust_data_rate(priv,
+  local_rx_pd->rx_rate,
+  local_rx_pd->ht_info);
mwifiex_hist_data_add(priv, adj_rx_rate, local_rx_pd->snr,
  local_rx_pd->nf);
}
-- 
2.20.0.rc2.403.gdbc3b29805-goog



[RFC PATCH v2 1/2] mwifiex: refactor mwifiex_parse_htinfo() for reuse

2018-12-06 Thread Brian Norris
This function converts some firmware-specific parameters into cfg80211
'rate_info' structures. It currently assumes it's dealing only with TX
bitrate, but the RX bitrate looks to be the same, so refactor this
function to be reusable.

Signed-off-by: Brian Norris 
---
v2:
 * no change - just split unrelated (debugfs) patch to its own series
---
 .../net/wireless/marvell/mwifiex/cfg80211.c   | 36 ++-
 1 file changed, 19 insertions(+), 17 deletions(-)

diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c 
b/drivers/net/wireless/marvell/mwifiex/cfg80211.c
index adc88433faa8..02b80ea232a7 100644
--- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c
+++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c
@@ -1275,27 +1275,27 @@ mwifiex_cfg80211_change_virtual_intf(struct wiphy 
*wiphy,
 }
 
 static void
-mwifiex_parse_htinfo(struct mwifiex_private *priv, u8 tx_htinfo,
+mwifiex_parse_htinfo(struct mwifiex_private *priv, u8 rateinfo, u8 htinfo,
 struct rate_info *rate)
 {
struct mwifiex_adapter *adapter = priv->adapter;
 
if (adapter->is_hw_11ac_capable) {
/* bit[1-0]: 00=LG 01=HT 10=VHT */
-   if (tx_htinfo & BIT(0)) {
+   if (htinfo & BIT(0)) {
/* HT */
-   rate->mcs = priv->tx_rate;
+   rate->mcs = rateinfo;
rate->flags |= RATE_INFO_FLAGS_MCS;
}
-   if (tx_htinfo & BIT(1)) {
+   if (htinfo & BIT(1)) {
/* VHT */
-   rate->mcs = priv->tx_rate & 0x0F;
+   rate->mcs = rateinfo & 0x0F;
rate->flags |= RATE_INFO_FLAGS_VHT_MCS;
}
 
-   if (tx_htinfo & (BIT(1) | BIT(0))) {
+   if (htinfo & (BIT(1) | BIT(0))) {
/* HT or VHT */
-   switch (tx_htinfo & (BIT(3) | BIT(2))) {
+   switch (htinfo & (BIT(3) | BIT(2))) {
case 0:
rate->bw = RATE_INFO_BW_20;
break;
@@ -1310,26 +1310,26 @@ mwifiex_parse_htinfo(struct mwifiex_private *priv, u8 
tx_htinfo,
break;
}
 
-   if (tx_htinfo & BIT(4))
+   if (htinfo & BIT(4))
rate->flags |= RATE_INFO_FLAGS_SHORT_GI;
 
-   if ((priv->tx_rate >> 4) == 1)
+   if ((rateinfo >> 4) == 1)
rate->nss = 2;
else
rate->nss = 1;
}
} else {
/*
-* Bit 0 in tx_htinfo indicates that current Tx rate
-* is 11n rate. Valid MCS index values for us are 0 to 15.
+* Bit 0 in htinfo indicates that current rate is 11n. Valid
+* MCS index values for us are 0 to 15.
 */
-   if ((tx_htinfo & BIT(0)) && (priv->tx_rate < 16)) {
-   rate->mcs = priv->tx_rate;
+   if ((htinfo & BIT(0)) && (rateinfo < 16)) {
+   rate->mcs = rateinfo;
rate->flags |= RATE_INFO_FLAGS_MCS;
rate->bw = RATE_INFO_BW_20;
-   if (tx_htinfo & BIT(1))
+   if (htinfo & BIT(1))
rate->bw = RATE_INFO_BW_40;
-   if (tx_htinfo & BIT(2))
+   if (htinfo & BIT(2))
rate->flags |= RATE_INFO_FLAGS_SHORT_GI;
}
}
@@ -1375,7 +1375,8 @@ mwifiex_dump_station_info(struct mwifiex_private *priv,
sinfo->tx_packets = node->stats.tx_packets;
sinfo->tx_failed = node->stats.tx_failed;
 
-   mwifiex_parse_htinfo(priv, node->stats.last_tx_htinfo,
+   mwifiex_parse_htinfo(priv, priv->tx_rate,
+node->stats.last_tx_htinfo,
 >txrate);
sinfo->txrate.legacy = node->stats.last_tx_rate * 5;
 
@@ -1401,7 +1402,8 @@ mwifiex_dump_station_info(struct mwifiex_private *priv,
 HostCmd_ACT_GEN_GET, DTIM_PERIOD_I,
 >dtim_period, true);
 
-   mwifiex_parse_htinfo(priv, priv->tx_htinfo, >txrate);
+   mwifiex_parse_htinfo(priv, priv->tx_rate, priv->tx_htinfo,
+>txrate);
 
sinfo->signal_avg = priv->bcn_rssi_avg;
sinfo->rx_bytes = priv->stats.rx_bytes;
-- 
2.20.0.rc2.403.gdbc3b29805-goog



Re: [PATCH] arm64: dts: sdm845: Add lpasscc node

2018-12-06 Thread Doug Anderson
Hi,

On Wed, Dec 5, 2018 at 12:00 AM Taniya Das  wrote:
>
> This adds the low pass audio clock controller node to sdm845 based on
> the example in the bindings.
>
> Signed-off-by: Taniya Das 
> ---
>  arch/arm64/boot/dts/qcom/sdm845-mtp.dts | 4 +++-
>  arch/arm64/boot/dts/qcom/sdm845.dtsi| 8 
>  2 files changed, 11 insertions(+), 1 deletion(-)
>
> diff --git a/arch/arm64/boot/dts/qcom/sdm845-mtp.dts 
> b/arch/arm64/boot/dts/qcom/sdm845-mtp.dts
> index b3def03..cf73f3c 100644
> --- a/arch/arm64/boot/dts/qcom/sdm845-mtp.dts
> +++ b/arch/arm64/boot/dts/qcom/sdm845-mtp.dts
> @@ -346,7 +346,9 @@
>   {
> protected-clocks = ,
>,
> -  ;
> +  ,
> +  ,
> +  ;
>  };
>
>   {
> diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi 
> b/arch/arm64/boot/dts/qcom/sdm845.dtsi
> index 1419b00..a3db089 100644
> --- a/arch/arm64/boot/dts/qcom/sdm845.dtsi
> +++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi
> @@ -7,6 +7,7 @@
>
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -1264,6 +1265,13 @@
> #power-domain-cells = <1>;
> };
>
> +   lpasscc: clock-controller@17014000 {
> +   compatible = "qcom,sdm845-lpasscc";
> +   reg = <0x17014000 0x1f004>, <0x1730 0x200>;
> +   reg-names = "cc", "qdsp6ss";
> +   #clock-cells = <1>;
> +   };
> +
> tsens0: thermal-sensor@c263000 {

Please keep nodes with a unit address sorted numerically by unit
address.  0x17014000 should not come before 0xc263000.

Presumably this is the kind of thing that Andy can fix himself when he
lands stuff (often dts stuff ends up conflicting a bunch anyway), so I
wouldn't personally post another patch unless I hear from him that he
wants you to repost.  ...but please keep it in mind for the future.

In any case:

Reviewed-by: Douglas Anderson 

-Doug


Re: [PATCH] dt-bindings: sifive: describe sifive-blocks versioning

2018-12-06 Thread Rob Herring
On Wed, Nov 21, 2018 at 05:06:56PM -0800, Paul Walmsley wrote:
> 
> For IP blocks that are generated from the public, open-source
> sifive-blocks repository, describe the version numbering policy
> that its maintainers intend to use, upon request from Rob
> Herring .
> 
> Cc: Rob Herring 
> Cc: Palmer Dabbelt 
> Cc: Megan Wachs 
> Cc: Wesley Terpstra 
> Cc: Mark Rutland 
> Cc: devicet...@vger.kernel.org
> Cc: linux-ri...@lists.infradead.org
> Cc: linux-kernel@vger.kernel.org
> Signed-off-by: Paul Walmsley 
> Signed-off-by: Paul Walmsley 
> ---
> 
> Hi Rob, please let me know if this document works with your
> requirements, or if some changes are needed.  - Paul

Looks pretty good to me.

>  .../sifive/sifive-blocks-ip-versioning.txt| 38 +++
>  1 file changed, 38 insertions(+)
>  create mode 100644 
> Documentation/devicetree/bindings/sifive/sifive-blocks-ip-versioning.txt

Use the path that was suggested.

> diff --git 
> a/Documentation/devicetree/bindings/sifive/sifive-blocks-ip-versioning.txt 
> b/Documentation/devicetree/bindings/sifive/sifive-blocks-ip-versioning.txt
> new file mode 100644
> index ..b899e5c6e00c
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/sifive/sifive-blocks-ip-versioning.txt
> @@ -0,0 +1,38 @@
> +DT compatible string versioning for SiFive open-source IP blocks
> +
> +This document describes the version specification for DT "compatible"
> +strings for open-source SiFive IP blocks.  HDL for these IP blocks
> +can be found in this public repository:
> +
> +https://github.com/sifive/sifive-blocks
> +
> +IP block-specific DT compatible strings are contained within the HDL,
> +in the form "sifive,".

Really, my preference would be to add a '-v' in this:

sifive,-v

But given this ship has already sailed, I guess it is fine as is.

> +
> +An example is "sifive,uart0" from:
> +
> +https://github.com/sifive/sifive-blocks/blob/master/src/main/scala/devices/uart/UART.scala#L43

That's nice, but will be out of date as soon as someone adds or removes 
a line above it. Can you point to a tagged version?

> +
> +Until these IP blocks (or IP integration) support version
> +autodiscovery, the maintainers of these IP blocks intend to increment
> +the suffixed number in the compatible string whenever the software
> +interface to these IP blocks changes, or when the functionality of the
> +underlying IP blocks changes in a way that software should be aware of.
> +
> +Driver developers can use compatible string "match" values such as
> +"sifive,uart0" to indicate that their driver is compatible with the
> +register interface and functionality associated with the relevant
> +upstream sifive-blocks commits.  It is expected that most drivers will
> +match on these IP block-specific compatible strings.
> +
> +DT data authors, when writing data for a particular SoC, should
> +continue to specify an SoC-specific compatible string value, such as
> +"sifive,fu540-c000-uart".  This way, if SoC-specific
> +integration-specific bug fixes or workarounds are needed, the kernel
> +or other system software can match on this string to apply them.  The
> +IP block-specific compatible string (such as "sifive,uart0") should
> +then be specified as a subsequent value.
> +
> +An example of this style:
> +
> +compatible = "sifive,fu540-c000-uart", "sifive,uart0";
> -- 
> 2.19.1
> 


Re: [PATCH v2 3/3] arm64: dts: qcom: sdm845: Add SD nodes for sdm845-mtp

2018-12-06 Thread Bjorn Andersson
On Thu 06 Dec 10:45 PST 2018, Evan Green wrote:

> Enable support for one of the micro SD slots on the MTP.
> 
> Signed-off-by: Evan Green 

Reviewed-by: Bjorn Andersson 

Regards,
Bjorn

> ---
> 
> Changes in v2:
>  - Fixed alphabetization of node placement in sdm845-mtp.dtsi (Doug)
>  - Fixed card detect name to match schematics (Doug).
>  - Moved comment about drive strength next to the drive-strength entry
>  (Doug)
>  - Removed drive-strength from card detect input pin (Doug).
>  - Consolidated tlmm nodes in MTP.
> 
>  arch/arm64/boot/dts/qcom/sdm845-mtp.dts | 58 -
>  1 file changed, 56 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/arm64/boot/dts/qcom/sdm845-mtp.dts 
> b/arch/arm64/boot/dts/qcom/sdm845-mtp.dts
> index b3def03581775..cde76da42cbb7 100644
> --- a/arch/arm64/boot/dts/qcom/sdm845-mtp.dts
> +++ b/arch/arm64/boot/dts/qcom/sdm845-mtp.dts
> @@ -7,6 +7,7 @@
>  
>  /dts-v1/;
>  
> +#include 
>  #include 
>  #include "sdm845.dtsi"
>  
> @@ -358,8 +359,16 @@
>   status = "okay";
>  };
>  
> - {
> - gpio-reserved-ranges = <0 4>, <81 4>;
> +_2 {
> + status = "okay";
> +
> + pinctrl-names = "default";
> + pinctrl-0 = <_clk _cmd _data _card_det_n>;
> +
> + vmmc-supply = <_l21a_2p95>;
> + vqmmc-supply = <_2>;
> +
> + cd-gpios = < 126 GPIO_ACTIVE_LOW>;
>  };
>  
>   {
> @@ -450,3 +459,48 @@
>   bias-pull-up;
>   };
>  };
> +
> + {
> + gpio-reserved-ranges = <0 4>, <81 4>;
> +
> + sdc2_clk: sdc2-clk {
> + pinconf {
> + pins = "sdc2_clk";
> + bias-disable;
> +
> + /*
> +  * It seems that mmc_test reports errors if drive
> +  * strength is not 16 on clk, cmd, and data pins.
> +  */
> + drive-strength = <16>;
> + };
> + };
> +
> + sdc2_cmd: sdc2-cmd {
> + pinconf {
> + pins = "sdc2_cmd";
> + bias-pull-up;
> + drive-strength = <16>;
> + };
> + };
> +
> + sdc2_data: sdc2-data {
> + pinconf {
> + pins = "sdc2_data";
> + bias-pull-up;
> + drive-strength = <16>;
> + };
> + };
> +
> + sd_card_det_n: sd-card-det-n {
> + pinmux {
> + pins = "gpio126";
> + function = "gpio";
> + };
> +
> + pinconf {
> + pins = "gpio126";
> + bias-pull-up;
> + };
> + };
> +};
> -- 
> 2.18.1
> 


Re: [PATCH v2 2/3] arm64: dts: qcom: sdm845: Add SD node

2018-12-06 Thread Bjorn Andersson
On Thu 06 Dec 10:45 PST 2018, Evan Green wrote:

> Add one of the two SD controllers to SDM845.
> 
> Signed-off-by: Evan Green 
> Reviewed-by: Douglas Anderson 

Reviewed-by: Bjorn Andersson 

Regards,
Bjorn

> ---
> 
> Changes in v2:
>  - Reworded commit message to note that there are multiple SD
>  controllers.
> 
>  arch/arm64/boot/dts/qcom/sdm845.dtsi | 15 +++
>  1 file changed, 15 insertions(+)
> 
> diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi 
> b/arch/arm64/boot/dts/qcom/sdm845.dtsi
> index 1419b0098cb38..bb8eacdf40910 100644
> --- a/arch/arm64/boot/dts/qcom/sdm845.dtsi
> +++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi
> @@ -1078,6 +1078,21 @@
>   };
>   };
>  
> + sdhc_2: sdhci@8804000 {
> + compatible = "qcom,sdm845-sdhci", "qcom,sdhci-msm-v5";
> + reg = <0x8804000 0x1000>;
> +
> + interrupts = ,
> +  ;
> + interrupt-names = "hc_irq", "pwr_irq";
> +
> + clocks = < GCC_SDCC2_AHB_CLK>,
> +  < GCC_SDCC2_APPS_CLK>;
> + clock-names = "iface", "core";
> +
> + status = "disabled";
> + };
> +
>   usb_1_hsphy: phy@88e2000 {
>   compatible = "qcom,sdm845-qusb2-phy";
>   reg = <0x88e2000 0x400>;
> -- 
> 2.18.1
> 


[PATCH] selftests/seccomp: Remove SIGSTOP si_pid check

2018-12-06 Thread Kees Cook
Commit f149b3155744 ("signal: Never allocate siginfo for SIGKILL or SIGSTOP")
means that the seccomp selftest cannot check si_pid under SIGSTOP anymore.
Since it's believed[1] there are no other userspace things depending on the
old behavior, this removes the behavioral check from the selftest; it was
more of an "extra" sanity check (which turns out, maybe, not to have been
useful to test).

[1] 
https://lkml.kernel.org/r/cagxu5jjazaozp1qfz66tyrtbuywqb+un2soa1vlhpccoiyv...@mail.gmail.com

Reported-by: Tycho Andersen 
Suggested-by: Eric W. Biederman 
Signed-off-by: Kees Cook 
---
Shuah, can you make sure that Linus gets this before v4.20 is released? Thanks!
---
 tools/testing/selftests/seccomp/seccomp_bpf.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c 
b/tools/testing/selftests/seccomp/seccomp_bpf.c
index e1473234968d..c9a2abf8be1b 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -2731,9 +2731,14 @@ TEST(syscall_restart)
ASSERT_EQ(child_pid, waitpid(child_pid, , 0));
ASSERT_EQ(true, WIFSTOPPED(status));
ASSERT_EQ(SIGSTOP, WSTOPSIG(status));
-   /* Verify signal delivery came from parent now. */
ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, ));
-   EXPECT_EQ(getpid(), info.si_pid);
+   /*
+* There is no siginfo on SIGSTOP any more, so we can't verify
+* signal delivery came from parent now (getpid() == info.si_pid).
+* 
https://lkml.kernel.org/r/cagxu5jjazaozp1qfz66tyrtbuywqb+un2soa1vlhpccoiyv...@mail.gmail.com
+* At least verify the SIGSTOP via PTRACE_GETSIGINFO.
+*/
+   EXPECT_EQ(SIGSTOP, info.si_signo);
 
/* Restart nanosleep with SIGCONT, which triggers restart_syscall. */
ASSERT_EQ(0, kill(child_pid, SIGCONT));
-- 
2.17.1


-- 
Kees Cook


Re: [RFC PATCH 00/14] Heterogeneous Memory System (HMS) and hbind()

2018-12-06 Thread Logan Gunthorpe



On 2018-12-06 4:38 p.m., Dave Hansen wrote:
> On 12/6/18 3:28 PM, Logan Gunthorpe wrote:
>> I didn't think this was meant to describe actual real world performance
>> between all of the links. If that's the case all of this seems like a
>> pipe dream to me.
> 
> The HMAT discussions (that I was a part of at least) settled on just
> trying to describe what we called "sticker speed".  Nobody had an
> expectation that you *really* had to measure everything.
> 
> The best we can do for any of these approaches is approximate things.

Yes, though there are a lot of caveats in this assumption alone.
Specifically with PCI: the bus may run at however many GB/s but P2P
through a CPU's root complexes can slow down significantly (like down to
MB/s).

I've seen similar things across QPI: I can sometimes do P2P from
PCI->QPI->PCI but the performance doesn't even come close to the sticker
speed of any of those buses.

I'm not sure how anyone is going to deal with those issues, but it does
firmly place us in world view #2 instead of #1. But, yes, I agree
exposing information like in #2 full out to userspace, especially
through sysfs, seems like a nightmare and I don't see anything in HMS to
help with that. Providing an API to ask for memory (or another resource)
that's accessible by a set of initiators and with a set of requirements
for capabilities seems more manageable.
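
As a purely illustrative sketch of that kind of request-based interface
(none of these names exist in the kernel; the constraints and the stub
allocator below are hypothetical, and the stub simply ignores them):

#include <stdlib.h>

/* Hypothetical description of who must reach the memory and how fast. */
struct mem_request {
	const int *initiators;		/* CPU/device IDs that need access */
	int nr_initiators;
	unsigned long min_bandwidth;	/* MB/s, 0 means don't care */
	unsigned long max_latency;	/* ns, 0 means don't care */
	unsigned long flags;		/* e.g. require cache coherence */
};

/*
 * Placeholder allocator: a real implementation would walk the topology
 * and pick target memory reachable by all initiators within the
 * requested bandwidth/latency bounds.  This stub just calls malloc().
 */
static void *request_memory(const struct mem_request *req, size_t size)
{
	(void)req;
	return malloc(size);
}

int main(void)
{
	int gpu_and_cpu[] = { 0, 42 };
	struct mem_request req = {
		.initiators = gpu_and_cpu,
		.nr_initiators = 2,
		.min_bandwidth = 50000,	/* 50 GB/s */
		.max_latency = 0,
	};
	void *buf = request_memory(&req, 1 << 20);

	free(buf);
	return 0;
}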

Logan


[PATCH] scsi: qla2xxx: deadlock by configfs_depend_item

2018-12-06 Thread Anatoliy Glagolev
The intent of invoking configfs_depend_item in commit 7474f52a82d51
("tcm_qla2xxx: Perform configfs depend/undepend for base_tpg")
was to prevent a physical Fibre Channel port removal when
virtual (NPIV) ports announced through that physical port are active.
The change does not work as expected: it makes the enabled physical port
dependent on the target configfs subsystem (the port's parent), something
configfs guarantees anyway.

Besides, scheduling work in a worker thread and waiting for the work's
completion is not really a valid workaround for the requirement not
to call configfs_depend_item from a configfs callback: the call
occasionally deadlocks.

Thus, removing configfs_depend_item calls does not break anything
and fixes the deadlock problem.

Signed-off-by: Anatoliy Glagolev 
---
 drivers/scsi/qla2xxx/tcm_qla2xxx.c | 48 +++---
 drivers/scsi/qla2xxx/tcm_qla2xxx.h |  3 ---
 2 files changed, 8 insertions(+), 43 deletions(-)

diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c 
b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
index 7732e93..1c7ca84 100644
--- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c
+++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
@@ -929,38 +929,14 @@ static ssize_t tcm_qla2xxx_tpg_enable_show(struct 
config_item *item,
atomic_read(>lport_tpg_enabled));
 }
 
-static void tcm_qla2xxx_depend_tpg(struct work_struct *work)
-{
-   struct tcm_qla2xxx_tpg *base_tpg = container_of(work,
-   struct tcm_qla2xxx_tpg, tpg_base_work);
-   struct se_portal_group *se_tpg = _tpg->se_tpg;
-   struct scsi_qla_host *base_vha = base_tpg->lport->qla_vha;
-
-   if (!target_depend_item(_tpg->tpg_group.cg_item)) {
-   atomic_set(_tpg->lport_tpg_enabled, 1);
-   qlt_enable_vha(base_vha);
-   }
-   complete(_tpg->tpg_base_comp);
-}
-
-static void tcm_qla2xxx_undepend_tpg(struct work_struct *work)
-{
-   struct tcm_qla2xxx_tpg *base_tpg = container_of(work,
-   struct tcm_qla2xxx_tpg, tpg_base_work);
-   struct se_portal_group *se_tpg = _tpg->se_tpg;
-   struct scsi_qla_host *base_vha = base_tpg->lport->qla_vha;
-
-   if (!qlt_stop_phase1(base_vha->vha_tgt.qla_tgt)) {
-   atomic_set(_tpg->lport_tpg_enabled, 0);
-   target_undepend_item(_tpg->tpg_group.cg_item);
-   }
-   complete(_tpg->tpg_base_comp);
-}
-
 static ssize_t tcm_qla2xxx_tpg_enable_store(struct config_item *item,
const char *page, size_t count)
 {
struct se_portal_group *se_tpg = to_tpg(item);
+   struct se_wwn *se_wwn = se_tpg->se_tpg_wwn;
+   struct tcm_qla2xxx_lport *lport = container_of(se_wwn,
+   struct tcm_qla2xxx_lport, lport_wwn);
+   struct scsi_qla_host *vha = lport->qla_vha;
struct tcm_qla2xxx_tpg *tpg = container_of(se_tpg,
struct tcm_qla2xxx_tpg, se_tpg);
unsigned long op;
@@ -979,24 +955,16 @@ static ssize_t tcm_qla2xxx_tpg_enable_store(struct 
config_item *item,
if (atomic_read(>lport_tpg_enabled))
return -EEXIST;
 
-   INIT_WORK(>tpg_base_work, tcm_qla2xxx_depend_tpg);
+   atomic_set(>lport_tpg_enabled, 1);
+   qlt_enable_vha(vha);
} else {
if (!atomic_read(>lport_tpg_enabled))
return count;
 
-   INIT_WORK(>tpg_base_work, tcm_qla2xxx_undepend_tpg);
+   atomic_set(>lport_tpg_enabled, 0);
+   qlt_stop_phase1(vha->vha_tgt.qla_tgt);
}
-   init_completion(>tpg_base_comp);
-   schedule_work(>tpg_base_work);
-   wait_for_completion(>tpg_base_comp);
 
-   if (op) {
-   if (!atomic_read(>lport_tpg_enabled))
-   return -ENODEV;
-   } else {
-   if (atomic_read(>lport_tpg_enabled))
-   return -EPERM;
-   }
return count;
 }
 
diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.h 
b/drivers/scsi/qla2xxx/tcm_qla2xxx.h
index 7550ba2..147cf6c 100644
--- a/drivers/scsi/qla2xxx/tcm_qla2xxx.h
+++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.h
@@ -48,9 +48,6 @@ struct tcm_qla2xxx_tpg {
struct tcm_qla2xxx_tpg_attrib tpg_attrib;
/* Returned by tcm_qla2xxx_make_tpg() */
struct se_portal_group se_tpg;
-   /* Items for dealing with configfs_depend_item */
-   struct completion tpg_base_comp;
-   struct work_struct tpg_base_work;
 };
 
 struct tcm_qla2xxx_fc_loopid {
-- 
1.9.1



Re: MADV_HUGEPAGE vs. NUMA semantic (was: Re: [LKP] [mm] ac5b2c1891: vm-scalability.throughput -61.3% regression)

2018-12-06 Thread David Rientjes
On Thu, 6 Dec 2018, Michal Hocko wrote:

> MADV_HUGEPAGE changes the picture because the caller expressed a need
> for THP and is willing to go extra mile to get it. That involves
> allocation latency and as of now also a potential remote access. We do
> not have complete agreement on the later but the prevailing argument is
> that any strong NUMA locality is just reinventing node-reclaim story
> again or makes THP success rate down the toilet (to quote Mel). I agree
> that we do not want to fallback to a remote node overeagerly. I believe
> that something like the below would be sensible
>   1) THP on a local node with compaction not giving up too early
>   2) THP on a remote node in NOWAIT mode - so no direct
>  compaction/reclaim (trigger kswapd/kcompactd only for
>  defrag=defer+madvise)
>   3) fallback to the base page allocation
> 

I disagree that MADV_HUGEPAGE should take on any new semantic that 
overrides the preference of node-local memory for a hugepage, which has 
been the behavior for nearly four years.  The order of MADV_HUGEPAGE 
preferences listed above would cause current users who rely on local 
small-page fallback rather than remote hugepages to regress, because the 
access latency is much better.  I think the preference of remote hugepages 
over local small pages needs to be expressed differently to prevent 
regression.


Re: [PATCH 2/3] dt-bindings: drm/panel: simple: add support for PDA 91-00156-A0

2018-12-06 Thread Rob Herring
On Wed, Nov 21, 2018 at 08:48:00AM +, eugen.hris...@microchip.com wrote:
> From: Cristian Birsan 
> 
> PDA 91-00156-A0 5.0 is a 5.0" WVGA TFT LCD panel.
> This panel with backlight is found in PDA 5" LCD screen (TM5000 series or
> AC320005-5).
> Adding Device Tree bindings for this panel.
> 
> Signed-off-by: Cristian Birsan 
> ---
>  .../devicetree/bindings/display/panel/pda,91-00156-a0.txt  | 7 
> +++
>  1 file changed, 7 insertions(+)
>  create mode 100644 
> Documentation/devicetree/bindings/display/panel/pda,91-00156-a0.txt
> 
> diff --git 
> a/Documentation/devicetree/bindings/display/panel/pda,91-00156-a0.txt 
> b/Documentation/devicetree/bindings/display/panel/pda,91-00156-a0.txt
> new file mode 100644
> index 000..52f0da9
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/display/panel/pda,91-00156-a0.txt
> @@ -0,0 +1,7 @@
> +PDA 91-00156-A0 5.0" WVGA TFT LCD panel
> +
> +Required properties:
> +- compatible: should be "pda,91-00156-a0"
> +
> +This binding is compatible with the simple-panel binding, which is specified
> +in simple-panel.txt in this directory.

You need to explicitly list what properties from this are used. Does 
this have a single supply or did you just forget to add supplies for 
example? I don't know.

Rob


[PATCH 1/6] arm64: kpti: move check for non-vulnerable CPUs to a function

2018-12-06 Thread Jeremy Linton
From: Mian Yousaf Kaukab 

Add is_cpu_meltdown_safe(), which checks a whitelist of known safe cores.

Signed-off-by: Mian Yousaf Kaukab 
[Moved location of function]
Signed-off-by: Jeremy Linton 
---
 arch/arm64/kernel/cpufeature.c | 16 
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index aec5ecb85737..242898395f68 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -908,8 +908,7 @@ has_useable_cnp(const struct arm64_cpu_capabilities *entry, 
int scope)
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
 static int __kpti_forced; /* 0: not forced, >0: forced on, <0: forced off */
 
-static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
-   int scope)
+static bool is_cpu_meltdown_safe(void)
 {
/* List of CPUs that are not vulnerable and don't need KPTI */
static const struct midr_range kpti_safe_list[] = {
@@ -917,6 +916,16 @@ static bool unmap_kernel_at_el0(const struct 
arm64_cpu_capabilities *entry,
MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN),
{ /* sentinel */ }
};
+   /* Don't force KPTI for CPUs that are not vulnerable */
+   if (is_midr_in_range_list(read_cpuid_id(), kpti_safe_list))
+   return true;
+
+   return false;
+}
+
+static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
+   int scope)
+{
char const *str = "command line option";
 
/*
@@ -940,8 +949,7 @@ static bool unmap_kernel_at_el0(const struct 
arm64_cpu_capabilities *entry,
if (IS_ENABLED(CONFIG_RANDOMIZE_BASE))
return true;
 
-   /* Don't force KPTI for CPUs that are not vulnerable */
-   if (is_midr_in_range_list(read_cpuid_id(), kpti_safe_list))
+   if (is_cpu_meltdown_safe())
return false;
 
/* Defer to CPU feature registers */
-- 
2.17.2



Re: [PATCH 1/3] dt-bindings: add vendor prefix for PDA Precision Design Associates, Inc.

2018-12-06 Thread Rob Herring
On Wed, 21 Nov 2018 08:47:57 +,  wrote:
> Precision Design Associates, Inc. (PDA) manufactures standard and custom
> capacitive touch screens, LCD's embedded controllers and custom embedded
> software. They specialize in industrial, rugged and outdoor applications.
> Website: http://www.pdaatl.com/
> 
> Signed-off-by: Eugen Hristev 
> ---
>  Documentation/devicetree/bindings/vendor-prefixes.txt | 1 +
>  1 file changed, 1 insertion(+)
> 

Reviewed-by: Rob Herring 


[PATCH 2/6] arm64: add sysfs vulnerability show for meltdown

2018-12-06 Thread Jeremy Linton
Add a simple state machine which will track whether
all the online cores in a machine are vulnerable.

Once that is done we have a fairly authoritative view
of the machine vulnerability, which allows us to make a
judgment about machine safety if it hasn't been mitigated.

Signed-off-by: Jeremy Linton 
---
 arch/arm64/kernel/cpufeature.c | 31 ++-
 1 file changed, 26 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 242898395f68..bea9adfef7fa 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -905,6 +905,8 @@ has_useable_cnp(const struct arm64_cpu_capabilities *entry, 
int scope)
return has_cpuid_feature(entry, scope);
 }
 
+static enum { A64_MELT_UNSET, A64_MELT_SAFE, A64_MELT_UNKN } __meltdown_safe = 
A64_MELT_UNSET;
+
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
 static int __kpti_forced; /* 0: not forced, >0: forced on, <0: forced off */
 
@@ -928,6 +930,15 @@ static bool unmap_kernel_at_el0(const struct 
arm64_cpu_capabilities *entry,
 {
char const *str = "command line option";
 
+   bool meltdown_safe = is_cpu_meltdown_safe() ||
+   has_cpuid_feature(entry, scope);
+
+   /* Only safe if all booted cores are known safe */
+   if (meltdown_safe && __meltdown_safe == A64_MELT_UNSET)
+   __meltdown_safe = A64_MELT_SAFE;
+   else if (!meltdown_safe)
+   __meltdown_safe = A64_MELT_UNKN;
+
/*
 * For reasons that aren't entirely clear, enabling KPTI on Cavium
 * ThunderX leads to apparent I-cache corruption of kernel text, which
@@ -949,11 +960,7 @@ static bool unmap_kernel_at_el0(const struct 
arm64_cpu_capabilities *entry,
if (IS_ENABLED(CONFIG_RANDOMIZE_BASE))
return true;
 
-   if (is_cpu_meltdown_safe())
-   return false;
-
-   /* Defer to CPU feature registers */
-   return !has_cpuid_feature(entry, scope);
+   return !meltdown_safe;
 }
 
 static void
@@ -1920,3 +1927,17 @@ static int __init enable_mrs_emulation(void)
 }
 
 core_initcall(enable_mrs_emulation);
+
+#ifdef CONFIG_GENERIC_CPU_VULNERABILITIES
+ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr,
+   char *buf)
+{
+   if (arm64_kernel_unmapped_at_el0())
+   return sprintf(buf, "Mitigation: KPTI\n");
+
+   if (__meltdown_safe == A64_MELT_SAFE)
+   return sprintf(buf, "Not affected\n");
+
+   return sprintf(buf, "Unknown\n");
+}
+#endif
-- 
2.17.2



[PATCH 3/6] arm64: add sysfs vulnerability show for spectre v1

2018-12-06 Thread Jeremy Linton
From: Mian Yousaf Kaukab 

Spectre v1 has been mitigated, and the mitigation is
always active.

Signed-off-by: Mian Yousaf Kaukab 
Signed-off-by: Jeremy Linton 
---
 arch/arm64/kernel/cpu_errata.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 6ad715d67df8..559ecdee6fd2 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -757,3 +757,13 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
{
}
 };
+
+#ifdef CONFIG_GENERIC_CPU_VULNERABILITIES
+
+ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr,
+   char *buf)
+{
+   return sprintf(buf, "Mitigation: __user pointer sanitization\n");
+}
+
+#endif
-- 
2.17.2



[PATCH 4/6] arm64: add sysfs vulnerability show for spectre v2

2018-12-06 Thread Jeremy Linton
Add code to track whether all the cores in the machine are
vulnerable, and whether all the vulnerable cores have been
mitigated.

Once we have that information we can add the sysfs stub and
provide an accurate view of what is known about the machine.
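
A rough illustration of the per-CPU classification done by
check_branch_predictor() in the diff below (a standalone sketch with
placeholder names, not kernel code): a core on the affected MIDR list
marks the machine unsafe, and the machine can only be marked safe while
no such core has been seen and the current core advertises CSV2.

#include <stdbool.h>

enum sv2_state { SV2_UNSET, SV2_SAFE, SV2_UNSAFE };
static enum sv2_state machine_sv2 = SV2_UNSET;

/* Returns true when this core needs the branch predictor workaround. */
static bool classify_cpu(bool midr_on_affected_list, bool has_csv2)
{
        if (midr_on_affected_list)
                machine_sv2 = SV2_UNSAFE;
        else if (machine_sv2 == SV2_UNSET && has_csv2)
                machine_sv2 = SV2_SAFE;

        return midr_on_affected_list;
}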

Signed-off-by: Jeremy Linton 
---
 arch/arm64/kernel/cpu_errata.c | 72 +++---
 1 file changed, 67 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 559ecdee6fd2..6505c93d507e 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -109,6 +109,11 @@ cpu_enable_trap_ctr_access(const struct 
arm64_cpu_capabilities *__unused)
 
 atomic_t arm64_el2_vector_last_slot = ATOMIC_INIT(-1);
 
+#if defined(CONFIG_HARDEN_BRANCH_PREDICTOR) || 
defined(CONFIG_GENERIC_CPU_VULNERABILITIES)
+/* Track overall mitigation state. We are only mitigated if all cores are ok */
+static enum { A64_HBP_UNSET, A64_HBP_MIT, A64_HBP_NOTMIT } __hardenbp_enab = 
A64_HBP_UNSET;
+#endif
+
 #ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
 #include 
 #include 
@@ -231,15 +236,19 @@ enable_smccc_arch_workaround_1(const struct 
arm64_cpu_capabilities *entry)
if (!entry->matches(entry, SCOPE_LOCAL_CPU))
return;
 
-   if (psci_ops.smccc_version == SMCCC_VERSION_1_0)
+   if (psci_ops.smccc_version == SMCCC_VERSION_1_0) {
+   __hardenbp_enab = A64_HBP_NOTMIT;
return;
+   }
 
switch (psci_ops.conduit) {
case PSCI_CONDUIT_HVC:
arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
  ARM_SMCCC_ARCH_WORKAROUND_1, );
-   if ((int)res.a0 < 0)
+   if ((int)res.a0 < 0) {
+   __hardenbp_enab = A64_HBP_NOTMIT;
return;
+   }
cb = call_hvc_arch_workaround_1;
/* This is a guest, no need to patch KVM vectors */
smccc_start = NULL;
@@ -249,14 +258,17 @@ enable_smccc_arch_workaround_1(const struct 
arm64_cpu_capabilities *entry)
case PSCI_CONDUIT_SMC:
arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
  ARM_SMCCC_ARCH_WORKAROUND_1, );
-   if ((int)res.a0 < 0)
+   if ((int)res.a0 < 0) {
+   __hardenbp_enab = A64_HBP_NOTMIT;
return;
+   }
cb = call_smc_arch_workaround_1;
smccc_start = __smccc_workaround_1_smc_start;
smccc_end = __smccc_workaround_1_smc_end;
break;
 
default:
+   __hardenbp_enab = A64_HBP_NOTMIT;
return;
}
 
@@ -266,6 +278,9 @@ enable_smccc_arch_workaround_1(const struct 
arm64_cpu_capabilities *entry)
 
install_bp_hardening_cb(entry, cb, smccc_start, smccc_end);
 
+   if (__hardenbp_enab == A64_HBP_UNSET)
+   __hardenbp_enab = A64_HBP_MIT;
+
return;
 }
 #endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */
@@ -539,7 +554,36 @@ multi_entry_cap_cpu_enable(const struct 
arm64_cpu_capabilities *entry)
caps->cpu_enable(caps);
 }
 
-#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
+#if defined(CONFIG_HARDEN_BRANCH_PREDICTOR) || \
+   defined(CONFIG_GENERIC_CPU_VULNERABILITIES)
+
+static enum { A64_SV2_UNSET, A64_SV2_SAFE, A64_SV2_UNSAFE } __spectrev2_safe = 
A64_SV2_UNSET;
+
+/*
+ * Track overall bp hardening for all heterogeneous cores in the machine.
+ * We are only considered "safe" if all booted cores are known safe.
+ */
+static bool __maybe_unused
+check_branch_predictor(const struct arm64_cpu_capabilities *entry, int scope)
+{
+   bool is_vul;
+   bool has_csv2;
+   u64 pfr0;
+
+   WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
+
+   is_vul = is_midr_in_range_list(read_cpuid_id(), entry->midr_range_list);
+
+   pfr0 = read_cpuid(ID_AA64PFR0_EL1);
+   has_csv2 = cpuid_feature_extract_unsigned_field(pfr0, 
ID_AA64PFR0_CSV2_SHIFT);
+
+   if (is_vul)
+   __spectrev2_safe = A64_SV2_UNSAFE;
+   else if (__spectrev2_safe == A64_SV2_UNSET && has_csv2)
+   __spectrev2_safe = A64_SV2_SAFE;
+
+   return is_vul;
+}
 
 /*
  * List of CPUs where we need to issue a psci call to
@@ -728,7 +772,9 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
{
.capability = ARM64_HARDEN_BRANCH_PREDICTOR,
.cpu_enable = enable_smccc_arch_workaround_1,
-   ERRATA_MIDR_RANGE_LIST(arm64_bp_harden_smccc_cpus),
+   .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
+   .matches = check_branch_predictor,
+   .midr_range_list = arm64_bp_harden_smccc_cpus,
},
 #endif
 #ifdef CONFIG_HARDEN_EL2_VECTORS
@@ -766,4 +812,20 @@ ssize_t cpu_show_spectre_v1(struct device *dev, struct 
device_attribute *attr,
return sprintf(buf, "Mitigation: __user 

Re: [PATCH v2 3/3] arm64: dts: qcom: sdm845: Add SD nodes for sdm845-mtp

2018-12-06 Thread Doug Anderson
Hi,

On Thu, Dec 6, 2018 at 10:46 AM Evan Green  wrote:
>
> Enable support for one of the micro SD slots on the MTP.
>
> Signed-off-by: Evan Green 
> ---
>
> Changes in v2:
>  - Fixed alphabetization of node placement in sdm845-mtp.dtsi (Doug)
>  - Fixed card detect name to match schematics (Doug).
>  - Moved comment about drive strength next to the drive-strength entry
>  (Doug)
>  - Removed drive-strength from card detect input pin (Doug).
>  - Consolidated tlmm nodes in MTP.
>
>  arch/arm64/boot/dts/qcom/sdm845-mtp.dts | 58 -
>  1 file changed, 56 insertions(+), 2 deletions(-)

Reviewed-by: Douglas Anderson 


[PATCH 5/6] arm64: add sysfs vulnerability show for speculative store bypass

2018-12-06 Thread Jeremy Linton
From: Mian Yousaf Kaukab 

Return the status based on ssbd_state and the arm64 SSBS feature.
Return the string "Unknown" if CONFIG_ARM64_SSBD is disabled or
ARCH_WORKAROUND_2 is not available in the firmware.
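
A condensed, illustrative-only mapping of the reporting logic added in
cpu_show_spec_store_bypass() below (placeholder enum and function names,
not kernel symbols): the worst-case firmware ARCH_WORKAROUND_2 state plus
whether all cores have SSBS decide the string that ends up in sysfs.

#include <stdbool.h>

enum ssbd_fw_state { SSBD_UNKNOWN, SSBD_KERNEL, SSBD_FORCE_ENABLE,
                     SSBD_FORCE_DISABLE, SSBD_MITIGATED };

static const char *ssb_status(enum ssbd_fw_state state, bool all_cores_have_ssbs)
{
        switch (state) {
        case SSBD_MITIGATED:
                return "Not affected";
        case SSBD_KERNEL:
        case SSBD_FORCE_ENABLE:
                if (all_cores_have_ssbs)
                        return "Not affected";
                return "Mitigation: Speculative Store Bypass disabled";
        case SSBD_FORCE_DISABLE:
                return "Vulnerable";
        default:        /* SSBD_UNKNOWN */
                return "Unknown";
        }
}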

Signed-off-by: Mian Yousaf Kaukab 
[Added SSBS logic]
Signed-off-by: Jeremy Linton 
---
 arch/arm64/kernel/cpu_errata.c | 28 
 1 file changed, 28 insertions(+)

diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 6505c93d507e..8aeb5ca38db8 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -423,6 +423,7 @@ static bool has_ssbd_mitigation(const struct 
arm64_cpu_capabilities *entry,
ssbd_state = ARM64_SSBD_UNKNOWN;
return false;
 
+   /* machines with mixed mitigation requirements must not return this */
case SMCCC_RET_NOT_REQUIRED:
pr_info_once("%s mitigation not required\n", entry->desc);
ssbd_state = ARM64_SSBD_MITIGATED;
@@ -828,4 +829,31 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct 
device_attribute *attr,
}
 }
 
+ssize_t cpu_show_spec_store_bypass(struct device *dev,
+   struct device_attribute *attr, char *buf)
+{
+   /*
+*  Two assumptions: first, arm64_get_ssbd_state() reflects the worst
+*  case for heterogeneous machines; second, if SSBS is supported, it
+*  is supported by all cores.
+*/
+   switch (arm64_get_ssbd_state()) {
+   case ARM64_SSBD_MITIGATED:
+   return sprintf(buf, "Not affected\n");
+
+   case ARM64_SSBD_KERNEL:
+   case ARM64_SSBD_FORCE_ENABLE:
+   if (cpus_have_cap(ARM64_SSBS))
+   return sprintf(buf, "Not affected\n");
+   return sprintf(buf,
+   "Mitigation: Speculative Store Bypass disabled\n");
+
+   case ARM64_SSBD_FORCE_DISABLE:
+   return sprintf(buf, "Vulnerable\n");
+
+   default: /* ARM64_SSBD_UNKNOWN */
+   return sprintf(buf, "Unknown\n");
+   }
+}
+
 #endif
-- 
2.17.2


