Re: [PATCH v3] lockdep: Allow tuning tracing capacity constants.

2020-12-04 Thread Tetsuo Handa
I attempted some analysis, but I couldn't find which lock is causing
nr_list_entries == 32768.
Since "struct lock_list list_entries[MAX_LOCKDEP_ENTRIES]" is a "static"
variable in lockdep.c, none of /proc/lockdep_stats, /proc/lockdep, or
/proc/lockdep_chains can report which lock is responsible for the
"BUG: MAX_LOCKDEP_ENTRIES too low!" message, can they?

 lock-classes: 3456 [max: 8192]
 direct dependencies: 32768 [max: 32768]
 indirect dependencies:  200953
 all direct dependencies:   1213844
 dependency chains:   56497 [max: 65536]
 dependency chain hlocks used:   249022 [max: 327680]
 dependency chain hlocks lost:0
 in-hardirq chains:  88
 in-softirq chains:2636
 in-process chains:   53773
 stack-trace entries:303496 [max: 524288]
 number of stack traces:  15714
 number of stack hash chains: 10103
 combined max dependencies:
 hardirq-safe locks: 57
 hardirq-unsafe locks: 2613
 softirq-safe locks:381
 softirq-unsafe locks: 2202
 irq-safe locks:395
 irq-unsafe locks: 2613
 hardirq-read-safe locks: 4
 hardirq-read-unsafe locks: 233
 softirq-read-safe locks:21
 softirq-read-unsafe locks: 218
 irq-read-safe locks:21
 irq-read-unsafe locks: 233
 uncategorized locks:   491
 unused locks:1
 max locking depth:  20
 max bfs queue depth:   749
 debug_locks: 0

 zapped classes:224
 zapped lock chains:640
 large chain blocks:  1

I was able to calculate "lock-classes:" and "indirect dependencies:" from
/proc/lockdep, but I failed to calculate "direct dependencies:" from
/proc/lockdep. How can we calculate it?

grep ^FD: CrashLog-12016d5550 | wc -l
3456

grep -F -- '->' CrashLog-12016d5550 | wc -l
16004

grep '^ ->' CrashLog-12016d5550 | wc -l
14014

grep '^ ->' CrashLog-12016d5550 | sort -u | wc -l
2506

grep ^FD: CrashLog-12016d5550 | awk ' BEGIN { FD=0; BD=0; } { if ($1 == "FD:") { FD=FD+$2; BD=BD+$4; } } END { print FD" "BD; } '
200953 200856
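
For reference, Peter's rule of thumb later in this thread is that each
"^ ->" line in /proc/lockdep is one forward dependency, and that list
entries are consumed for both ->locks_after and ->locks_before, so doubling
that count should approximate "direct dependencies:". A sketch of that
estimate (it yields 28028 here, short of the reported 32768, which is
exactly the gap being asked about):

echo $(( $(grep -c '^ ->' CrashLog-12016d5550) * 2 ))
28028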

Top FD entries tend to be long-held locks (e.g. workqueue) whereas
top BD entries tend to be short-held locks (e.g. scheduler and timekeeping).
But given that this result is from a VM with only 2 CPUs, I think that
this result does not match your

  Examples of bad annotations is getting every CPU a separate class, that
  leads to nr_cpus! chains if CPUs arbitrarily nest (nr_cpus^2 if there's
  only a single nesting level).

case.

awk ' { if ($1 == "FD:") print $2" "$6$7 } ' CrashLog-12016d5550 | sort -rV | head -n 40
1933 (wq_completion)usb_hub_wq
1932 (work_completion)(>events)
1730 (wq_completion)events
1508 (delayed_fput_work).work
1332 >s_type->i_mutex_key#13
1227 >f_pos_lock
1173 sb_writers#5
1095 (wq_completion)netns
1094 net_cleanup_work
1094 masq_mutex
1092 pernet_ops_rwsem
1046 (work_completion)(>wq)
1046 (work_completion)(>rq)
1045 >mutex/1
1043 tty_mutex
1042 >legacy_mutex
1039 >lock
1035 >legacy_mutex/1
1033 >ldisc_sem
1009 cb_lock
1007 (work_completion)(>work)
1007 nlk_cb_mutex-GENERIC
1006 genl_mutex
985 sb_writers#8
984 sb_writers#13
984 sb_writers#6
979 >mutex
964 (work_completion)(_work->work)
946 (wq_completion)events_power_efficient
943 _nl_types[idx].sem
942 link_ops_rwsem
939 (wq_completion)gid-cache-wq
937 (work_completion)(>work)
937 (work_completion)(_work->work)
936 devices_rwsem
931 kn->active#88
930 nsim_bus_dev_list_lock
926 rdma_nets_rwsem
924 >compat_devs_mutex
922 (wq_completion)ipv6_addrconf

awk ' { if ($3 == "BD:") print $4" "$6$7 } ' CrashLog-12016d5550 | sort -urV | head -n 40
1618 pool_lock
1617 _hash[i].lock
1510 >lock
1489 pvclock_gtod_data
1487 tk_core.seq.seqcount
1465 hrtimer_bases.lock
1460 _cpu_ptr(group->pcpu,cpu)->seq
1459 _rq->rt_runtime_lock
1459 >lock/1
1458 _b->rt_runtime_lock
1458 >lock
1457 >delays->lock
1457 >lock/1
1457 >rto_lock
1457 >lock
1457 _rq->removed.lock
1457 _b->lock
1457 per_cpu_ptr(_rstat_cpu_lock,cpu)
1456 >lock
1299 >pi_lock
1266 depot_lock
1114 &s->seqcount
1085 >list_lock
1071 >lock
1039 >wait#13
1039 wq_mayday_lock
1038 >lock
869 quarantine_lock
859 _lock
854 rcu_node_0
827 _state.expedited_wq
812 key#10
808 >deferred_split_queue.split_queue_lock
807 >lru_lock
806 lock#3
804 >refs_lock
798 >private_lock
788 _state.gp_wq
782 _wait_table[i]
778 pgd_lock

grep -F -- '->kn->active#' CrashLog-12016d5550 | wc -l
533
grep -F -- '->kn->active#' CrashLog-12016d5550 | sort -u | wc -l
439
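
A quick sketch for seeing which kn->active#number classes recur most often
in the dump:

grep -oE 'kn->active#[0-9]+' CrashLog-12016d5550 | sort | uniq -c | sort -rn | head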



FD:  931 BD:1 .+.+: kn->active#88
 ->fs_reclaim
 ->&s->seqcount
 ->kernfs_open_file_mutex
 ->nsim_bus_dev_list_lock
 

Re: [PATCH v3] lockdep: Allow tuning tracing capacity constants.

2020-12-03 Thread Tetsuo Handa
On Sun, Nov 22, 2020 at 2:56 AM Tetsuo Handa 
 wrote:
> Peter, you guessed that the culprit is sysfs at
> https://lkml.kernel.org/r/20200916115057.go2...@hirez.programming.kicks-ass.net
>  , but
> syzbot reported at 
> https://syzkaller.appspot.com/text?tag=MachineInfo=99b8f2b092d9714f
> that "BUG: MAX_LOCKDEP_ENTRIES too low!" can occur on a VM with only 2 CPUs.
> Is your guess catching the culprit?
>
> We could improve a few locks, but as a whole we won't be able to afford 
> keeping
> sum of individual dependencies under current threshold. Therefore, allow 
> lockdep to
> tune the capacity and allow syzkaller to dump when reaching the capacity will 
> be
> the way to go.

An example dump from a VM with only 2 CPUs
( https://syzkaller.appspot.com/text?tag=CrashLog&x=12016d5550 ) had
a chain with more than 150 kn->active#number entries.

kn->active#number is initialized by __kernfs_create_file() but it is not
deinitialized by kernfs_put() (is there no method for erasing a lockdep map?).
Since syzkaller continues object creation/deletion via fuzz testing until
the kernel crashes, is __kernfs_create_file() being called that many times?



struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent,
 const char *name,
 umode_t mode, kuid_t uid, kgid_t gid,
 loff_t size,
 const struct kernfs_ops *ops,
 void *priv, const void *ns,
 struct lock_class_key *key)
{
(...snipped...)
	kn = kernfs_new_node(parent, name, (mode & S_IALLUGO) | S_IFREG,
			     uid, gid, flags);
	if (!kn)
		return ERR_PTR(-ENOMEM);

	kn->attr.ops = ops;
	kn->attr.size = size;
	kn->ns = ns;
	kn->priv = priv;

#ifdef CONFIG_DEBUG_LOCK_ALLOC
	if (key) {
		lockdep_init_map(&kn->dep_map, "kn->active", key, 0);
		kn->flags |= KERNFS_LOCKDEP;
	}
#endif
(...snipped...)
}

void kernfs_put(struct kernfs_node *kn)
{
(...snipped...)
	kmem_cache_free(kernfs_node_cache, kn);
(...snipped...)
}
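
A minimal sketch (assuming a shell on a CONFIG_LOCKDEP kernel) for watching
whether class and dependency counts keep growing while such create/delete
loops run:

while sleep 10; do
    grep -E 'lock-classes|direct dependencies|dependency chains:' /proc/lockdep_stats
done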



FD:  919 BD:   59 +.+.: rtnl_mutex
 ->fs_reclaim
 ->depot_lock
 ->pcpu_alloc_mutex
 ->>wait#6
 ->_hash[i].lock
 ->gdp_mutex
 ->>list_lock
 ->lock#2
 ->kernfs_mutex
 ->bus_type_sem
 ->sysfs_symlink_target_lock
 ->&s->seqcount
 ->>power.lock
 ->dpm_list_mtx
 ->uevent_sock_mutex
 ->>lock
 ->running_helpers_waitq.lock
 ->subsys mutex#17
 ->dev_hotplug_mutex
 ->dev_base_lock
 ->input_pool.lock
 ->nl_table_lock
 ->nl_table_wait.lock
 ->net_rwsem
 ->>lock
 ->sysctl_lock
 ->>lock
 ->krc.lock
 ->cpu_hotplug_lock
 ->kthread_create_lock
 ->>pi_lock
 ->_rq->removed.lock
 ->>wait
 ->>alloc_lock
 ->_rwsem
 ->wq_pool_mutex
 ->>lock
 ->rcu_node_0
 ->_state.expedited_wq
 ->>list_lock
 ->>lock
 ->pool_lock
 ->lweventlist_lock
 ->rtnl_mutex.wait_lock
 ->proc_subdir_lock
 ->proc_inum_ida.xa_lock
 ->>k_lock
 ->pcpu_lock
 ->param_lock
 ->logbuf_lock
 ->(console_sem).lock
 ->subsys mutex#52
 ->pin_fs_lock
 ->>s_type->i_mutex_key#3
 ->>iflist_mtx
 ->>mtx
 ->subsys mutex#53
 ->>sec_mtx
 ->>iflist_mtx#2
 ->lock#6
 ->net_dm_mutex
 ->failover_lock
 ->>lock
 ->>lock
 ->>lock
 ->>mc_lock
 ->>mca_lock
 ->>lock
 ->j1939_netdev_lock
 ->>lock
 ->>sock_lock
 ->_requests_lock
 ->_pending_beacons_lock
 ->_indoor_lock
 ->_xmit_LOOPBACK
 ->(inetaddr_validator_chain).rwsem
 ->(inetaddr_chain).rwsem
 ->_dev->mc_tomb_lock
 ->>lock
 ->fib_info_lock
 ->cbs_list_lock
 ->taprio_list_lock
 ->(inet6addr_validator_chain).rwsem
 ->addrconf_hash_lock
 ->>lock
 ->>tb6_lock
 ->rlock-AF_NETLINK
 ->_xmit_ETHER
 ->noop_qdisc.q.lock
 ->quarantine_lock
 ->kernfs_mutex.wait_lock
 ->x25_neigh_list_lock
 ->>mmap_lock#2
 ->free_vmap_area_lock
 ->vmap_area_lock
 ->>lock
 ->>lock
 ->>hash_lock
 ->init_mm.page_table_lock
 ->_netdev_addr_lock_key
 ->listen_lock
 ->>consumer_lock
 ->batched_entropy_u32.lock
 ->(switchdev_blocking_notif_chain).rwsem
 ->>hash_lock
 ->nf_hook_mutex
 ->console_owner_lock
 ->console_owner
 ->_priv->tvlv.handler_list_lock
 ->_priv->tvlv.container_list_lock
 ->_priv->softif_vlan_list_lock
 ->key#14
 ->_priv->tt.changes_list_lock
 ->tk_core.seq.seqcount
 ->>mutex
 ->init_lock
 ->deferred_lock
 ->target_list_lock
 ->>lock
 ->_addr_list_lock_key/1
 ->team->team_lock_key
 ->_iface->bat_iv.ogm_buff_mutex
 ->ptype_lock
 ->>list_lock
 ->>exp_wq[1]
 ->>exp_wq[2]
 ->>exp_wq[3]
 ->>xa_lock#7
 ->_addr_list_lock_key#3/1
 ->req_lock
 ->>wait#9
 ->subsys mutex#74
 ->_port->type_lock
 ->pgd_lock
 ->>tx_global_lock
 ->>exp_wq[0]
 ->>q.lock
 ->(>proxy_timer)
 ->class
 ->bpf_devs_lock
 ->devnet_rename_sem
 ->>nft.commit_mutex
 ->>pde_unload_lock
 ->team->team_lock_key#2
 ->>device_update_lock
 ->_xmit_SIT
 ->_netdev_addr_lock_key/1
 ->team->team_lock_key#3
 ->_xmit_NONE
 ->_xmit_TUNNEL
 ->_xmit_IPGRE
 ->mount_lock
 ->team->team_lock_key#4
 ->_xmit_TUNNEL6
 

Re: [PATCH v3] lockdep: Allow tuning tracing capacity constants.

2020-11-27 Thread Dmitry Vyukov
On Sun, Nov 22, 2020 at 2:56 AM Tetsuo Handa
 wrote:
>
> On 2020/11/20 18:27, Dmitry Vyukov wrote:
> > Peter, so far it looks like just a very large, but normal graph to me.
> > The cheapest from an engineering point of view solution would be just
> > to increase the constants. I assume a 2x increase should buy us lots
> > of time to overflow.
> > I can think of more elaborate solutions, e.g. using bitmasks to
> > represent hot leaf and top-level locks. But it will both increase the
> > resulting code complexity (no uniform representation anymore, all code
> > will need to deal with different representations) and require some
> > time investments (that I can't justify for me at least as compared to
> > just throwing a bit more machine memory at it). And in the end it
> > won't really reduce the size of the graph.
> > What do you think?
> >
>
> Yes, I think it is a normal graph; simply syzkaller kernels tend to record
> a few times more dependencies than my idle kernel (shown bottom).
>
> Peter, you guessed that the culprit is sysfs at
> https://lkml.kernel.org/r/20200916115057.go2...@hirez.programming.kicks-ass.net
>  , but
> syzbot reported at 
> https://syzkaller.appspot.com/text?tag=MachineInfo=99b8f2b092d9714f
> that "BUG: MAX_LOCKDEP_ENTRIES too low!" can occur on a VM with only 2 CPUs.
> Is your guess catching the culprit?
>
> We could improve a few locks, but as a whole we won't be able to afford 
> keeping
> sum of individual dependencies under current threshold. Therefore, allow 
> lockdep to
> tune the capacity and allow syzkaller to dump when reaching the capacity will 
> be
> the way to go.
>
>
>
> # cat /proc/lockdep_stats
>  lock-classes: 1236 [max: 8192]
>  direct dependencies:  9610 [max: 32768]
>  indirect dependencies:   40401
>  all direct dependencies:174635
>  dependency chains:   11398 [max: 65536]
>  dependency chain hlocks used:42830 [max: 327680]
>  dependency chain hlocks lost:0
>  in-hardirq chains:  61
>  in-softirq chains: 414
>  in-process chains:   10923
>  stack-trace entries: 93041 [max: 524288]
>  number of stack traces:   4997
>  number of stack hash chains:  4292
>  combined max dependencies:   281074520
>  hardirq-safe locks: 50
>  hardirq-unsafe locks:  805
>  softirq-safe locks:146
>  softirq-unsafe locks:  722
>  irq-safe locks:155
>  irq-unsafe locks:  805
>  hardirq-read-safe locks: 2
>  hardirq-read-unsafe locks: 129
>  softirq-read-safe locks:11
>  softirq-read-unsafe locks: 123
>  irq-read-safe locks:11
>  irq-read-unsafe locks: 129
>  uncategorized locks:   224
>  unused locks:0
>  max locking depth:  15
>  max bfs queue depth:   215
>  chain lookup misses: 11664
>  chain lookup hits:37393935
>  cyclic checks:   11053
>  redundant checks:0
>  redundant links: 0
>  find-mask forwards checks:1588
>  find-mask backwards checks:   1779
>  hardirq on events:17502380
>  hardirq off events:   17502376
>  redundant hardirq ons:   0
>  redundant hardirq offs:  0
>  softirq on events:   90845
>  softirq off events:  90845
>  redundant softirq ons:   0
>  redundant softirq offs:  0
>  debug_locks: 1
>
>  zapped classes:  0
>  zapped lock chains:  0
>  large chain blocks:  1
> # awk ' { if ($2 == "OPS:") print $5" "$9 } ' /proc/lockdep | sort -rV | head 
> -n 30
> 423 (wq_completion)events
> 405 (wq_completion)events_unbound
> 393 >f_pos_lock
> 355 >lock
> 349 sb_writers#3
> 342 sb_writers#6
> 338 >mutex
> 330 (work_completion)(>work)
> 330 pernet_ops_rwsem
> 289 epmutex
> 288 >mtx
> 281 tty_mutex
> 280 >legacy_mutex
> 273 >legacy_mutex/1
> 269 >ldisc_sem
> 268 (wq_completion)ipv6_addrconf
> 266 (work_completion)(&(>dad_work)->work)
> 266 (linkwatch_work).work
> 266 (addr_chk_work).work
> 266 >atomic_read_lock
> 265 (work_completion)(>work)
> 265 rtnl_mutex
> 263 >atomic_write_lock
> 262 >lock
> 261 >termios_rwsem
> 242 >buf.lock/1
> 242 kn->active#40
> 241 _tty->termios_rwsem/1
> 240 registration_lock
> 239 >output_lock
> # awk ' { if ($2 == "OPS:") print $7" "$9 } ' /proc/lockdep | sort -rV | head 
> -n 30
> 642 pool_lock
> 641 _hash[i].lock
> 582 tk_core.seq.seqcount
> 561 hrtimer_bases.lock
> 560 _rq->rt_runtime_lock
> 559 _b->rt_runtime_lock
> 559 >lock
> 559 

Re: [PATCH v3] lockdep: Allow tuning tracing capacity constants.

2020-11-21 Thread Tetsuo Handa
On 2020/11/20 18:27, Dmitry Vyukov wrote:
> Peter, so far it looks like just a very large, but normal graph to me.
> The cheapest from an engineering point of view solution would be just
> to increase the constants. I assume a 2x increase should buy us lots
> of time to overflow.
> I can think of more elaborate solutions, e.g. using bitmasks to
> represent hot leaf and top-level locks. But it will both increase the
> resulting code complexity (no uniform representation anymore, all code
> will need to deal with different representations) and require some
> time investments (that I can't justify for me at least as compared to
> just throwing a bit more machine memory at it). And in the end it
> won't really reduce the size of the graph.
> What do you think?
> 

Yes, I think it is a normal graph; it is just that syzkaller kernels tend to
record a few times more dependencies than my idle kernel (shown at the bottom).

Peter, you guessed that the culprit is sysfs at
https://lkml.kernel.org/r/20200916115057.go2...@hirez.programming.kicks-ass.net , but
syzbot reported at https://syzkaller.appspot.com/text?tag=MachineInfo&x=99b8f2b092d9714f
that "BUG: MAX_LOCKDEP_ENTRIES too low!" can occur on a VM with only 2 CPUs.
Does your guess catch the culprit?

We could improve a few locks, but as a whole we won't be able to afford keeping
the sum of individual dependencies under the current threshold. Therefore,
allowing lockdep to tune the capacity and allowing syzkaller to dump state when
reaching the capacity will be the way to go.



# cat /proc/lockdep_stats
 lock-classes: 1236 [max: 8192]
 direct dependencies:  9610 [max: 32768]
 indirect dependencies:   40401
 all direct dependencies:174635
 dependency chains:   11398 [max: 65536]
 dependency chain hlocks used:42830 [max: 327680]
 dependency chain hlocks lost:0
 in-hardirq chains:  61
 in-softirq chains: 414
 in-process chains:   10923
 stack-trace entries: 93041 [max: 524288]
 number of stack traces:   4997
 number of stack hash chains:  4292
 combined max dependencies:   281074520
 hardirq-safe locks: 50
 hardirq-unsafe locks:  805
 softirq-safe locks:146
 softirq-unsafe locks:  722
 irq-safe locks:155
 irq-unsafe locks:  805
 hardirq-read-safe locks: 2
 hardirq-read-unsafe locks: 129
 softirq-read-safe locks:11
 softirq-read-unsafe locks: 123
 irq-read-safe locks:11
 irq-read-unsafe locks: 129
 uncategorized locks:   224
 unused locks:0
 max locking depth:  15
 max bfs queue depth:   215
 chain lookup misses: 11664
 chain lookup hits:37393935
 cyclic checks:   11053
 redundant checks:0
 redundant links: 0
 find-mask forwards checks:1588
 find-mask backwards checks:   1779
 hardirq on events:17502380
 hardirq off events:   17502376
 redundant hardirq ons:   0
 redundant hardirq offs:  0
 softirq on events:   90845
 softirq off events:  90845
 redundant softirq ons:   0
 redundant softirq offs:  0
 debug_locks: 1

 zapped classes:  0
 zapped lock chains:  0
 large chain blocks:  1
# awk ' { if ($2 == "OPS:") print $5" "$9 } ' /proc/lockdep | sort -rV | head -n 30
423 (wq_completion)events
405 (wq_completion)events_unbound
393 >f_pos_lock
355 >lock
349 sb_writers#3
342 sb_writers#6
338 >mutex
330 (work_completion)(>work)
330 pernet_ops_rwsem
289 epmutex
288 >mtx
281 tty_mutex
280 >legacy_mutex
273 >legacy_mutex/1
269 >ldisc_sem
268 (wq_completion)ipv6_addrconf
266 (work_completion)(&(>dad_work)->work)
266 (linkwatch_work).work
266 (addr_chk_work).work
266 >atomic_read_lock
265 (work_completion)(>work)
265 rtnl_mutex
263 >atomic_write_lock
262 >lock
261 >termios_rwsem
242 >buf.lock/1
242 kn->active#40
241 _tty->termios_rwsem/1
240 registration_lock
239 >output_lock
# awk ' { if ($2 == "OPS:") print $7" "$9 } ' /proc/lockdep | sort -rV | head -n 30
642 pool_lock
641 _hash[i].lock
582 tk_core.seq.seqcount
561 hrtimer_bases.lock
560 _rq->rt_runtime_lock
559 _b->rt_runtime_lock
559 >lock
559 _rq->removed.lock
559 _b->lock
558 >lock
550 >delays->lock
549 >pi_lock
506 >lock
504 >list_lock
432 &s->seqcount
404 >wait#10
401 >lock
369 >lock
330 rcu_node_0
326 &(kretprobe_table_locks[i].lock)
326 pgd_lock
324 >lru_lock
323 lock#5
321 _wait_table[i]
319 ptlock_ptr(page)#2/1
318 ptlock_ptr(page)#2

Re: [PATCH v3] lockdep: Allow tuning tracing capacity constants.

2020-11-20 Thread Dmitry Vyukov
On Fri, Nov 20, 2020 at 10:22 AM Dmitry Vyukov  wrote:
>
> On Thu, Nov 19, 2020 at 7:08 PM Dmitry Vyukov  wrote:
> > > > > On Thu, Nov 19, 2020 at 2:45 PM Tetsuo Handa
> > > > >  wrote:
> > > > > >
> > > > > > On 2020/11/19 22:06, Dmitry Vyukov wrote:
> > > > > > 
> > > > > >  I am trying to reproduce this locally first. syzbot caims it 
> > > > > >  can
> > > > > >  reproduce it with a number of very simpler reproducers (like 
> > > > > >  spawn
> > > > > >  process, unshare, create socket):
> > > > > >  https://syzkaller.appspot.com/bug?id=8a18efe79140782a88dcd098808d6ab20ed740cc
> > > > > > 
> > > > > >  I see a very slow drift, but it's very slow, so get only to:
> > > > > >   direct dependencies: 22072 [max: 32768]
> > > > > > 
> > > > > >  But that's running a very uniform workload.
> > > > > > 
> > > > > >  However when I tried to cat /proc/lockdep to see if there is 
> > > > > >  anything
> > > > > >  fishy already,
> > > > > >  I got this (on c2e7554e1b85935d962127efa3c2a76483b0b3b6).
> > > > > > 
> > > > > >  Some missing locks?
> > > > > >
> > > > > > Not a TOMOYO's bug. Maybe a lockdep's bug.
> > > > > >
> > > > > > >
> > > > > > > But I don't know if it's enough to explain the overflow or not...
> > > > > > >
> > > > > >
> > > > > > Since you can't hit the limit locally, I guess we need to ask 
> > > > > > syzbot to
> > > > > > run massive testcases.
> > > > >
> > > > > I am trying to test the code that will do this. Otherwise we will get
> > > > > days-long round-trips for stupid bugs. These files are also quite
> > > > > huge, I afraid that may not fit into storage.
> > > > >
> > > > > So far I get to at most:
> > > > >
> > > > >  lock-classes: 2901 [max: 8192]
> > > > >  direct dependencies: 25574 [max: 32768]
> > > > >  dependency chains:   40605 [max: 65536]
> > > > >  dependency chain hlocks used:   176814 [max: 327680]
> > > > >  stack-trace entries:258590 [max: 524288]
> > > > >
> > > > > with these worst offenders:
> > > > >
> > > > > # egrep "BD: [0-9]" /proc/lockdep
> > > > > df5b6792 FD:2 BD: 1235 -.-.: _hash[i].lock
> > > > > 5dfeb73c FD:1 BD: 1236 ..-.: pool_lock
> > > > > b86254b1 FD:   14 BD:  -.-.: >lock
> > > > > 866efb75 FD:1 BD: 1112 : _b->lock
> > > > > 6970cf1a FD:2 BD: 1126 : tk_core.seq.seqcount
> > > > > f49d95b0 FD:3 BD: 1180 -.-.: >lock
> > > > > ba3f8454 FD:5 BD: 1115 -.-.: hrtimer_bases.lock
> > > > > fb340f16 FD:   16 BD: 1030 -.-.: >pi_lock
> > > > > c9f6f58c FD:1 BD: 1114 -.-.: _cpu_ptr(group->pcpu, 
> > > > > cpu)->seq
> > > > > 49d3998c FD:1 BD: 1112 -.-.: _rq->removed.lock
> > > > > fdf7f396 FD:7 BD: 1112 -...: _b->rt_runtime_lock
> > > > > 21aedb8d FD:1 BD: 1113 -...: _rq->rt_runtime_lock
> > > > > 4e34c8d4 FD:1 BD: 1112 : >lock
> > > > > b2ac5d96 FD:1 BD: 1127 -.-.: pvclock_gtod_data
> > > > > c5df4dc3 FD:1 BD: 1031 ..-.: >delays->lock
> > > > > fe623698 FD:1 BD: 1112 -...:
> > > > > per_cpu_ptr(_rstat_cpu_lock, cpu)
> > > > >
> > > > >
> > > > > But the kernel continues to crash on different unrelated bugs...
> > > >
> > > >
> > > > Here is one successful sample. How do we debug it? What should we be
> > > > looking for?
> > > >
> > > > p.s. it's indeed huge, full log was 11MB, this probably won't be
> > > > chewed by syzbot.
> > > > Peter, are these [hex numbers] needed? Could we strip them during
> > > > post-processing? At first sight they look like derivatives of the
> > > > name.
> > >
> > > The worst back-edge offenders are:
> > >
> > > b445a595 FD:2 BD: 1595 -.-.: _hash[i].lock
> > > 55ae0468 FD:1 BD: 1596 ..-.: pool_lock
> > > b1336dc4 FD:2 BD: 1002 ..-.: >lock
> > > 9a0cabce FD:1 BD: 1042 ...-: &s->seqcount
> > > 1f2849b5 FD:1 BD: 1192 ..-.: depot_lock
> > > d044255b FD:1 BD: 1038 -.-.: >list_lock
> > > 5868699e FD:   17 BD: 1447 -.-.: >lock
> > > bb52ab59 FD:1 BD: 1448 : _b->lock
> > > 4f442fff FD:2 BD: 1469 : tk_core.seq.seqcount
> > > c908cc32 FD:3 BD: 1512 -.-.: >lock
> > > 478677cc FD:5 BD: 1435 -.-.: hrtimer_bases.lock
> > > b5b65cb1 FD:   19 BD: 1255 -.-.: >pi_lock
> > > 7f313bd5 FD:1 BD: 1451 -.-.: _cpu_ptr(group->pcpu, 
> > > cpu)->seq
> > > bac5d8ed FD:1 BD: 1004 ...-: &s->seqcount#2
> > > 0f57e411 FD:1 BD: 1448 -.-.: _rq->removed.lock
> > > 13c1ab65 FD:7 BD: 1449 -.-.: _b->rt_runtime_lock
> > > 3bdf78f4 FD:1 BD: 1450 -.-.: _rq->rt_runtime_lock
> > > 975d5b80 FD:1 BD: 1448 : >lock
> > > 2586e81b FD:1 BD: 1471 -.-.: pvclock_gtod_data
> > > 

Re: [PATCH v3] lockdep: Allow tuning tracing capacity constants.

2020-11-19 Thread Tetsuo Handa
On 2020/11/19 23:30, Dmitry Vyukov wrote:
> p.s. it's indeed huge, full log was 11MB, this probably won't be
> chewed by syzbot.

Is "cat /proc/lockdep*" output written from userspace?
Then, we could try "xz -9" + "base64" for recording.

> Peter, are these [hex numbers] needed? Could we strip them during
> post-processing? At first sight they look like derivatives of the
> name.

kernel/locking/lockdep.c uses %px (raw address) whereas
kernel/locking/lockdep_proc.c uses %p (__ptr_to_hashval() value).
I think saving these [hashed hex numbers] is unlikely to be useful.
At least for this testing, we can strip the leading hashed-pointer part.
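
A sketch of such post-processing (the output file name is illustrative):

sed -E 's/^[0-9a-f]+ //' /proc/lockdep | xz -9 | base64 > lockdep.xz.b64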


Re: [PATCH v3] lockdep: Allow tuning tracing capacity constants.

2020-11-19 Thread Dmitry Vyukov
On Thu, Nov 19, 2020 at 3:30 PM Dmitry Vyukov  wrote:
> >
> > On Thu, Nov 19, 2020 at 2:45 PM Tetsuo Handa
> >  wrote:
> > >
> > > On 2020/11/19 22:06, Dmitry Vyukov wrote:
> > > 
> > >  I am trying to reproduce this locally first. syzbot caims it can
> > >  reproduce it with a number of very simpler reproducers (like spawn
> > >  process, unshare, create socket):
> > >  https://syzkaller.appspot.com/bug?id=8a18efe79140782a88dcd098808d6ab20ed740cc
> > > 
> > >  I see a very slow drift, but it's very slow, so get only to:
> > >   direct dependencies: 22072 [max: 32768]
> > > 
> > >  But that's running a very uniform workload.
> > > 
> > >  However when I tried to cat /proc/lockdep to see if there is anything
> > >  fishy already,
> > >  I got this (on c2e7554e1b85935d962127efa3c2a76483b0b3b6).
> > > 
> > >  Some missing locks?
> > >
> > > Not a TOMOYO's bug. Maybe a lockdep's bug.
> > >
> > > >
> > > > But I don't know if it's enough to explain the overflow or not...
> > > >
> > >
> > > Since you can't hit the limit locally, I guess we need to ask syzbot to
> > > run massive testcases.
> >
> > I am trying to test the code that will do this. Otherwise we will get
> > days-long round-trips for stupid bugs. These files are also quite
> > huge, I afraid that may not fit into storage.
> >
> > So far I get to at most:
> >
> >  lock-classes: 2901 [max: 8192]
> >  direct dependencies: 25574 [max: 32768]
> >  dependency chains:   40605 [max: 65536]
> >  dependency chain hlocks used:   176814 [max: 327680]
> >  stack-trace entries:258590 [max: 524288]
> >
> > with these worst offenders:
> >
> > # egrep "BD: [0-9]" /proc/lockdep
> > df5b6792 FD:2 BD: 1235 -.-.: _hash[i].lock
> > 5dfeb73c FD:1 BD: 1236 ..-.: pool_lock
> > b86254b1 FD:   14 BD:  -.-.: >lock
> > 866efb75 FD:1 BD: 1112 : _b->lock
> > 6970cf1a FD:2 BD: 1126 : tk_core.seq.seqcount
> > f49d95b0 FD:3 BD: 1180 -.-.: >lock
> > ba3f8454 FD:5 BD: 1115 -.-.: hrtimer_bases.lock
> > fb340f16 FD:   16 BD: 1030 -.-.: >pi_lock
> > c9f6f58c FD:1 BD: 1114 -.-.: _cpu_ptr(group->pcpu, cpu)->seq
> > 49d3998c FD:1 BD: 1112 -.-.: _rq->removed.lock
> > fdf7f396 FD:7 BD: 1112 -...: _b->rt_runtime_lock
> > 21aedb8d FD:1 BD: 1113 -...: _rq->rt_runtime_lock
> > 4e34c8d4 FD:1 BD: 1112 : >lock
> > b2ac5d96 FD:1 BD: 1127 -.-.: pvclock_gtod_data
> > c5df4dc3 FD:1 BD: 1031 ..-.: >delays->lock
> > fe623698 FD:1 BD: 1112 -...:
> > per_cpu_ptr(_rstat_cpu_lock, cpu)
> >
> >
> > But the kernel continues to crash on different unrelated bugs...
>
>
> Here is one successful sample. How do we debug it? What should we be
> looking for?
>
> p.s. it's indeed huge, full log was 11MB, this probably won't be
> chewed by syzbot.
> Peter, are these [hex numbers] needed? Could we strip them during
> post-processing? At first sight they look like derivatives of the
> name.

The worst back-edge offenders are:

b445a595 FD:2 BD: 1595 -.-.: _hash[i].lock
55ae0468 FD:1 BD: 1596 ..-.: pool_lock
b1336dc4 FD:2 BD: 1002 ..-.: >lock
9a0cabce FD:1 BD: 1042 ...-: &s->seqcount
1f2849b5 FD:1 BD: 1192 ..-.: depot_lock
d044255b FD:1 BD: 1038 -.-.: >list_lock
5868699e FD:   17 BD: 1447 -.-.: >lock
bb52ab59 FD:1 BD: 1448 : _b->lock
4f442fff FD:2 BD: 1469 : tk_core.seq.seqcount
c908cc32 FD:3 BD: 1512 -.-.: >lock
478677cc FD:5 BD: 1435 -.-.: hrtimer_bases.lock
b5b65cb1 FD:   19 BD: 1255 -.-.: >pi_lock
7f313bd5 FD:1 BD: 1451 -.-.: _cpu_ptr(group->pcpu, cpu)->seq
bac5d8ed FD:1 BD: 1004 ...-: &s->seqcount#2
0f57e411 FD:1 BD: 1448 -.-.: _rq->removed.lock
13c1ab65 FD:7 BD: 1449 -.-.: _b->rt_runtime_lock
3bdf78f4 FD:1 BD: 1450 -.-.: _rq->rt_runtime_lock
975d5b80 FD:1 BD: 1448 : >lock
2586e81b FD:1 BD: 1471 -.-.: pvclock_gtod_data
d03aed24 FD:1 BD: 1275 ..-.: >delays->lock
1119414f FD:1 BD: 1448 -...:
per_cpu_ptr(_rstat_cpu_lock, cpu)
6f3d793b FD:6 BD: 1449 -.-.: >lock
f3f0190c FD:9 BD: 1448 -...: >lock/1
7410cf1a FD:1 BD: 1448 -...: >rto_lock

There are 19 classes with ~1500 incoming edges. So that's 20K+.

In my local testing I was at around 20-something K and these worst
offenders were at ~1000 back edges.
Now they got to 1500, so that is what got us over the 32K limit, right?

Does this analysis make sense?

Any ideas what to do with these?
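
One way to quantify this would be to sum the back edges contributed by the
worst offenders (a sketch; it assumes "BD:" and its value appear as separate
tokens, as in the listing above):

awk '{ for (i = 1; i < NF; i++) if ($i == "BD:" && $(i+1) >= 1000) sum += $(i+1) } END { print sum }' /proc/lockdep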


Re: [PATCH v3] lockdep: Allow tuning tracing capacity constants.

2020-11-19 Thread Dmitry Vyukov
On Thu, Nov 19, 2020 at 2:45 PM Tetsuo Handa
 wrote:
>
> On 2020/11/19 22:06, Dmitry Vyukov wrote:
> 
>  I am trying to reproduce this locally first. syzbot caims it can
>  reproduce it with a number of very simpler reproducers (like spawn
>  process, unshare, create socket):
>  https://syzkaller.appspot.com/bug?id=8a18efe79140782a88dcd098808d6ab20ed740cc
> 
>  I see a very slow drift, but it's very slow, so get only to:
>   direct dependencies: 22072 [max: 32768]
> 
>  But that's running a very uniform workload.
> 
>  However when I tried to cat /proc/lockdep to see if there is anything
>  fishy already,
>  I got this (on c2e7554e1b85935d962127efa3c2a76483b0b3b6).
> 
>  Some missing locks?
>
> Not a TOMOYO's bug. Maybe a lockdep's bug.
>
> >
> > But I don't know if it's enough to explain the overflow or not...
> >
>
> Since you can't hit the limit locally, I guess we need to ask syzbot to
> run massive testcases.

I am trying to test the code that will do this. Otherwise we will get
days-long round-trips for stupid bugs. These files are also quite
huge; I'm afraid they may not fit into storage.

So far I get to at most:

 lock-classes: 2901 [max: 8192]
 direct dependencies: 25574 [max: 32768]
 dependency chains:   40605 [max: 65536]
 dependency chain hlocks used:   176814 [max: 327680]
 stack-trace entries:258590 [max: 524288]

with these worst offenders:

# egrep "BD: [0-9]" /proc/lockdep
df5b6792 FD:2 BD: 1235 -.-.: _hash[i].lock
5dfeb73c FD:1 BD: 1236 ..-.: pool_lock
b86254b1 FD:   14 BD:  -.-.: >lock
866efb75 FD:1 BD: 1112 : _b->lock
6970cf1a FD:2 BD: 1126 : tk_core.seq.seqcount
f49d95b0 FD:3 BD: 1180 -.-.: >lock
ba3f8454 FD:5 BD: 1115 -.-.: hrtimer_bases.lock
fb340f16 FD:   16 BD: 1030 -.-.: >pi_lock
c9f6f58c FD:1 BD: 1114 -.-.: _cpu_ptr(group->pcpu, cpu)->seq
49d3998c FD:1 BD: 1112 -.-.: _rq->removed.lock
fdf7f396 FD:7 BD: 1112 -...: _b->rt_runtime_lock
21aedb8d FD:1 BD: 1113 -...: _rq->rt_runtime_lock
4e34c8d4 FD:1 BD: 1112 : >lock
b2ac5d96 FD:1 BD: 1127 -.-.: pvclock_gtod_data
c5df4dc3 FD:1 BD: 1031 ..-.: >delays->lock
fe623698 FD:1 BD: 1112 -...:
per_cpu_ptr(_rstat_cpu_lock, cpu)


But the kernel continues to crash on different unrelated bugs...


Re: [PATCH v3] lockdep: Allow tuning tracing capacity constants.

2020-11-19 Thread Tetsuo Handa
On 2020/11/19 22:06, Dmitry Vyukov wrote:

 I am trying to reproduce this locally first. syzbot caims it can
 reproduce it with a number of very simpler reproducers (like spawn
 process, unshare, create socket):
 https://syzkaller.appspot.com/bug?id=8a18efe79140782a88dcd098808d6ab20ed740cc

 I see a very slow drift, but it's very slow, so get only to:
  direct dependencies: 22072 [max: 32768]

 But that's running a very uniform workload.

 However when I tried to cat /proc/lockdep to see if there is anything
 fishy already,
 I got this (on c2e7554e1b85935d962127efa3c2a76483b0b3b6).

 Some missing locks?

Not a TOMOYO bug. Maybe a lockdep bug.

> 
> But I don't know if it's enough to explain the overflow or not...
> 

Since you can't hit the limit locally, I guess we need to ask syzbot to
run massive testcases.



Re: [PATCH v3] lockdep: Allow tuning tracing capacity constants.

2020-11-19 Thread Dmitry Vyukov
On Thu, Nov 19, 2020 at 1:49 PM Dmitry Vyukov  wrote:
>
> On Thu, Nov 19, 2020 at 1:43 PM Dmitry Vyukov  wrote:
> > > On Wed, Nov 18, 2020 at 4:32 PM Tetsuo Handa
> > >  wrote:
> > > >
> > > > On 2020/11/19 0:10, Peter Zijlstra wrote:
> > > > > On Wed, Nov 18, 2020 at 11:30:05PM +0900, Tetsuo Handa wrote:
> > > > >> The problem is that we can't know what exactly is consuming these 
> > > > >> resources.
> > > > >> My question is do you have a plan to make it possible to know what 
> > > > >> exactly is
> > > > >> consuming these resources.
> > > > >
> > > > > I'm pretty sure it's in /proc/lockdep* somewhere.
> > > >
> > > > OK. Then...
> > > >
> > > > Dmitry, can you update syzkaller to dump /proc/lockdep* before 
> > > > terminating as
> > > > a crash as soon as encountering one of
> > > >
> > > >   BUG: MAX_LOCKDEP_ENTRIES too low!
> > > >   BUG: MAX_LOCKDEP_CHAIN_HLOCKS too low!
> > > >   BUG: MAX_LOCKDEP_CHAINS too low!
> > > >   BUG: MAX_LOCKDEP_KEYS too low!
> > > >   WARNING in print_bfs_bug
> > > >
> > > > messages?
> > > >
> > > > On 2020/09/16 21:14, Dmitry Vyukov wrote:
> > > > > On Wed, Sep 16, 2020 at 1:51 PM  wrote:
> > > > >>
> > > > >> On Wed, Sep 16, 2020 at 01:28:19PM +0200, Dmitry Vyukov wrote:
> > > > >>> On Fri, Sep 4, 2020 at 6:05 PM Tetsuo Handa
> > > > >>>  wrote:
> > > > 
> > > >  Hello. Can we apply this patch?
> > > > 
> > > >  This patch addresses top crashers for syzbot, and applying this 
> > > >  patch
> > > >  will help utilizing syzbot's resource for finding other bugs.
> > > > >>>
> > > > >>> Acked-by: Dmitry Vyukov 
> > > > >>>
> > > > >>> Peter, do you still have concerns with this?
> > > > >>
> > > > >> Yeah, I still hate it with a passion; it discourages thinking. A bad
> > > > >> annotation that blows up the lockdep storage, no worries, we'll just
> > > > >> increase this :/
> > > > >>
> > > > >> IIRC the issue with syzbot is that the current sysfs annotation is
> > > > >> pretty terrible and generates a gazillion classes, and syzbot likes
> > > > >> poking at /sys a lot and thus floods the system.
> > > > >>
> > > > >> I don't know enough about sysfs to suggest an alternative, and 
> > > > >> haven't
> > > > >> exactly had spare time to look into it either :/
> > > > >>
> > > > >> Examples of bad annotations is getting every CPU a separate class, 
> > > > >> that
> > > > >> leads to nr_cpus! chains if CPUs arbitrarily nest (nr_cpus^2 if 
> > > > >> there's
> > > > >> only a single nesting level).
> > > > >
> > > > > Maybe on "BUG: MAX_LOCKDEP_CHAINS too low!" we should then aggregate,
> > > > > sort and show existing chains so that it's possible to identify if
> > > > > there are any worst offenders and who they are.
> > > > >
> > > > > Currently we only have a hypothesis that there are some worst
> > > > > offenders vs lots of normal load. And we can't point fingers which
> > > > > means that, say, sysfs, or other maintainers won't be too inclined to
> > > > > fix anything.
> > > > >
> > > > > If we would know for sure that lock class X is guilty. That would make
> > > > > the situation much more actionable.
> > >
> > > I am trying to reproduce this locally first. syzbot caims it can
> > > reproduce it with a number of very simpler reproducers (like spawn
> > > process, unshare, create socket):
> > > https://syzkaller.appspot.com/bug?id=8a18efe79140782a88dcd098808d6ab20ed740cc
> > >
> > > I see a very slow drift, but it's very slow, so get only to:
> > >  direct dependencies: 22072 [max: 32768]
> > >
> > > But that's running a very uniform workload.
> > >
> > > However when I tried to cat /proc/lockdep to see if there is anything
> > > fishy already,
> > > I got this (on c2e7554e1b85935d962127efa3c2a76483b0b3b6).
> > >
> > > Some missing locks?
> > >
> > > ==
> > > BUG: KASAN: use-after-free in string_nocheck lib/vsprintf.c:611 [inline]
> > > BUG: KASAN: use-after-free in string+0x39c/0x3d0 lib/vsprintf.c:693
> > > Read of size 1 at addr 888295833740 by task less/1855
> > >
> > > CPU: 0 PID: 1855 Comm: less Tainted: GW 5.10.0-rc4+ #68
> > > Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS
> > > rel-1.13.0-44-g88ab0c15525c-prebuilt.qemu.org 04/01/2014
> > > Call Trace:
> > >  __dump_stack lib/dump_stack.c:77 [inline]
> > >  dump_stack+0x107/0x163 lib/dump_stack.c:118
> > >  print_address_description.constprop.0.cold+0xae/0x4c8 
> > > mm/kasan/report.c:385
> > >  __kasan_report mm/kasan/report.c:545 [inline]
> > >  kasan_report.cold+0x1f/0x37 mm/kasan/report.c:562
> > >  string_nocheck lib/vsprintf.c:611 [inline]
> > >  string+0x39c/0x3d0 lib/vsprintf.c:693
> > >  vsnprintf+0x71b/0x14f0 lib/vsprintf.c:2618
> > >  seq_vprintf fs/seq_file.c:398 [inline]
> > >  seq_printf+0x195/0x240 fs/seq_file.c:413
> > >  print_name+0x98/0x1d0 kernel/locking/lockdep_proc.c:50
> > >  l_show+0x111/0x2c0 kernel/locking/lockdep_proc.c:82
> > >  

Re: [PATCH v3] lockdep: Allow tuning tracing capacity constants.

2020-11-19 Thread Dmitry Vyukov
On Thu, Nov 19, 2020 at 1:43 PM Dmitry Vyukov  wrote:
> > On Wed, Nov 18, 2020 at 4:32 PM Tetsuo Handa
> >  wrote:
> > >
> > > On 2020/11/19 0:10, Peter Zijlstra wrote:
> > > > On Wed, Nov 18, 2020 at 11:30:05PM +0900, Tetsuo Handa wrote:
> > > >> The problem is that we can't know what exactly is consuming these 
> > > >> resources.
> > > >> My question is do you have a plan to make it possible to know what 
> > > >> exactly is
> > > >> consuming these resources.
> > > >
> > > > I'm pretty sure it's in /proc/lockdep* somewhere.
> > >
> > > OK. Then...
> > >
> > > Dmitry, can you update syzkaller to dump /proc/lockdep* before 
> > > terminating as
> > > a crash as soon as encountering one of
> > >
> > >   BUG: MAX_LOCKDEP_ENTRIES too low!
> > >   BUG: MAX_LOCKDEP_CHAIN_HLOCKS too low!
> > >   BUG: MAX_LOCKDEP_CHAINS too low!
> > >   BUG: MAX_LOCKDEP_KEYS too low!
> > >   WARNING in print_bfs_bug
> > >
> > > messages?
> > >
> > > On 2020/09/16 21:14, Dmitry Vyukov wrote:
> > > > On Wed, Sep 16, 2020 at 1:51 PM  wrote:
> > > >>
> > > >> On Wed, Sep 16, 2020 at 01:28:19PM +0200, Dmitry Vyukov wrote:
> > > >>> On Fri, Sep 4, 2020 at 6:05 PM Tetsuo Handa
> > > >>>  wrote:
> > > 
> > >  Hello. Can we apply this patch?
> > > 
> > >  This patch addresses top crashers for syzbot, and applying this patch
> > >  will help utilizing syzbot's resource for finding other bugs.
> > > >>>
> > > >>> Acked-by: Dmitry Vyukov 
> > > >>>
> > > >>> Peter, do you still have concerns with this?
> > > >>
> > > >> Yeah, I still hate it with a passion; it discourages thinking. A bad
> > > >> annotation that blows up the lockdep storage, no worries, we'll just
> > > >> increase this :/
> > > >>
> > > >> IIRC the issue with syzbot is that the current sysfs annotation is
> > > >> pretty terrible and generates a gazillion classes, and syzbot likes
> > > >> poking at /sys a lot and thus floods the system.
> > > >>
> > > >> I don't know enough about sysfs to suggest an alternative, and haven't
> > > >> exactly had spare time to look into it either :/
> > > >>
> > > >> Examples of bad annotations is getting every CPU a separate class, that
> > > >> leads to nr_cpus! chains if CPUs arbitrarily nest (nr_cpus^2 if there's
> > > >> only a single nesting level).
> > > >
> > > > Maybe on "BUG: MAX_LOCKDEP_CHAINS too low!" we should then aggregate,
> > > > sort and show existing chains so that it's possible to identify if
> > > > there are any worst offenders and who they are.
> > > >
> > > > Currently we only have a hypothesis that there are some worst
> > > > offenders vs lots of normal load. And we can't point fingers which
> > > > means that, say, sysfs, or other maintainers won't be too inclined to
> > > > fix anything.
> > > >
> > > > If we would know for sure that lock class X is guilty. That would make
> > > > the situation much more actionable.
> >
> > I am trying to reproduce this locally first. syzbot caims it can
> > reproduce it with a number of very simpler reproducers (like spawn
> > process, unshare, create socket):
> > https://syzkaller.appspot.com/bug?id=8a18efe79140782a88dcd098808d6ab20ed740cc
> >
> > I see a very slow drift, but it's very slow, so get only to:
> >  direct dependencies: 22072 [max: 32768]
> >
> > But that's running a very uniform workload.
> >
> > However when I tried to cat /proc/lockdep to see if there is anything
> > fishy already,
> > I got this (on c2e7554e1b85935d962127efa3c2a76483b0b3b6).
> >
> > Some missing locks?
> >
> > ==
> > BUG: KASAN: use-after-free in string_nocheck lib/vsprintf.c:611 [inline]
> > BUG: KASAN: use-after-free in string+0x39c/0x3d0 lib/vsprintf.c:693
> > Read of size 1 at addr 888295833740 by task less/1855
> >
> > CPU: 0 PID: 1855 Comm: less Tainted: GW 5.10.0-rc4+ #68
> > Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS
> > rel-1.13.0-44-g88ab0c15525c-prebuilt.qemu.org 04/01/2014
> > Call Trace:
> >  __dump_stack lib/dump_stack.c:77 [inline]
> >  dump_stack+0x107/0x163 lib/dump_stack.c:118
> >  print_address_description.constprop.0.cold+0xae/0x4c8 mm/kasan/report.c:385
> >  __kasan_report mm/kasan/report.c:545 [inline]
> >  kasan_report.cold+0x1f/0x37 mm/kasan/report.c:562
> >  string_nocheck lib/vsprintf.c:611 [inline]
> >  string+0x39c/0x3d0 lib/vsprintf.c:693
> >  vsnprintf+0x71b/0x14f0 lib/vsprintf.c:2618
> >  seq_vprintf fs/seq_file.c:398 [inline]
> >  seq_printf+0x195/0x240 fs/seq_file.c:413
> >  print_name+0x98/0x1d0 kernel/locking/lockdep_proc.c:50
> >  l_show+0x111/0x2c0 kernel/locking/lockdep_proc.c:82
> >  seq_read_iter+0xae4/0x10c0 fs/seq_file.c:268
> >  proc_reg_read_iter+0x1fb/0x2d0 fs/proc/inode.c:310
> >  call_read_iter include/linux/fs.h:1897 [inline]
> >  new_sync_read+0x41e/0x6e0 fs/read_write.c:415
> >  vfs_read+0x35c/0x570 fs/read_write.c:496
> >  ksys_read+0x12d/0x250 fs/read_write.c:634
> >  do_syscall_64+0x2d/0x70 

Re: [PATCH v3] lockdep: Allow tuning tracing capacity constants.

2020-11-19 Thread Dmitry Vyukov
On Thu, Nov 19, 2020 at 1:33 PM Dmitry Vyukov  wrote:
>
> On Wed, Nov 18, 2020 at 4:32 PM Tetsuo Handa
>  wrote:
> >
> > On 2020/11/19 0:10, Peter Zijlstra wrote:
> > > On Wed, Nov 18, 2020 at 11:30:05PM +0900, Tetsuo Handa wrote:
> > >> The problem is that we can't know what exactly is consuming these 
> > >> resources.
> > >> My question is do you have a plan to make it possible to know what 
> > >> exactly is
> > >> consuming these resources.
> > >
> > > I'm pretty sure it's in /proc/lockdep* somewhere.
> >
> > OK. Then...
> >
> > Dmitry, can you update syzkaller to dump /proc/lockdep* before terminating 
> > as
> > a crash as soon as encountering one of
> >
> >   BUG: MAX_LOCKDEP_ENTRIES too low!
> >   BUG: MAX_LOCKDEP_CHAIN_HLOCKS too low!
> >   BUG: MAX_LOCKDEP_CHAINS too low!
> >   BUG: MAX_LOCKDEP_KEYS too low!
> >   WARNING in print_bfs_bug
> >
> > messages?
> >
> > On 2020/09/16 21:14, Dmitry Vyukov wrote:
> > > On Wed, Sep 16, 2020 at 1:51 PM  wrote:
> > >>
> > >> On Wed, Sep 16, 2020 at 01:28:19PM +0200, Dmitry Vyukov wrote:
> > >>> On Fri, Sep 4, 2020 at 6:05 PM Tetsuo Handa
> > >>>  wrote:
> > 
> >  Hello. Can we apply this patch?
> > 
> >  This patch addresses top crashers for syzbot, and applying this patch
> >  will help utilizing syzbot's resource for finding other bugs.
> > >>>
> > >>> Acked-by: Dmitry Vyukov 
> > >>>
> > >>> Peter, do you still have concerns with this?
> > >>
> > >> Yeah, I still hate it with a passion; it discourages thinking. A bad
> > >> annotation that blows up the lockdep storage, no worries, we'll just
> > >> increase this :/
> > >>
> > >> IIRC the issue with syzbot is that the current sysfs annotation is
> > >> pretty terrible and generates a gazillion classes, and syzbot likes
> > >> poking at /sys a lot and thus floods the system.
> > >>
> > >> I don't know enough about sysfs to suggest an alternative, and haven't
> > >> exactly had spare time to look into it either :/
> > >>
> > >> Examples of bad annotations is getting every CPU a separate class, that
> > >> leads to nr_cpus! chains if CPUs arbitrarily nest (nr_cpus^2 if there's
> > >> only a single nesting level).
> > >
> > > Maybe on "BUG: MAX_LOCKDEP_CHAINS too low!" we should then aggregate,
> > > sort and show existing chains so that it's possible to identify if
> > > there are any worst offenders and who they are.
> > >
> > > Currently we only have a hypothesis that there are some worst
> > > offenders vs lots of normal load. And we can't point fingers which
> > > means that, say, sysfs, or other maintainers won't be too inclined to
> > > fix anything.
> > >
> > > If we would know for sure that lock class X is guilty. That would make
> > > the situation much more actionable.
>
> I am trying to reproduce this locally first. syzbot caims it can
> reproduce it with a number of very simpler reproducers (like spawn
> process, unshare, create socket):
> https://syzkaller.appspot.com/bug?id=8a18efe79140782a88dcd098808d6ab20ed740cc
>
> I see a very slow drift, but it's very slow, so get only to:
>  direct dependencies: 22072 [max: 32768]
>
> But that's running a very uniform workload.
>
> However when I tried to cat /proc/lockdep to see if there is anything
> fishy already,
> I got this (on c2e7554e1b85935d962127efa3c2a76483b0b3b6).
>
> Some missing locks?
>
> ==
> BUG: KASAN: use-after-free in string_nocheck lib/vsprintf.c:611 [inline]
> BUG: KASAN: use-after-free in string+0x39c/0x3d0 lib/vsprintf.c:693
> Read of size 1 at addr 888295833740 by task less/1855
>
> CPU: 0 PID: 1855 Comm: less Tainted: GW 5.10.0-rc4+ #68
> Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS
> rel-1.13.0-44-g88ab0c15525c-prebuilt.qemu.org 04/01/2014
> Call Trace:
>  __dump_stack lib/dump_stack.c:77 [inline]
>  dump_stack+0x107/0x163 lib/dump_stack.c:118
>  print_address_description.constprop.0.cold+0xae/0x4c8 mm/kasan/report.c:385
>  __kasan_report mm/kasan/report.c:545 [inline]
>  kasan_report.cold+0x1f/0x37 mm/kasan/report.c:562
>  string_nocheck lib/vsprintf.c:611 [inline]
>  string+0x39c/0x3d0 lib/vsprintf.c:693
>  vsnprintf+0x71b/0x14f0 lib/vsprintf.c:2618
>  seq_vprintf fs/seq_file.c:398 [inline]
>  seq_printf+0x195/0x240 fs/seq_file.c:413
>  print_name+0x98/0x1d0 kernel/locking/lockdep_proc.c:50
>  l_show+0x111/0x2c0 kernel/locking/lockdep_proc.c:82
>  seq_read_iter+0xae4/0x10c0 fs/seq_file.c:268
>  proc_reg_read_iter+0x1fb/0x2d0 fs/proc/inode.c:310
>  call_read_iter include/linux/fs.h:1897 [inline]
>  new_sync_read+0x41e/0x6e0 fs/read_write.c:415
>  vfs_read+0x35c/0x570 fs/read_write.c:496
>  ksys_read+0x12d/0x250 fs/read_write.c:634
>  do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46
>  entry_SYSCALL_64_after_hwframe+0x44/0xa9
> RIP: 0033:0x7f1d48906310
> Code: 73 01 c3 48 8b 0d 28 4b 2b 00 31 d2 48 29 c2 64 89 11 48 83 c8
> ff eb ea 90 90 83 3d e5 a2 2b 00 00 75 10 b8 00 00 00 00 0f 05 <48> 3d
> 

Re: [PATCH v3] lockdep: Allow tuning tracing capacity constants.

2020-11-19 Thread Dmitry Vyukov
On Wed, Nov 18, 2020 at 4:32 PM Tetsuo Handa
 wrote:
>
> On 2020/11/19 0:10, Peter Zijlstra wrote:
> > On Wed, Nov 18, 2020 at 11:30:05PM +0900, Tetsuo Handa wrote:
> >> The problem is that we can't know what exactly is consuming these 
> >> resources.
> >> My question is do you have a plan to make it possible to know what exactly 
> >> is
> >> consuming these resources.
> >
> > I'm pretty sure it's in /proc/lockdep* somewhere.
>
> OK. Then...
>
> Dmitry, can you update syzkaller to dump /proc/lockdep* before terminating as
> a crash as soon as encountering one of
>
>   BUG: MAX_LOCKDEP_ENTRIES too low!
>   BUG: MAX_LOCKDEP_CHAIN_HLOCKS too low!
>   BUG: MAX_LOCKDEP_CHAINS too low!
>   BUG: MAX_LOCKDEP_KEYS too low!
>   WARNING in print_bfs_bug
>
> messages?
>
> On 2020/09/16 21:14, Dmitry Vyukov wrote:
> > On Wed, Sep 16, 2020 at 1:51 PM  wrote:
> >>
> >> On Wed, Sep 16, 2020 at 01:28:19PM +0200, Dmitry Vyukov wrote:
> >>> On Fri, Sep 4, 2020 at 6:05 PM Tetsuo Handa
> >>>  wrote:
> 
>  Hello. Can we apply this patch?
> 
>  This patch addresses top crashers for syzbot, and applying this patch
>  will help utilizing syzbot's resource for finding other bugs.
> >>>
> >>> Acked-by: Dmitry Vyukov 
> >>>
> >>> Peter, do you still have concerns with this?
> >>
> >> Yeah, I still hate it with a passion; it discourages thinking. A bad
> >> annotation that blows up the lockdep storage, no worries, we'll just
> >> increase this :/
> >>
> >> IIRC the issue with syzbot is that the current sysfs annotation is
> >> pretty terrible and generates a gazillion classes, and syzbot likes
> >> poking at /sys a lot and thus floods the system.
> >>
> >> I don't know enough about sysfs to suggest an alternative, and haven't
> >> exactly had spare time to look into it either :/
> >>
> >> Examples of bad annotations is getting every CPU a separate class, that
> >> leads to nr_cpus! chains if CPUs arbitrarily nest (nr_cpus^2 if there's
> >> only a single nesting level).
> >
> > Maybe on "BUG: MAX_LOCKDEP_CHAINS too low!" we should then aggregate,
> > sort and show existing chains so that it's possible to identify if
> > there are any worst offenders and who they are.
> >
> > Currently we only have a hypothesis that there are some worst
> > offenders vs lots of normal load. And we can't point fingers which
> > means that, say, sysfs, or other maintainers won't be too inclined to
> > fix anything.
> >
> > If we would know for sure that lock class X is guilty. That would make
> > the situation much more actionable.

I am trying to reproduce this locally first. syzbot claims it can
reproduce it with a number of very simple reproducers (like spawn
process, unshare, create socket):
https://syzkaller.appspot.com/bug?id=8a18efe79140782a88dcd098808d6ab20ed740cc

I see a very slow drift, but it's very slow, so it only gets to:
 direct dependencies: 22072 [max: 32768]

But that's running a very uniform workload.

However, when I tried to cat /proc/lockdep to see if there was anything
fishy already, I got this (on c2e7554e1b85935d962127efa3c2a76483b0b3b6).

Some missing locks?

==
BUG: KASAN: use-after-free in string_nocheck lib/vsprintf.c:611 [inline]
BUG: KASAN: use-after-free in string+0x39c/0x3d0 lib/vsprintf.c:693
Read of size 1 at addr 888295833740 by task less/1855

CPU: 0 PID: 1855 Comm: less Tainted: GW 5.10.0-rc4+ #68
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS
rel-1.13.0-44-g88ab0c15525c-prebuilt.qemu.org 04/01/2014
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x107/0x163 lib/dump_stack.c:118
 print_address_description.constprop.0.cold+0xae/0x4c8 mm/kasan/report.c:385
 __kasan_report mm/kasan/report.c:545 [inline]
 kasan_report.cold+0x1f/0x37 mm/kasan/report.c:562
 string_nocheck lib/vsprintf.c:611 [inline]
 string+0x39c/0x3d0 lib/vsprintf.c:693
 vsnprintf+0x71b/0x14f0 lib/vsprintf.c:2618
 seq_vprintf fs/seq_file.c:398 [inline]
 seq_printf+0x195/0x240 fs/seq_file.c:413
 print_name+0x98/0x1d0 kernel/locking/lockdep_proc.c:50
 l_show+0x111/0x2c0 kernel/locking/lockdep_proc.c:82
 seq_read_iter+0xae4/0x10c0 fs/seq_file.c:268
 proc_reg_read_iter+0x1fb/0x2d0 fs/proc/inode.c:310
 call_read_iter include/linux/fs.h:1897 [inline]
 new_sync_read+0x41e/0x6e0 fs/read_write.c:415
 vfs_read+0x35c/0x570 fs/read_write.c:496
 ksys_read+0x12d/0x250 fs/read_write.c:634
 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46
 entry_SYSCALL_64_after_hwframe+0x44/0xa9
RIP: 0033:0x7f1d48906310
Code: 73 01 c3 48 8b 0d 28 4b 2b 00 31 d2 48 29 c2 64 89 11 48 83 c8
ff eb ea 90 90 83 3d e5 a2 2b 00 00 75 10 b8 00 00 00 00 0f 05 <48> 3d
01 f04
RSP: 002b:7fff8ad3f048 EFLAGS: 0246 ORIG_RAX: 
RAX: ffda RBX: 00ae0ec0 RCX: 7f1d48906310
RDX: 2000 RSI: 00ae0eec RDI: 0004
RBP: 00072000 R08: 0038 R09: 0100
R10: 

Re: [PATCH v3] lockdep: Allow tuning tracing capacity constants.

2020-11-18 Thread Tetsuo Handa
On 2020/11/19 0:10, Peter Zijlstra wrote:
> On Wed, Nov 18, 2020 at 11:30:05PM +0900, Tetsuo Handa wrote:
>> The problem is that we can't know what exactly is consuming these resources.
>> My question is do you have a plan to make it possible to know what exactly is
>> consuming these resources.
> 
> I'm pretty sure it's in /proc/lockdep* somewhere.

OK. Then...

Dmitry, can you update syzkaller to dump /proc/lockdep* before terminating
as a crash, as soon as it encounters one of

  BUG: MAX_LOCKDEP_ENTRIES too low!
  BUG: MAX_LOCKDEP_CHAIN_HLOCKS too low!
  BUG: MAX_LOCKDEP_CHAINS too low!
  BUG: MAX_LOCKDEP_KEYS too low!
  WARNING in print_bfs_bug

messages?
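
Something like the following on the syzkaller side, perhaps (a sketch;
console.log and the dump path are illustrative, and the print_bfs_bug case
would need a similar check):

if grep -qE 'BUG: MAX_LOCKDEP_(ENTRIES|CHAIN_HLOCKS|CHAINS|KEYS) too low!' console.log; then
    cat /proc/lockdep_stats /proc/lockdep /proc/lockdep_chains > lockdep-dump.txt
fi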

On 2020/09/16 21:14, Dmitry Vyukov wrote:
> On Wed, Sep 16, 2020 at 1:51 PM  wrote:
>>
>> On Wed, Sep 16, 2020 at 01:28:19PM +0200, Dmitry Vyukov wrote:
>>> On Fri, Sep 4, 2020 at 6:05 PM Tetsuo Handa
>>>  wrote:

 Hello. Can we apply this patch?

 This patch addresses top crashers for syzbot, and applying this patch
 will help utilizing syzbot's resource for finding other bugs.
>>>
>>> Acked-by: Dmitry Vyukov 
>>>
>>> Peter, do you still have concerns with this?
>>
>> Yeah, I still hate it with a passion; it discourages thinking. A bad
>> annotation that blows up the lockdep storage, no worries, we'll just
>> increase this :/
>>
>> IIRC the issue with syzbot is that the current sysfs annotation is
>> pretty terrible and generates a gazillion classes, and syzbot likes
>> poking at /sys a lot and thus floods the system.
>>
>> I don't know enough about sysfs to suggest an alternative, and haven't
>> exactly had spare time to look into it either :/
>>
>> Examples of bad annotations is getting every CPU a separate class, that
>> leads to nr_cpus! chains if CPUs arbitrarily nest (nr_cpus^2 if there's
>> only a single nesting level).
> 
> Maybe on "BUG: MAX_LOCKDEP_CHAINS too low!" we should then aggregate,
> sort and show existing chains so that it's possible to identify if
> there are any worst offenders and who they are.
> 
> Currently we only have a hypothesis that there are some worst
> offenders vs lots of normal load. And we can't point fingers which
> means that, say, sysfs, or other maintainers won't be too inclined to
> fix anything.
> 
> If we would know for sure that lock class X is guilty. That would make
> the situation much more actionable.
> 


Re: [PATCH v3] lockdep: Allow tuning tracing capacity constants.

2020-11-18 Thread Peter Zijlstra
On Wed, Nov 18, 2020 at 11:30:05PM +0900, Tetsuo Handa wrote:
> The problem is that we can't know what exactly is consuming these resources.
> My question is do you have a plan to make it possible to know what exactly is
> consuming these resources.

I'm pretty sure it's in /proc/lockdep* somewhere.

IIRC you were suffering from "MAX_LOCKDEP_ENTRIES too low!", which you
find in alloc_list_entry(), that's used in add_lock_to_list(), which in
turn is used in check_prev_add() to add to ->locks_after and
->locks_before.

/me frobs in lockdep_proc.c and finds that l_show(), the implementation
for /proc/lockdep, uses locks_after.

So something like:

$ grep "^ ->" /proc/lockdep | wc -l
5064

Should be roughly half (it only shows the forward dependencies) the
number of list_entries used.

/proc/lockdep_stats: direct dependencies: 11886 [max: 32768]

gives the number of list_entries used

The trick then is finding where they all go, because as you can see, my
machine is nowhere near saturated, even though it's been running for a few
days.
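
For instance, something like this could attribute the consumed list entries
to individual classes (a sketch; it assumes each class header line starts
with the hashed pointer and that the class name is its last field):

awk '/^[0-9a-f]/ { name = $NF } /^ ->/ { cnt[name]++ } END { for (n in cnt) print cnt[n], n }' /proc/lockdep | sort -rn | head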

So, go forth and analyze your problem.


Re: [PATCH v3] lockdep: Allow tuning tracing capacity constants.

2020-11-18 Thread Tetsuo Handa
The problem is that we can't know what exactly is consuming these resources.
My question is: do you have a plan to make it possible to know what exactly
is consuming these resources?

On 2020/11/18 23:23, Peter Zijlstra wrote:
> On Wed, Nov 18, 2020 at 10:57:08PM +0900, Tetsuo Handa wrote:
>> Peter, do you have a plan to make this problem actionable?
> 
> I don't even know what the problem is.. nor do you. You can keep sending
> this bad paper-over-issues patch until you're blue in the face, I'll not
> take it for reasons I've explained before.
> 
> I've also previously said that the right way to go about this is to
> figure out what exactly is consuming these resources and then figure out
> how to cure _that_. Blindly increasing the number only enables bad
> actors to get away with being bad.
> 


Re: [PATCH v3] lockdep: Allow tuning tracing capacity constants.

2020-11-18 Thread Peter Zijlstra
On Wed, Nov 18, 2020 at 10:57:08PM +0900, Tetsuo Handa wrote:
> Peter, do you have a plan to make this problem actionable?

I don't even know what the problem is.. nor do you. You can keep sending
this bad paper-over-issues patch until you're blue in the face, I'll not
take it for reasons I've explained before.

I've also previously said that the right way to go about this is to
figure out what exactly is consuming these resources and then figure out
how to cure _that_. Blindly increasing the number only enables bad
actors to get away with being bad.


Re: [PATCH v3] lockdep: Allow tuning tracing capacity constants.

2020-11-18 Thread Tetsuo Handa
Peter, do you have a plan to make this problem actionable?

On 2020/10/18 22:02, Tetsuo Handa wrote:
> Peter, what do you think? Can we apply this patch?
> 
> A potential for-now workaround for syzkaller would be to allow syzkaller
> not to encounter the BUG: message (by masking BUG: message on the kernel
> side) when hitting these limits, for continue testing until the kernel
> crashes (due to other bugs like UAF) would be to some degree useful.
> 
> On 2020/10/10 21:58, Tetsuo Handa wrote:
>> Since syzkaller continues various test cases until the kernel crashes,
>> syzkaller tends to examine more locking dependencies than normal systems.
>> As a result, syzbot is reporting that the fuzz testing was terminated
>> due to hitting upper limits lockdep can track [1] [2] [3].
>>
>> Peter Zijlstra does not want to allow tuning these limits via kernel
>> config options, for such change discourages thinking. But currently we
>> are not actionable, for lockdep does not report the culprit for hitting
>> these limits [4].
>>
>> Therefore, I propose this patch again, with a caveat that this patch is
>> expected to be reverted after lockdep becomes capable of reporting the
>> culprit, for I consider that "postpone fixing lock related problems in
>> existing code" is less painful than "not detecting lock related problems
>> introduced by new patches".
>>
>> [1] 
>> https://syzkaller.appspot.com/bug?id=3d97ba93fb3566000c1c59691ea427370d33ea1b
>> [2] 
>> https://syzkaller.appspot.com/bug?id=381cb436fe60dc03d7fd2a092b46d7f09542a72a
>> [3] 
>> https://syzkaller.appspot.com/bug?id=a588183ac34c1437fc0785e8f220e88282e5a29f
>> [4] 
>> https://lkml.kernel.org/r/CACT4Y+agTiEF-1i9LbAgp-q_02oYF0kAPZGAAJ==-wx2xh7...@mail.gmail.com
>>
>> Reported-by: syzbot 
>> Reported-by: syzbot 
>> Reported-by: syzbot 
>> Signed-off-by: Tetsuo Handa 
>> Acked-by: Dmitry Vyukov 
>> ---
>>  kernel/locking/lockdep.c           |  2 +-
>>  kernel/locking/lockdep_internals.h |  8 +++---
>>  lib/Kconfig.debug                  | 40 ++
>>  3 files changed, 45 insertions(+), 5 deletions(-)



Re: [PATCH v3] lockdep: Allow tuning tracing capacity constants.

2020-10-18 Thread Tetsuo Handa
Peter, what do you think? Can we apply this patch?

A potential for-now workaround for syzkaller would be to mask the BUG:
message on the kernel side when these limits are hit, so that testing
can continue until the kernel crashes (due to other bugs like UAF),
which would still be useful to some degree.

On 2020/10/10 21:58, Tetsuo Handa wrote:
> Since syzkaller keeps running test cases until the kernel crashes,
> it tends to exercise far more locking dependencies than normal
> systems do. As a result, syzbot is reporting that fuzz testing was
> terminated due to hitting the upper limits of what lockdep can track
> [1] [2] [3].
> 
> Peter Zijlstra does not want to allow tuning these limits via kernel
> config options, because such a change discourages thinking. But these
> reports are currently not actionable, because lockdep does not report
> which locks are responsible for hitting the limits [4].
> 
> Therefore, I propose this patch again, with the caveat that it is
> expected to be reverted once lockdep becomes capable of reporting the
> culprit, for I consider "postponing fixes for lock-related problems
> in existing code" less painful than "not detecting lock-related
> problems introduced by new patches".
> 
> [1] 
> https://syzkaller.appspot.com/bug?id=3d97ba93fb3566000c1c59691ea427370d33ea1b
> [2] 
> https://syzkaller.appspot.com/bug?id=381cb436fe60dc03d7fd2a092b46d7f09542a72a
> [3] 
> https://syzkaller.appspot.com/bug?id=a588183ac34c1437fc0785e8f220e88282e5a29f
> [4] 
> https://lkml.kernel.org/r/CACT4Y+agTiEF-1i9LbAgp-q_02oYF0kAPZGAAJ==-wx2xh7...@mail.gmail.com
> 
> Reported-by: syzbot 
> Reported-by: syzbot 
> Reported-by: syzbot 
> Signed-off-by: Tetsuo Handa 
> Acked-by: Dmitry Vyukov 
> ---
>  kernel/locking/lockdep.c           |  2 +-
>  kernel/locking/lockdep_internals.h |  8 +++---
>  lib/Kconfig.debug                  | 40 ++
>  3 files changed, 45 insertions(+), 5 deletions(-)


[PATCH v3] lockdep: Allow tuning tracing capacity constants.

2020-10-10 Thread Tetsuo Handa
Since syzkaller keeps running test cases until the kernel crashes,
it tends to exercise far more locking dependencies than normal
systems do. As a result, syzbot is reporting that fuzz testing was
terminated due to hitting the upper limits of what lockdep can track
[1] [2] [3].

Peter Zijlstra does not want to allow tuning these limits via kernel
config options, because such a change discourages thinking. But these
reports are currently not actionable, because lockdep does not report
which locks are responsible for hitting the limits [4].

Therefore, I propose this patch again, with the caveat that it is
expected to be reverted once lockdep becomes capable of reporting the
culprit, for I consider "postponing fixes for lock-related problems
in existing code" less painful than "not detecting lock-related
problems introduced by new patches".

[1] 
https://syzkaller.appspot.com/bug?id=3d97ba93fb3566000c1c59691ea427370d33ea1b
[2] 
https://syzkaller.appspot.com/bug?id=381cb436fe60dc03d7fd2a092b46d7f09542a72a
[3] 
https://syzkaller.appspot.com/bug?id=a588183ac34c1437fc0785e8f220e88282e5a29f
[4] 
https://lkml.kernel.org/r/CACT4Y+agTiEF-1i9LbAgp-q_02oYF0kAPZGAAJ==-wx2xh7...@mail.gmail.com

Reported-by: syzbot 
Reported-by: syzbot 
Reported-by: syzbot 
Signed-off-by: Tetsuo Handa 
Acked-by: Dmitry Vyukov 
---
 kernel/locking/lockdep.c           |  2 +-
 kernel/locking/lockdep_internals.h |  8 +++---
 lib/Kconfig.debug                  | 40 ++
 3 files changed, 45 insertions(+), 5 deletions(-)

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 2facbbd146ec..2144708a867c 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -1349,7 +1349,7 @@ static int add_lock_to_list(struct lock_class *this,
 /*
  * For good efficiency of modular, we use power of 2
  */
-#define MAX_CIRCULAR_QUEUE_SIZE		4096UL
+#define MAX_CIRCULAR_QUEUE_SIZE		(1UL << CONFIG_LOCKDEP_CIRCULAR_QUEUE_BITS)
 #define CQ_MASK				(MAX_CIRCULAR_QUEUE_SIZE-1)
 
 /*
diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h
index b0be1560ed17..cf7752847eb7 100644
--- a/kernel/locking/lockdep_internals.h
+++ b/kernel/locking/lockdep_internals.h
@@ -96,16 +96,16 @@ static const unsigned long LOCKF_USED_IN_IRQ_READ =
 #define MAX_STACK_TRACE_ENTRIES	262144UL
 #define STACK_TRACE_HASH_SIZE		8192
 #else
-#define MAX_LOCKDEP_ENTRIES		32768UL
+#define MAX_LOCKDEP_ENTRIES		(1UL << CONFIG_LOCKDEP_BITS)
 
-#define MAX_LOCKDEP_CHAINS_BITS		16
+#define MAX_LOCKDEP_CHAINS_BITS		CONFIG_LOCKDEP_CHAINS_BITS
 
 /*
  * Stack-trace: tightly packed array of stack backtrace
  * addresses. Protected by the hash_lock.
  */
-#define MAX_STACK_TRACE_ENTRIES	524288UL
-#define STACK_TRACE_HASH_SIZE		16384
+#define MAX_STACK_TRACE_ENTRIES	(1UL << CONFIG_LOCKDEP_STACK_TRACE_BITS)
+#define STACK_TRACE_HASH_SIZE		(1 << CONFIG_LOCKDEP_STACK_TRACE_HASH_BITS)
 #endif
 
 /*
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 0c781f912f9f..41d083be7ec3 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1311,6 +1311,46 @@ config LOCKDEP
 config LOCKDEP_SMALL
 	bool
 
+config LOCKDEP_BITS
+	int "Bitsize for MAX_LOCKDEP_ENTRIES"
+	depends on LOCKDEP && !LOCKDEP_SMALL
+	range 10 30
+	default 15
+	help
+	  Try increasing this value if you hit the "BUG: MAX_LOCKDEP_ENTRIES
+	  too low!" message.
+
+config LOCKDEP_CHAINS_BITS
+	int "Bitsize for MAX_LOCKDEP_CHAINS"
+	depends on LOCKDEP && !LOCKDEP_SMALL
+	range 10 30
+	default 16
+	help
+	  Try increasing this value if you hit the "BUG: MAX_LOCKDEP_CHAINS
+	  too low!" message.
+
+config LOCKDEP_STACK_TRACE_BITS
+	int "Bitsize for MAX_STACK_TRACE_ENTRIES"
+	depends on LOCKDEP && !LOCKDEP_SMALL
+	range 10 30
+	default 19
+	help
+	  Try increasing this value if you hit the "BUG:
+	  MAX_STACK_TRACE_ENTRIES too low!" message.
+
+config LOCKDEP_STACK_TRACE_HASH_BITS
+	int "Bitsize for STACK_TRACE_HASH_SIZE"
+	depends on LOCKDEP && !LOCKDEP_SMALL
+	range 10 30
+	default 14
+	help
+	  Try increasing this value if you need a large
+	  MAX_STACK_TRACE_ENTRIES.
+
+config LOCKDEP_CIRCULAR_QUEUE_BITS
+	int "Bitsize for elements in circular_queue struct"
+	depends on LOCKDEP
+	range 10 30
+	default 12
+	help
+	  Try increasing this value if you hit the "lockdep bfs error:-1"
+	  warning due to __cq_enqueue() failure.
+
 config DEBUG_LOCKDEP
 	bool "Lock dependency engine debugging"
 	depends on DEBUG_KERNEL && LOCKDEP
-- 
2.18.4
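
For completeness, a hypothetical usage example of the new knobs: the
symbol names come from this patch, while the values below are arbitrary
examples within the permitted 10..30 ranges, not recommendations.

# Bump the limits in an existing .config and regenerate.
$ ./scripts/config --set-val LOCKDEP_BITS 16 \
                   --set-val LOCKDEP_CHAINS_BITS 17 \
                   --set-val LOCKDEP_STACK_TRACE_BITS 20 \
                   --set-val LOCKDEP_STACK_TRACE_HASH_BITS 15 \
                   --set-val LOCKDEP_CIRCULAR_QUEUE_BITS 13
$ make olddefconfig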