syzbot found an interesting use-after-free [1] happening while IPv4 fragment rhashtable was destroyed at netns dismantle.
While no insertions can possibly happen at the time a dismantling netns is destroying this rhashtable, timers can still fire and attempt to remove elements from this rhashtable. Since automatic shrinking is enabled, this might then schedule the work queue to perform the shrinking from process context, which was not expected. I have thought of simply forcing ht->p.automatic_shrinking to false before the cancel_work_sync(&ht->run_work); in rhashtable_free_and_destroy(), but that would still be racy. Let's use a separate boolean to not only prevent the work queue being scheduled, but also make sure rht_deferred_worker() would return early. [1] BUG: KASAN: use-after-free in __read_once_size include/linux/compiler.h:188 [inline] BUG: KASAN: use-after-free in rhashtable_last_table+0x216/0x240 lib/rhashtable.c:217 Read of size 8 at addr ffff88019a4c8840 by task kworker/0:4/8279 CPU: 0 PID: 8279 Comm: kworker/0:4 Not tainted 4.19.0-rc5+ #61 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: events rht_deferred_worker Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1c4/0x2b4 lib/dump_stack.c:113 print_address_description.cold.8+0x9/0x1ff mm/kasan/report.c:256 kasan_report_error mm/kasan/report.c:354 [inline] kasan_report.cold.9+0x242/0x309 mm/kasan/report.c:412 __asan_report_load8_noabort+0x14/0x20 mm/kasan/report.c:433 __read_once_size include/linux/compiler.h:188 [inline] rhashtable_last_table+0x216/0x240 lib/rhashtable.c:217 rht_deferred_worker+0x157/0x1de0 lib/rhashtable.c:410 process_one_work+0xc90/0x1b90 kernel/workqueue.c:2153 worker_thread+0x17f/0x1390 kernel/workqueue.c:2296 kthread+0x35a/0x420 kernel/kthread.c:246 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:413 Allocated by task 5: save_stack+0x43/0xd0 mm/kasan/kasan.c:448 set_track mm/kasan/kasan.c:460 [inline] kasan_kmalloc+0xc7/0xe0 mm/kasan/kasan.c:553 __do_kmalloc_node mm/slab.c:3682 [inline] __kmalloc_node+0x47/0x70 mm/slab.c:3689 
kmalloc_node include/linux/slab.h:555 [inline] kvmalloc_node+0xb9/0xf0 mm/util.c:423 kvmalloc include/linux/mm.h:577 [inline] kvzalloc include/linux/mm.h:585 [inline] bucket_table_alloc+0x9a/0x4e0 lib/rhashtable.c:176 rhashtable_rehash_alloc+0x73/0x100 lib/rhashtable.c:353 rht_deferred_worker+0x278/0x1de0 lib/rhashtable.c:413 process_one_work+0xc90/0x1b90 kernel/workqueue.c:2153 worker_thread+0x17f/0x1390 kernel/workqueue.c:2296 kthread+0x35a/0x420 kernel/kthread.c:246 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:413 Freed by task 8283: save_stack+0x43/0xd0 mm/kasan/kasan.c:448 set_track mm/kasan/kasan.c:460 [inline] __kasan_slab_free+0x102/0x150 mm/kasan/kasan.c:521 kasan_slab_free+0xe/0x10 mm/kasan/kasan.c:528 __cache_free mm/slab.c:3498 [inline] kfree+0xcf/0x230 mm/slab.c:3813 kvfree+0x61/0x70 mm/util.c:452 bucket_table_free+0xda/0x250 lib/rhashtable.c:108 rhashtable_free_and_destroy+0x152/0x900 lib/rhashtable.c:1163 inet_frags_exit_net+0x3d/0x50 net/ipv4/inet_fragment.c:96 ipv4_frags_exit_net+0x73/0x90 net/ipv4/ip_fragment.c:914 ops_exit_list.isra.7+0xb0/0x160 net/core/net_namespace.c:153 cleanup_net+0x555/0xb10 net/core/net_namespace.c:551 process_one_work+0xc90/0x1b90 kernel/workqueue.c:2153 worker_thread+0x17f/0x1390 kernel/workqueue.c:2296 kthread+0x35a/0x420 kernel/kthread.c:246 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:413 The buggy address belongs to the object at ffff88019a4c8800 which belongs to the cache kmalloc-16384 of size 16384 The buggy address is located 64 bytes inside of 16384-byte region [ffff88019a4c8800, ffff88019a4cc800) The buggy address belongs to the page: page:ffffea0006693200 count:1 mapcount:0 mapping:ffff8801da802200 index:0x0 compound_mapcount: 0 flags: 0x2fffc0000008100(slab|head) raw: 02fffc0000008100 ffffea0006685608 ffffea0006617c08 ffff8801da802200 raw: 0000000000000000 ffff88019a4c8800 0000000100000001 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: 
ffff88019a4c8700: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff88019a4c8780: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff88019a4c8800: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff88019a4c8880: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88019a4c8900: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb Signed-off-by: Eric Dumazet <eduma...@google.com> Reported-by: syzbot <syzkal...@googlegroups.com> Cc: Thomas Graf <tg...@suug.ch> Cc: Herbert Xu <herb...@gondor.apana.org.au> --- include/linux/rhashtable-types.h | 2 ++ include/linux/rhashtable.h | 1 + lib/rhashtable.c | 25 ++++++++++++++----------- 3 files changed, 17 insertions(+), 11 deletions(-) diff --git a/include/linux/rhashtable-types.h b/include/linux/rhashtable-types.h index 763d613ce2c2f275037c3b1b5ce4d390394295c6..03d86e79dfd664c48c5e437c609deb62cbc33180 100644 --- a/include/linux/rhashtable-types.h +++ b/include/linux/rhashtable-types.h @@ -75,6 +75,7 @@ struct rhashtable_params { * @max_elems: Maximum number of elements in table * @p: Configuration parameters * @rhlist: True if this is an rhltable + * @dead: True if in dismantle phase * @run_work: Deferred worker to expand/shrink asynchronously * @mutex: Mutex to protect current/future table swapping * @lock: Spin lock to protect walker list @@ -86,6 +87,7 @@ struct rhashtable { unsigned int max_elems; struct rhashtable_params p; bool rhlist; + bool dead; struct work_struct run_work; struct mutex mutex; spinlock_t lock; diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index eb71110392479784db1d4a0b9d86d2eee6631789..5efc7d5cc66ba37c70c7e3d5ff98473df6a3837e 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -934,6 +934,7 @@ static inline int __rhashtable_remove_fast_one( if (err > 0) { atomic_dec(&ht->nelems); if (unlikely(ht->p.automatic_shrinking && + !ht->dead && rht_shrink_below_30(ht, tbl))) schedule_work(&ht->run_work); err = 0; diff --git a/lib/rhashtable.c b/lib/rhashtable.c 
index 30526afa8343124f06f0649592ee246dc4d88fbe..99f9aa5a768d849d9afe3f50f25c300c37d9c07d 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -406,19 +406,21 @@ static void rht_deferred_worker(struct work_struct *work) ht = container_of(work, struct rhashtable, run_work); mutex_lock(&ht->mutex); - tbl = rht_dereference(ht->tbl, ht); - tbl = rhashtable_last_table(ht, tbl); + if (!ht->dead) { + tbl = rht_dereference(ht->tbl, ht); + tbl = rhashtable_last_table(ht, tbl); - if (rht_grow_above_75(ht, tbl)) - err = rhashtable_rehash_alloc(ht, tbl, tbl->size * 2); - else if (ht->p.automatic_shrinking && rht_shrink_below_30(ht, tbl)) - err = rhashtable_shrink(ht); - else if (tbl->nest) - err = rhashtable_rehash_alloc(ht, tbl, tbl->size); - - if (!err) - err = rhashtable_rehash_table(ht); + if (rht_grow_above_75(ht, tbl)) + err = rhashtable_rehash_alloc(ht, tbl, tbl->size * 2); + else if (ht->p.automatic_shrinking && + rht_shrink_below_30(ht, tbl)) + err = rhashtable_shrink(ht); + else if (tbl->nest) + err = rhashtable_rehash_alloc(ht, tbl, tbl->size); + if (!err) + err = rhashtable_rehash_table(ht); + } mutex_unlock(&ht->mutex); if (err) @@ -1138,6 +1140,7 @@ void rhashtable_free_and_destroy(struct rhashtable *ht, struct bucket_table *tbl, *next_tbl; unsigned int i; + ht->dead = true; cancel_work_sync(&ht->run_work); mutex_lock(&ht->mutex); -- 2.19.0.605.g01d371f741-goog