Hi.

> Hi all,
>
> I encountered a kernel panic in the RCU core subsystem while running a 
> stress-ng on a virtualized ARM64 system.
>
> This panic consistently occurs regardless of whether I increase or decrease 
> the memory size.
>
> The crash seems to originate from rcu_do_batch(), jumping to a pointer 
> (0xffff00003a114000) that appears to be non-executable.
> The PTE for the address confirms XN=1. Given the heavy binderfs workload, I 
> suspect there may be a use-after-free or dangling pointer involved in a 
> callback invocation.
>
> Platform:
>  Architecture: arm64
>  Virtualized environment: Apple Silicon M2 (Apple Virtualization Framework)
>  Kernel version: 6.15.0-rc4+
>  Attached Config: CONFIG_PREEMPT_VOLUNTARY=y, CONFIG_KASAN=y
>
> Reproducer:
>  sudo ./stress-ng --binderfs 8 --binderfs-ops 10000 -t 15 \
>   --pathological --timestamp --tz --syslog --perf --no-rand-seed \
>   --times --metrics --klog-check --status 5 -x smi -v --interrupts \
>   --change-cpu
>
> Crash details:
> [ 1977.262956] Unable to handle kernel execute from non-executable memory at 
> virtual address ffff00003a114000
> [ 1977.262980] Mem abort info:
> [ 1977.262988]   ESR = 0x000000008600000f
> [ 1977.262998]   EC = 0x21: IABT (current EL), IL = 32 bits
> [ 1977.263008]   SET = 0, FnV = 0
> [ 1977.263017]   EA = 0, S1PTW = 0
> [ 1977.263026]   FSC = 0x0f: level 3 permission fault
> [ 1977.263036] swapper pgtable: 4k pages, 48-bit VAs, pgdp=00000000dfd88000
> [ 1977.263047] [ffff00003a114000] pgd=18000000effff403, p4d=18000000effff403, 
> pud=18000000efffe403, pmd=18000000effad403, pte=006800007a114707
> [ 1977.263088] Internal error: Oops: 000000008600000f [#1]  SMP
> [ 1977.263097] Modules linked in: pcbc lrw xcbc wp512 nhpoly1305_neon 
> nhpoly1305 libpoly1305 michael_mic md4 streebog_generic rmd160 crc32_generic 
> twofish_generic twofish_common serpent_generic fcrypt cast6_generic 
> cast5_generic cast_common camellia_generic blowfish_generic blowfish_common 
> ecrdsa_generic des_generic libdes aegis128 overlay isofs uinput snd_seq_dummy 
> snd_hrtimer nf_conntrack_netbios_ns nf_conntrack_broadcast nft_fib_inet 
> nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 
> nf_reject_ipv6 nft_reject nft_ct nft_chain_nat nf_nat nf_conntrack rfkill 
> nf_defrag_ipv6 nf_defrag_ipv4 ip_set nf_tables qrtr sunrpc virtio_snd snd_seq 
> snd_seq_device snd_pcm virtio_net snd_timer snd virtio_balloon net_failover 
> soundcore failover vfat fat joydev loop nfnetlink vsock_loopback 
> vmw_vsock_virtio_transport_common zram lz4hc_compress lz4_compress 
> vmw_vsock_vmci_transport vmw_vmci vsock uas polyval_ce polyval_generic 
> usb_storage ghash_ce sha3_ce sha512_ce sha512_arm64 virtio_gpu virtio_dma_buf 
> apple_mfi_fastcharge
> [ 1977.263372]  fuse
> [ 1977.263387] CPU: 2 UID: 0 PID: 27 Comm: ksoftirqd/2 Kdump: loaded Not 
> tainted 6.15.0-rc4+ #1 PREEMPT(voluntary)
> [ 1977.263398] Hardware name: Apple Inc. Apple Virtualization Generic 
> Platform, BIOS 2075.101.2.0.0 03/12/2025
> [ 1977.263406] pstate: 21400805 (nzCv daif +PAN -UAO -TCO +DIT -SSBS BTYPE=-c)
> [ 1977.263416] pc : 0xffff00003a114000
> [ 1977.263443] lr : rcu_do_batch+0x2dc/0x860
> [ 1977.263457] sp : ffff800080143c90
> [ 1977.263462] x29: ffff800080143cb0 x28: ffff000048608000 x27: 
> ffff00003a114000
> [ 1977.263478] x26: ffff800084442000 x25: 0000000000000000 x24: 
> ffff8000843d9b18
> [ 1977.263492] x23: ffff800082150ac0 x22: 0000000000000007 x21: 
> 000000000000000a
> [ 1977.263506] x20: ffff000030e08000 x19: ffff0000af4cfe00 x18: 
> 0000000000000002
> [ 1977.263521] x17: 0000000000000000 x16: 0000000000000001 x15: 
> 0000000000000017
> [ 1977.263535] x14: 0000000000000004 x13: ffff0000af4cfed0 x12: 
> 0000000000000002
> [ 1977.263549] x11: 0000000000110009 x10: 0000000000ff0100 x9 : 
> ffff80008385a580
> [ 1977.263563] x8 : 0000000100000100 x7 : 0000000000000000 x6 : 
> ffff8000803f89bc
> [ 1977.263577] x5 : 0000000000000000 x4 : 0000000000000000 x3 : 
> 0000000000000002
> [ 1977.263591] x2 : 0000000000000000 x1 : ffff800082a4aeb8 x0 : 
> ffff000048608000
> [ 1977.263605] Call trace:
> [ 1977.263611]  0xffff00003a114000 (P)
> [ 1977.263623]  rcu_core+0x2a0/0x4e8
> [ 1977.263635]  rcu_core_si+0x1c/0x30
> [ 1977.263646]  handle_softirqs+0x1b4/0x588
> [ 1977.263661]  run_ksoftirqd+0x5c/0xf8
> [ 1977.263670]  smpboot_thread_fn+0x27c/0x490
> [ 1977.263683]  kthread+0x2ac/0x318
> [ 1977.263697]  ret_from_fork+0x10/0x20
> [ 1977.263714] Code: dff29fc3 00200000 dff28fc3 00200000 (48608000)
> [ 1977.263723] SMP: stopping secondary CPUs
> [ 1977.264081] Starting crashdump kernel...
> [ 1977.264090] Bye!
>

This problem does not seem to be related to RCU or the architecture, but rather
to a data race in binderfs when it removes its device on unmount.

Could you test it with the patch below?
In my testing, it appears to fix the issue.

@@ -79,6 +79,7 @@ static HLIST_HEAD(binder_deferred_list);
 static DEFINE_MUTEX(binder_deferred_lock);

 static HLIST_HEAD(binder_devices);
+static DEFINE_SPINLOCK(binder_devices_lock);
 static HLIST_HEAD(binder_procs);
 static DEFINE_MUTEX(binder_procs_lock);

@@ -6929,9 +6930,16 @@ const struct binder_debugfs_entry binder_debugfs_entries[] = {

 void binder_add_device(struct binder_device *device)
 {
+       guard(spinlock)(&binder_devices_lock);
        hlist_add_head(&device->hlist, &binder_devices);
 }

+void binder_del_device(struct binder_device *device)
+{
+       guard(spinlock)(&binder_devices_lock);
+       hlist_del_init(&device->hlist);
+}
+
 static int __init init_binder_device(const char *name)
 {
        int ret;
diff --git a/drivers/android/binder_internal.h b/drivers/android/binder_internal.h
index 6a66c9769c6c..a6ae4edc4ed5 100644
--- a/drivers/android/binder_internal.h
+++ b/drivers/android/binder_internal.h
@@ -588,4 +588,12 @@ struct binder_object {
  */
 void binder_add_device(struct binder_device *device);

+/**
+ * Remove a binder device from binder_devices
+ * @device: the binder device to remove from the global list
+ *
+ * Takes binder_devices_lock internally while unlinking the device
+ */
+void binder_del_device(struct binder_device *device);
+
 #endif /* _LINUX_BINDER_INTERNAL_H */
diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c
index 94c6446604fc..a0779a8338d7 100644
--- a/drivers/android/binderfs.c
+++ b/drivers/android/binderfs.c
@@ -274,7 +274,7 @@ static void binderfs_evict_inode(struct inode *inode)
        mutex_unlock(&binderfs_minors_mutex);

        if (refcount_dec_and_test(&device->ref)) {
-               hlist_del_init(&device->hlist);
+               binder_del_device(device);
                kfree(device->context.name);
                kfree(device);
        }

--
Sincerely,
Yeoreum Yun
IMPORTANT NOTICE: The contents of this email and any attachments are 
confidential and may also be privileged. If you are not the intended recipient, 
please notify the sender immediately and do not disclose the contents to any 
other person, use it for any purpose, or store or copy the information in any 
medium. Thank you.

Reply via email to