pci: Preserve vfio-pci device files across Live Update

David Matlack Thu, 21 May 2026 16:59:49 -0700

On 2026-05-11 04:47 PM, Vipin Sharma wrote:

> +static int vfio_pci_liveupdate_freeze(struct liveupdate_file_op_args *args)
> +{
> +     struct vfio_device *device = vfio_device_from_file(args->file);
> +     struct vfio_pci_core_device *vdev;
> +     struct pci_dev *pdev;
> +     int ret;
> +
> +     vdev = container_of(device, struct vfio_pci_core_device, vdev);
> +     pdev = vdev->pdev;
> +
> +     guard(mutex)(&device->dev_set->lock);
> +
> +     /*
> +      * Userspace must disable interrupts on the device prior to freeze so
> +      * that the device does not send any interrupts until new interrupt
> +      * handlers have been established by the next kernel.
> +      */
> +     if (vdev->irq_type != VFIO_PCI_NUM_IRQS) {
> +             pci_err(pdev, "Freeze failed! Interrupts are still enabled.\n");
> +             return -EINVAL;
> +     }
> +
> +     guard(rwsem_write)(&vdev->memory_lock);
> +
> +     /*
> +      * Userspace must make sure device is not in the lower power state for
> +      * live update. We may relax this in future.
> +      */
> +     if (pdev->current_state != PCI_D0) {
> +             pci_err(pdev, "Freeze failed! Device not in D0 state.\n");
> +             return -EINVAL;
> +     }
> +
> +     /*
> +      * Reset is a temporary measure to provide kernel after kexec a clean
> +      * device while VFIO live update work is under development and not
> +      * fully supported.  It will go away once continuous DMA support is
> +      * added to device preservation.
> +      */
> +     vfio_pci_zap_bars(vdev);
> +     ret = pci_load_saved_state(pdev, vdev->pci_saved_state);
> +     if (ret)
> +             return ret;
> +     pci_clear_master(pdev);
> +     vfio_pci_core_try_reset(vdev);


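I am seeing the following lockdep splat, triggered by this reset, when testing
this commit with vfio_pci_liveupdate_kexec_test. It seems to be related to
holding memory_lock (taken above) across the reset: pcie_flr() acquires
group->mutex via pci_dev_reset_iommu_prepare(), while the existing dependency
chain already orders group->mutex before mm->mmap_lock before memory_lock.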

[ 2710.299017][T75672] ======================================================
[ 2710.305908][T75672] WARNING: possible circular locking dependency detected
[ 2710.312797][T75672] 7.1.0-dbg-DEV #59 Tainted: G S
[ 2710.319077][T75672] ------------------------------------------------------
[ 2710.325967][T75672] kexec/75672 is trying to acquire lock:
[ 2710.331474][T75672] ff46fd4fdbaeef08 (&group->mutex){+.+.}-{4:4}, at: pci_dev_reset_iommu_prepare+0x6e/0x1a0
[ 2710.341336][T75672]
[ 2710.341336][T75672] but task is already holding lock:
[ 2710.348574][T75672] ff46fd501f9a19a8 (&vdev->memory_lock){++++}-{4:4}, at: vfio_pci_liveupdate_freeze+0x51/0x100
[ 2710.358764][T75672]
[ 2710.358764][T75672] which lock already depends on the new lock.
[ 2710.358764][T75672]
[ 2710.369031][T75672]
[ 2710.369031][T75672] the existing dependency chain (in reverse order) is:
[ 2710.377916][T75672]
[ 2710.377916][T75672] -> #4 (&vdev->memory_lock){++++}-{4:4}:
[ 2710.385675][T75672]        down_read+0x3d/0x150
[ 2710.390235][T75672]        vfio_pci_mmap_huge_fault+0xb9/0x160
[ 2710.396091][T75672]        __do_fault+0x46/0x140
[ 2710.400734][T75672]        do_pte_missing+0x4c3/0xff0
[ 2710.405803][T75672]        handle_mm_fault+0x7c4/0xb30
[ 2710.410961][T75672]        fixup_user_fault+0x115/0x270
[ 2710.416209][T75672]        vaddr_get_pfns+0x1a1/0x390
[ 2710.421286][T75672]        vfio_pin_pages_remote+0x148/0x4d0
[ 2710.426959][T75672]        vfio_pin_map_dma+0xcc/0x260
[ 2710.432116][T75672]        vfio_iommu_type1_ioctl+0xda4/0xec0
[ 2710.437884][T75672]        __se_sys_ioctl+0x71/0xc0
[ 2710.442790][T75672]        do_syscall_64+0x15f/0x710
[ 2710.447788][T75672]        entry_SYSCALL_64_after_hwframe+0x77/0x7f
[ 2710.454074][T75672]
[ 2710.454074][T75672] -> #3 (&mm->mmap_lock){++++}-{4:4}:
[ 2710.461489][T75672]        down_read_killable+0x48/0x180
[ 2710.466821][T75672]        mmap_read_lock_killable+0x12/0x50
[ 2710.472505][T75672]        lock_mm_and_find_vma+0x11d/0x130
[ 2710.478093][T75672]        do_user_addr_fault+0x3a0/0x6c0
[ 2710.483521][T75672]        exc_page_fault+0x68/0xa0
[ 2710.488423][T75672]        asm_exc_page_fault+0x26/0x30
[ 2710.493669][T75672]        filldir+0xe2/0x190
[ 2710.498047][T75672]        ext4_readdir+0xb47/0xcf0
[ 2710.502950][T75672]        iterate_dir+0x84/0x160
[ 2710.507677][T75672]        __se_sys_getdents+0x74/0x120
[ 2710.512929][T75672]        do_syscall_64+0x15f/0x710
[ 2710.517919][T75672]        entry_SYSCALL_64_after_hwframe+0x77/0x7f
[ 2710.524202][T75672]
[ 2710.524202][T75672] -> #2 (&type->i_mutex_dir_key#4){++++}-{4:4}:
[ 2710.532478][T75672]        down_read+0x3d/0x150
[ 2710.537030][T75672]        lookup_slow+0x26/0x50
[ 2710.541675][T75672]        link_path_walk+0x42c/0x580
[ 2710.546743][T75672]        path_openat+0xd1/0xde0
[ 2710.551466][T75672]        do_file_open_root+0x114/0x250
[ 2710.556798][T75672]        file_open_root+0x89/0xb0
[ 2710.561703][T75672]        kernel_read_file_from_path_initns+0xba/0x130
[ 2710.568342][T75672]        _request_firmware+0x4ab/0x8c0
[ 2710.573677][T75672]        request_firmware_direct+0x36/0x50
[ 2710.579356][T75672]        request_microcode_fw+0xf2/0x510
[ 2710.584869][T75672]        reload_store+0x197/0x230
[ 2710.589766][T75672]        kernfs_fop_write_iter+0x13f/0x1d0
[ 2710.595452][T75672]        vfs_write+0x2be/0x3b0
[ 2710.600097][T75672]        ksys_write+0x73/0x100
[ 2710.604735][T75672]        do_syscall_64+0x15f/0x710
[ 2710.609723][T75672]        entry_SYSCALL_64_after_hwframe+0x77/0x7f
[ 2710.616009][T75672]
[ 2710.616009][T75672] -> #1 (cpu_hotplug_lock){++++}-{0:0}:
[ 2710.623591][T75672]        cpus_read_lock+0x3b/0xd0
[ 2710.628499][T75672]        __cpuhp_state_add_instance+0x19/0x40
[ 2710.634443][T75672]        iova_domain_init_rcaches+0x1ef/0x230
[ 2710.640385][T75672]        iommu_setup_dma_ops+0x175/0x540
[ 2710.645891][T75672]        iommu_device_register+0x188/0x220
[ 2710.651564][T75672]        intel_iommu_init+0x35a/0x440
[ 2710.656811][T75672]        pci_iommu_init+0x16/0x40
[ 2710.661713][T75672]        do_one_initcall+0xf5/0x3a0
[ 2710.666786][T75672]        do_initcall_level+0x82/0xa0
[ 2710.671953][T75672]        do_initcalls+0x43/0x70
[ 2710.676672][T75672]        kernel_init_freeable+0x152/0x1d0
[ 2710.682266][T75672]        kernel_init+0x1a/0x130
[ 2710.686996][T75672]        ret_from_fork+0x16b/0x310
[ 2710.691991][T75672]        ret_from_fork_asm+0x1a/0x30
[ 2710.697151][T75672]
[ 2710.697151][T75672] -> #0 (&group->mutex){+.+.}-{4:4}:
[ 2710.704478][T75672]        __lock_acquire+0x14c6/0x2800
[ 2710.709729][T75672]        lock_acquire+0xd3/0x2c0
[ 2710.714542][T75672]        __mutex_lock+0x8f/0xcd0
[ 2710.719349][T75672]        pci_dev_reset_iommu_prepare+0x6e/0x1a0
[ 2710.725461][T75672]        pcie_flr+0x32/0xc0
[ 2710.729842][T75672]        __pci_reset_function_locked+0x84/0x120
[ 2710.735954][T75672]        vfio_pci_core_try_reset+0x96/0xe0
[ 2710.741630][T75672]        vfio_pci_liveupdate_freeze+0x89/0x100
[ 2710.747653][T75672]        luo_file_freeze+0xba/0x280
[ 2710.752725][T75672]        luo_session_serialize+0x69/0x190
[ 2710.758321][T75672]        liveupdate_reboot+0x19/0x30
[ 2710.763490][T75672]        kernel_kexec+0x2f/0xa0
[ 2710.768220][T75672]        __se_sys_reboot+0xfd/0x210
[ 2710.773301][T75672]        do_syscall_64+0x15f/0x710
[ 2710.778284][T75672]        entry_SYSCALL_64_after_hwframe+0x77/0x7f
[ 2710.784568][T75672]
[ 2710.784568][T75672] other info that might help us debug this:
[ 2710.784568][T75672]
[ 2710.794663][T75672] Chain exists of:
[ 2710.794663][T75672]   &group->mutex --> &mm->mmap_lock --> &vdev->memory_lock
[ 2710.794663][T75672]
[ 2710.807543][T75672]  Possible unsafe locking scenario:
[ 2710.807543][T75672]
[ 2710.814863][T75672]        CPU0                    CPU1
[ 2710.820106][T75672]        ----                    ----
[ 2710.825352][T75672]   lock(&vdev->memory_lock);
[ 2710.829904][T75672]                                lock(&mm->mmap_lock);
[ 2710.836620][T75672]                                lock(&vdev->memory_lock);
[ 2710.843682][T75672]   lock(&group->mutex);
[ 2710.847798][T75672]
[ 2710.847798][T75672]  *** DEADLOCK ***
[ 2710.847798][T75672]
[ 2710.855818][T75672] 7 locks held by kexec/75672:
[ 2710.860457][T75672]  #0: ffffffff90a81330 (system_transition_mutex){+.+.}-{4:4}, at: __se_sys_reboot+0xe4/0x210
[ 2710.870554][T75672]  #1: ffffffff90e1d0c0 (luo_session_global.outgoing.rwsem){+.+.}-{4:4}, at: luo_session_serialize+0x1f/0x190
[ 2710.882043][T75672]  #2: ff46fd50602b7ae0 (&session->mutex){+.+.}-{4:4}, at: luo_session_serialize+0x4f/0x190
[ 2710.891972][T75672]  #3: ff46fd500bec0788 (&luo_file->mutex){+.+.}-{4:4}, at: luo_file_freeze+0x65/0x280
[ 2710.901463][T75672]  #4: ff46fd509d8106a8 (&new_dev_set->lock){+.+.}-{4:4}, at: vfio_pci_liveupdate_freeze+0x36/0x100
[ 2710.912086][T75672]  #5: ff46fd501f9a19a8 (&vdev->memory_lock){++++}-{4:4}, at: vfio_pci_liveupdate_freeze+0x51/0x100
[ 2710.922701][T75672]  #6: ff46fd4fd416c1f0 (&dev->mutex){....}-{4:4}, at: pci_dev_trylock+0x25/0x60
[ 2710.931676][T75672]
[ 2710.931676][T75672] stack backtrace:
[ 2710.937439][T75672] CPU: 193 UID: 0 PID: 75672 Comm: kexec Tainted: G S                  7.1.0-dbg-DEV #59 PREEMPTLAZY
[ 2710.937442][T75672] Tainted: [S]=CPU_OUT_OF_SPEC
[ 2710.937442][T75672] Hardware name: Google Izumi-EMR/izumi, BIOS 0.20251023.0-0 10/23/2025
[ 2710.937443][T75672] Call Trace:
[ 2710.937446][T75672]  <TASK>
[ 2710.937448][T75672]  dump_stack_lvl+0x54/0x70
[ 2710.937453][T75672]  print_circular_bug+0x2e1/0x300
[ 2710.937455][T75672]  check_noncircular+0xf9/0x120
[ 2710.937456][T75672]  ? __bfs+0x129/0x200
[ 2710.937458][T75672]  __lock_acquire+0x14c6/0x2800
[ 2710.937460][T75672]  ? __lock_acquire+0x1240/0x2800
[ 2710.937463][T75672]  ? pci_dev_reset_iommu_prepare+0x6e/0x1a0
[ 2710.937465][T75672]  lock_acquire+0xd3/0x2c0
[ 2710.937466][T75672]  ? pci_dev_reset_iommu_prepare+0x6e/0x1a0
[ 2710.937468][T75672]  ? lock_is_held_type+0x76/0x100
[ 2710.937471][T75672]  ? pci_dev_reset_iommu_prepare+0x6e/0x1a0
[ 2710.937473][T75672]  __mutex_lock+0x8f/0xcd0
[ 2710.937473][T75672]  ? pci_dev_reset_iommu_prepare+0x6e/0x1a0
[ 2710.937475][T75672]  ? lockdep_hardirqs_on_prepare+0x151/0x210
[ 2710.937477][T75672]  ? _raw_spin_unlock_irqrestore+0x35/0x50
[ 2710.937482][T75672]  pci_dev_reset_iommu_prepare+0x6e/0x1a0
[ 2710.937484][T75672]  pcie_flr+0x32/0xc0
[ 2710.937485][T75672]  __pci_reset_function_locked+0x84/0x120
[ 2710.937487][T75672]  vfio_pci_core_try_reset+0x96/0xe0
[ 2710.937489][T75672]  vfio_pci_liveupdate_freeze+0x89/0x100
[ 2710.937490][T75672]  luo_file_freeze+0xba/0x280
[ 2710.937492][T75672]  luo_session_serialize+0x69/0x190
[ 2710.937493][T75672]  liveupdate_reboot+0x19/0x30
[ 2710.937495][T75672]  kernel_kexec+0x2f/0xa0
[ 2710.937496][T75672]  __se_sys_reboot+0xfd/0x210
[ 2710.937497][T75672]  ? check_object+0x1ee/0x390
[ 2710.937500][T75672]  ? lock_release+0xef/0x350
[ 2710.937501][T75672]  ? kmem_cache_free+0x1b5/0x520
[ 2710.937506][T75672]  ? _raw_spin_unlock_irqrestore+0x35/0x50
[ 2710.937508][T75672]  ? kmem_cache_free+0x1b5/0x520
[ 2710.937509][T75672]  ? __x64_sys_close+0x3d/0x80
[ 2710.937510][T75672]  ? entry_SYSCALL_64_after_hwframe+0x77/0x7f
[ 2710.937511][T75672]  ? entry_SYSCALL_64_after_hwframe+0x77/0x7f
[ 2710.937512][T75672]  do_syscall_64+0x15f/0x710
[ 2710.937514][T75672]  entry_SYSCALL_64_after_hwframe+0x77/0x7f
[ 2710.937515][T75672] RIP: 0033:0x7fa57e4f2513
[ 2710.937519][T75672] Code: cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc 89 fa b8 a9 00 00 00 bf ad de e1 fe be 69 19 12 28 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 f7 d8 48 8b 0d db 2c 07 00 64 89 01 48
[ 2710.937520][T75672] RSP: 002b:00007ffd16943748 EFLAGS: 00000246 ORIG_RAX: 00000000000000a9
[ 2710.937523][T75672] RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 00007fa57e4f2513
[ 2710.937524][T75672] RDX: 0000000045584543 RSI: 0000000028121969 RDI: 00000000fee1dead
[ 2710.937526][T75672] RBP: 00007ffd16943a60 R08: 0000000000000009 R09: 00007fa57e5672e0
[ 2710.937527][T75672] R10: 0000000000000008 R11: 0000000000000246 R12: 00007ffd169438e0
[ 2710.937528][T75672] R13: 0000000000000000 R14: 00007ffd169438e0 R15: 0000000000000001
[ 2710.937532][T75672]  </TASK>

> +     pci_restore_state(pdev);
> +     return 0;
>  }

Re: [PATCH v4 02/16] vfio/pci: Preserve vfio-pci device files across Live Update

Reply via email to