Re: Xorg indefinitely hangs in kernelspace
On Tue, 6 Aug 2019 21:00:10 +0300 From: Jaak Ristioja > Hello! > > I'm writing to report a crash in the QXL / DRM code in the Linux kernel. > I originally filed the issue on LaunchPad and more details can be found > there, although I doubt whether these details are useful. > > https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1813620 > > I first experienced these issues with: > > * Ubuntu 18.04 (probably kernel 4.15.something) > * Ubuntu 18.10 (kernel 4.18.0-13) > * Ubuntu 19.04 (kernel 5.0.0-13-generic) > * Ubuntu 19.04 (mainline kernel 5.1-rc7) > * Ubuntu 19.04 (mainline kernel 5.2.0-050200rc1-generic) > > Here is the crash output from dmesg: > > [354073.713350] INFO: task Xorg:920 blocked for more than 120 seconds. > [354073.717755] Not tainted 5.2.0-050200rc1-generic #201905191930 > [354073.722277] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" > disables this message. > [354073.738332] XorgD0 920854 0x00404004 > [354073.738334] Call Trace: > [354073.738340] __schedule+0x2ba/0x650 > [354073.738342] schedule+0x2d/0x90 > [354073.738343] schedule_preempt_disabled+0xe/0x10 > [354073.738345] __ww_mutex_lock.isra.11+0x3e0/0x750 > [354073.738346] __ww_mutex_lock_slowpath+0x16/0x20 > [354073.738347] ww_mutex_lock+0x34/0x50 > [354073.738352] ttm_eu_reserve_buffers+0x1f9/0x2e0 [ttm] > [354073.738356] qxl_release_reserve_list+0x67/0x150 [qxl] > [354073.738358] ? qxl_bo_pin+0xaa/0x190 [qxl] > [354073.738359] qxl_cursor_atomic_update+0x1b0/0x2e0 [qxl] > [354073.738367] drm_atomic_helper_commit_planes+0xb9/0x220 [drm_kms_helper] > [354073.738371] drm_atomic_helper_commit_tail+0x2b/0x70 [drm_kms_helper] > [354073.738374] commit_tail+0x67/0x70 [drm_kms_helper] > [354073.738378] drm_atomic_helper_commit+0x113/0x120 [drm_kms_helper] > [354073.738390] drm_atomic_commit+0x4a/0x50 [drm] > [354073.738394] drm_atomic_helper_update_plane+0xe9/0x100 [drm_kms_helper] > [354073.738402] __setplane_atomic+0xd3/0x120 [drm] > [354073.738410] drm_mode_cursor_universal+0x142/0x270 [drm] > [354073.738418] drm_mode_cursor_common+0xcb/0x220 [drm] > [354073.738425] ? drm_mode_cursor_ioctl+0x60/0x60 [drm] > [354073.738432] drm_mode_cursor2_ioctl+0xe/0x10 [drm] > [354073.738438] drm_ioctl_kernel+0xb0/0x100 [drm] > [354073.738440] ? ___sys_recvmsg+0x16c/0x200 > [354073.738445] drm_ioctl+0x233/0x410 [drm] > [354073.738452] ? drm_mode_cursor_ioctl+0x60/0x60 [drm] > [354073.738454] ? timerqueue_add+0x57/0x90 > [354073.738456] ? enqueue_hrtimer+0x3c/0x90 > [354073.738458] do_vfs_ioctl+0xa9/0x640 > [354073.738459] ? fput+0x13/0x20 > [354073.738461] ? __sys_recvmsg+0x88/0xa0 > [354073.738462] ksys_ioctl+0x67/0x90 > [354073.738463] __x64_sys_ioctl+0x1a/0x20 > [354073.738465] do_syscall_64+0x5a/0x140 > [354073.738467] entry_SYSCALL_64_after_hwframe+0x44/0xa9 > [354073.738468] RIP: 0033:0x7ffad14d3417 > [354073.738472] Code: Bad RIP value. > [354073.738472] RSP: 002b:7ffdd5679978 EFLAGS: 3246 ORIG_RAX: > 0010 > [354073.738473] RAX: ffda RBX: 56428a474610 RCX: > 7ffad14d3417 > [354073.738474] RDX: 7ffdd56799b0 RSI: c02464bb RDI: > 000e > [354073.738474] RBP: 7ffdd56799b0 R08: 0040 R09: > 0010 > [354073.738475] R10: 003f R11: 3246 R12: > c02464bb > [354073.738475] R13: 000e R14: R15: > 56428a4721d0 > [354073.738511] INFO: task kworker/1:0:27625 blocked for more than 120 > seconds. > [354073.745154] Not tainted 5.2.0-050200rc1-generic #201905191930 > [354073.751900] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" > disables this message. > [354073.762197] kworker/1:0 D0 27625 2 0x80004000 > [354073.762205] Workqueue: events qxl_client_monitors_config_work_func [qxl] > [354073.762206] Call Trace: > [354073.762211] __schedule+0x2ba/0x650 > [354073.762214] schedule+0x2d/0x90 > [354073.762215] schedule_preempt_disabled+0xe/0x10 > [354073.762216] __ww_mutex_lock.isra.11+0x3e0/0x750 > [354073.762217] ? __switch_to_asm+0x34/0x70 > [354073.762218] ? __switch_to_asm+0x40/0x70 > [354073.762219] ? __switch_to_asm+0x40/0x70 > [354073.762220] __ww_mutex_lock_slowpath+0x16/0x20 > [354073.762221] ww_mutex_lock+0x34/0x50 > [354073.762235] drm_modeset_lock+0x35/0xb0 [drm] > [354073.762243] drm_modeset_lock_all_ctx+0x5d/0xe0 [drm] > [354073.762251] drm_modeset_lock_all+0x5e/0xb0 [drm] > [354073.762252] qxl_display_read_client_monitors_config+0x1e1/0x370 [qxl] > [354073.762254] qxl_client_monitors_config_work_func+0x15/0x20 [qxl] > [354073.762256] process_one_work+0x20f/0x410 > [354073.762257] worker_thread+0x34/0x400 > [354073.762259] kthread+0x120/0x140 > [354073.762260] ? process_one_work+0x410/0x410 > [354073.762261] ? __kthread_parkme+0x70/0x70 > [354073.762262] ret_from_fork+0x35/0x40 > --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c @@ -97,8 +97
Re: [PATCH v2] drm/virtio: Use vmalloc for command buffer allocations.
> +/* How many bytes left in this page. */ > +static unsigned int rest_of_page(void *data) > +{ > + return PAGE_SIZE - offset_in_page(data); > +} Not needed. > +/* Create sg_table from a vmalloc'd buffer. */ > +static struct sg_table *vmalloc_to_sgt(char *data, uint32_t size, int > *sg_ents) > +{ > + int nents, ret, s, i; > + struct sg_table *sgt; > + struct scatterlist *sg; > + struct page *pg; > + > + *sg_ents = 0; > + > + sgt = kmalloc(sizeof(*sgt), GFP_KERNEL); > + if (!sgt) > + return NULL; > + > + nents = DIV_ROUND_UP(size, PAGE_SIZE) + 1; Why +1? > + ret = sg_alloc_table(sgt, nents, GFP_KERNEL); > + if (ret) { > + kfree(sgt); > + return NULL; > + } > + > + for_each_sg(sgt->sgl, sg, nents, i) { > + pg = vmalloc_to_page(data); > + if (!pg) { > + sg_free_table(sgt); > + kfree(sgt); > + return NULL; > + } > + > + s = rest_of_page(data); > + if (s > size) > + s = size; vmalloc memory is page aligned, so: s = min(PAGE_SIZE, size); > + sg_set_page(sg, pg, s, offset_in_page(data)); Offset is always zero. > + > + size -= s; > + data += s; > + *sg_ents += 1; sg_ents isn't used anywhere. > + > + if (size) { > + sg_unmark_end(sg); > + } else { > + sg_mark_end(sg); > + break; > + } That looks a bit strange. I guess you need only one of the two because the other is the default? > static int virtio_gpu_queue_fenced_ctrl_buffer(struct virtio_gpu_device > *vgdev, > struct virtio_gpu_vbuffer *vbuf, > struct virtio_gpu_ctrl_hdr *hdr, > struct virtio_gpu_fence *fence) > { > struct virtqueue *vq = vgdev->ctrlq.vq; > + struct scatterlist *vout = NULL, sg; > + struct sg_table *sgt = NULL; > int rc; > + int outcnt = 0; > + > + if (vbuf->data_size) { > + if (is_vmalloc_addr(vbuf->data_buf)) { > + sgt = vmalloc_to_sgt(vbuf->data_buf, vbuf->data_size, > + &outcnt); > + if (!sgt) > + return -ENOMEM; > + vout = sgt->sgl; > + } else { > + sg_init_one(&sg, vbuf->data_buf, vbuf->data_size); > + vout = &sg; > + outcnt = 1; outcnt must be set in both cases. > +static int virtio_gpu_queue_ctrl_buffer(struct virtio_gpu_device *vgdev, > + struct virtio_gpu_vbuffer *vbuf) > +{ > + return virtio_gpu_queue_fenced_ctrl_buffer(vgdev, vbuf, NULL, NULL); > +} Changing virtio_gpu_queue_ctrl_buffer to call virtio_gpu_queue_fenced_ctrl_buffer should be done in a separate patch. cheers, Gerd ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [RFC PATCH] virtio_ring: Use DMA API if guest memory is encrypted
On Mon, Aug 12, 2019 at 02:15:32PM +0200, Christoph Hellwig wrote: > On Sun, Aug 11, 2019 at 04:55:27AM -0400, Michael S. Tsirkin wrote: > > On Sun, Aug 11, 2019 at 07:56:07AM +0200, Christoph Hellwig wrote: > > > So we need a flag on the virtio device, exposed by the > > > hypervisor (or hardware for hw virtio devices) that says: hey, I'm real, > > > don't take a shortcut. > > > > The point here is that it's actually still not real. So we would still > > use a physical address. However Linux decides that it wants extra > > security by moving all data through the bounce buffer. The distinction > > made is that one can actually give device a physical address of the > > bounce buffer. > > Sure. The problem is just that you keep piling hacks on top of hacks. > We need the per-device flag anyway to properly support hardware virtio > device in all circumstances. Instead of coming up with another ad-hoc > hack to force DMA uses implement that one proper bit and reuse it here. The flag that you mention literally means "I am a real device" so for example, you can use VFIO with it. And this device isn't a real one, and you can't use VFIO with it, even though it's part of a power system which always has an IOMMU. Or here's another way to put it: we have a broken device that can only access physical addresses, not DMA addresses. But to enable SEV Linux requires DMA API. So we can still make it work if DMA address happens to be a physical address (not necessarily of the same page). This is where dma_addr_is_a_phys_addr() is coming from: it tells us this weird configuration can still work. What are we going to do for SEV if dma_addr_is_a_phys_addr does not apply? Fail probe I guess. So the proposal is really to make things safe and to this end, to add this in probe: if (sev_active() && !dma_addr_is_a_phys_addr(dev) && !virtio_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) return -EINVAL; the point being to prevent loading driver where it would corrupt guest memory. Put this way, any objections to adding dma_addr_is_a_phys_addr to the DMA API? -- MST ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH 2/2] vhost: re-introducing metadata acceleration through kernel virtual address
On Thu, 5 Sep 2019 20:27:36 +0800 From: Jason Wang > > +static void vhost_set_map_dirty(struct vhost_virtqueue *vq, > + struct vhost_map *map, int index) > +{ > + struct vhost_uaddr *uaddr = &vq->uaddrs[index]; > + int i; > + > + if (uaddr->write) { > + for (i = 0; i < map->npages; i++) > + set_page_dirty(map->pages[i]); > + } Not sure need to set page dirty under page lock. ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH v2] drm/virtio: Use vmalloc for command buffer allocations.
Userspace requested command buffer allocations could be too large to make as a contiguous allocation. Use vmalloc if necessary to satisfy those allocations. Signed-off-by: David Riley --- drivers/gpu/drm/virtio/virtgpu_ioctl.c | 4 +- drivers/gpu/drm/virtio/virtgpu_vq.c| 114 - 2 files changed, 96 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c index ac60be9b5c19..a8732a8af766 100644 --- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c +++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c @@ -195,7 +195,7 @@ static int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data, if (ret) goto out_free; - buf = memdup_user(u64_to_user_ptr(exbuf->command), exbuf->size); + buf = vmemdup_user(u64_to_user_ptr(exbuf->command), exbuf->size); if (IS_ERR(buf)) { ret = PTR_ERR(buf); goto out_unresv; @@ -230,7 +230,7 @@ static int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data, return 0; out_memdup: - kfree(buf); + kvfree(buf); out_unresv: ttm_eu_backoff_reservation(&ticket, &validate_list); out_free: diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c index 981ee16e3ee9..3ec89ae8478c 100644 --- a/drivers/gpu/drm/virtio/virtgpu_vq.c +++ b/drivers/gpu/drm/virtio/virtgpu_vq.c @@ -154,7 +154,7 @@ static void free_vbuf(struct virtio_gpu_device *vgdev, { if (vbuf->resp_size > MAX_INLINE_RESP_SIZE) kfree(vbuf->resp_buf); - kfree(vbuf->data_buf); + kvfree(vbuf->data_buf); kmem_cache_free(vgdev->vbufs, vbuf); } @@ -251,13 +251,70 @@ void virtio_gpu_dequeue_cursor_func(struct work_struct *work) wake_up(&vgdev->cursorq.ack_queue); } +/* How many bytes left in this page. */ +static unsigned int rest_of_page(void *data) +{ + return PAGE_SIZE - offset_in_page(data); +} + +/* Create sg_table from a vmalloc'd buffer. */ +static struct sg_table *vmalloc_to_sgt(char *data, uint32_t size, int *sg_ents) +{ + int nents, ret, s, i; + struct sg_table *sgt; + struct scatterlist *sg; + struct page *pg; + + *sg_ents = 0; + + sgt = kmalloc(sizeof(*sgt), GFP_KERNEL); + if (!sgt) + return NULL; + + nents = DIV_ROUND_UP(size, PAGE_SIZE) + 1; + ret = sg_alloc_table(sgt, nents, GFP_KERNEL); + if (ret) { + kfree(sgt); + return NULL; + } + + for_each_sg(sgt->sgl, sg, nents, i) { + pg = vmalloc_to_page(data); + if (!pg) { + sg_free_table(sgt); + kfree(sgt); + return NULL; + } + + s = rest_of_page(data); + if (s > size) + s = size; + + sg_set_page(sg, pg, s, offset_in_page(data)); + + size -= s; + data += s; + *sg_ents += 1; + + if (size) { + sg_unmark_end(sg); + } else { + sg_mark_end(sg); + break; + } + } + + return sgt; +} + static int virtio_gpu_queue_ctrl_buffer_locked(struct virtio_gpu_device *vgdev, - struct virtio_gpu_vbuffer *vbuf) + struct virtio_gpu_vbuffer *vbuf, + struct scatterlist *vout) __releases(&vgdev->ctrlq.qlock) __acquires(&vgdev->ctrlq.qlock) { struct virtqueue *vq = vgdev->ctrlq.vq; - struct scatterlist *sgs[3], vcmd, vout, vresp; + struct scatterlist *sgs[3], vcmd, vresp; int outcnt = 0, incnt = 0; int ret; @@ -268,9 +325,8 @@ static int virtio_gpu_queue_ctrl_buffer_locked(struct virtio_gpu_device *vgdev, sgs[outcnt + incnt] = &vcmd; outcnt++; - if (vbuf->data_size) { - sg_init_one(&vout, vbuf->data_buf, vbuf->data_size); - sgs[outcnt + incnt] = &vout; + if (vout) { + sgs[outcnt + incnt] = vout; outcnt++; } @@ -299,24 +355,30 @@ static int virtio_gpu_queue_ctrl_buffer_locked(struct virtio_gpu_device *vgdev, return ret; } -static int virtio_gpu_queue_ctrl_buffer(struct virtio_gpu_device *vgdev, - struct virtio_gpu_vbuffer *vbuf) -{ - int rc; - - spin_lock(&vgdev->ctrlq.qlock); - rc = virtio_gpu_queue_ctrl_buffer_locked(vgdev, vbuf); - spin_unlock(&vgdev->ctrlq.qlock); - return rc; -} - static int virtio_gpu_queue_fenced_ctrl_buffer(struct virtio_gpu_device *vgdev, struct virtio_gpu_vbuffer *vbuf,
[PATCH 16/18] virtiofs: Use virtio_fs_mutex for races w.r.t ->remove and mount path
It is possible that a mount is in progress and device is being removed at the same time. Use virtio_fs_mutex to avoid races. This also takes care of bunch of races and removes some TODO items. Signed-off-by: Vivek Goyal --- fs/fuse/virtio_fs.c | 32 ++-- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 29ec2f5bbbe2..c483482185b6 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -13,7 +13,9 @@ #include #include "fuse_i.h" -/* List of virtio-fs device instances and a lock for the list */ +/* List of virtio-fs device instances and a lock for the list. Also provides + * mutual exclusion in device removal and mounting path + */ static DEFINE_MUTEX(virtio_fs_mutex); static LIST_HEAD(virtio_fs_instances); @@ -72,17 +74,19 @@ static void release_virtiofs_obj(struct kref *ref) kfree(vfs); } +/* Make sure virtiofs_mutex is held */ static void virtiofs_put(struct virtio_fs *fs) { - mutex_lock(&virtio_fs_mutex); kref_put(&fs->refcount, release_virtiofs_obj); - mutex_unlock(&virtio_fs_mutex); } static void virtio_fs_put(struct fuse_iqueue *fiq) { struct virtio_fs *vfs = fiq->priv; + + mutex_lock(&virtio_fs_mutex); virtiofs_put(vfs); + mutex_unlock(&virtio_fs_mutex); } static void virtio_fs_drain_queue(struct virtio_fs_vq *fsvq) @@ -596,9 +600,8 @@ static void virtio_fs_remove(struct virtio_device *vdev) struct virtio_fs *fs = vdev->priv; mutex_lock(&virtio_fs_mutex); + /* This device is going away. No one should get new reference */ list_del_init(&fs->list); - mutex_unlock(&virtio_fs_mutex); - virtio_fs_stop_all_queues(fs); virtio_fs_drain_all_queues(fs); vdev->config->reset(vdev); @@ -607,6 +610,7 @@ static void virtio_fs_remove(struct virtio_device *vdev) vdev->priv = NULL; /* Put device reference on virtio_fs object */ virtiofs_put(fs); + mutex_unlock(&virtio_fs_mutex); } #ifdef CONFIG_PM_SLEEP @@ -978,10 +982,15 @@ static int virtio_fs_fill_super(struct super_block *sb) .no_force_umount = true, }; - /* TODO lock */ - if (fs->vqs[VQ_REQUEST].fud) { - pr_err("virtio-fs: device already in use\n"); - err = -EBUSY; + mutex_lock(&virtio_fs_mutex); + + /* After holding mutex, make sure virtiofs device is still there. +* Though we are holding a refernce to it, drive ->remove might +* still have cleaned up virtual queues. In that case bail out. +*/ + err = -EINVAL; + if (list_empty(&fs->list)) { + pr_info("virtio-fs: tag <%s> not found\n", fs->tag); goto err; } @@ -1007,7 +1016,6 @@ static int virtio_fs_fill_super(struct super_block *sb) fc = fs->vqs[VQ_REQUEST].fud->fc; - /* TODO take fuse_mutex around this loop? */ for (i = 0; i < fs->nvqs; i++) { struct virtio_fs_vq *fsvq = &fs->vqs[i]; @@ -1020,6 +1028,7 @@ static int virtio_fs_fill_super(struct super_block *sb) /* Previous unmount will stop all queues. Start these again */ virtio_fs_start_all_queues(fs); fuse_send_init(fc, init_req); + mutex_unlock(&virtio_fs_mutex); return 0; err_free_init_req: @@ -1027,6 +1036,7 @@ static int virtio_fs_fill_super(struct super_block *sb) err_free_fuse_devs: virtio_fs_free_devs(fs); err: + mutex_unlock(&virtio_fs_mutex); return err; } @@ -1100,7 +1110,9 @@ static int virtio_fs_get_tree(struct fs_context *fsc) fc = kzalloc(sizeof(struct fuse_conn), GFP_KERNEL); if (!fc) { + mutex_lock(&virtio_fs_mutex); virtiofs_put(fs); + mutex_unlock(&virtio_fs_mutex); return -ENOMEM; } -- 2.20.1 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH 05/18] Maintain count of in flight requests for VQ_REQUEST queue
As of now we maintain this count only for VQ_HIPRIO. Maintain it for VQ_REQUEST as well so that later it can be used to drain VQ_REQUEST queue. Signed-off-by: Vivek Goyal --- fs/fuse/virtio_fs.c | 4 1 file changed, 4 insertions(+) diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index c46dd4d284d6..5df97dfee37d 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -360,6 +360,9 @@ static void virtio_fs_requests_done_work(struct work_struct *work) spin_unlock(&fpq->lock); fuse_request_end(fc, req); + spin_lock(&fsvq->lock); + fsvq->in_flight--; + spin_unlock(&fsvq->lock); } } @@ -769,6 +772,7 @@ static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, goto out; } + fsvq->in_flight++; notify = virtqueue_kick_prepare(vq); spin_unlock(&fsvq->lock); -- 2.20.1 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH 18/18] virtiofs: Remove TODO item from virtio_fs_free_devs()
virtio_fs_free_devs() is now called from ->kill_sb(). By this time all device queues have been quiesced. I am assuming that while ->kill_sb() is in progress, another mount instance will wait for it to finish (sb->s_umount mutex provides mutual exclusion). W.r.t ->remove path, we should be fine as we are not touching vdev or virtqueues. And we have reference on virtio_fs object, so we know rest of the data structures are valid. So I can't see the need of any additional locking yet. Signed-off-by: Vivek Goyal --- fs/fuse/virtio_fs.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index eadaea6eb8e2..61aa3eba7b22 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -200,8 +200,6 @@ static void virtio_fs_free_devs(struct virtio_fs *fs) { unsigned int i; - /* TODO lock */ - for (i = 0; i < fs->nvqs; i++) { struct virtio_fs_vq *fsvq = &fs->vqs[i]; -- 2.20.1 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH 13/18] virtiofs: Do not access virtqueue in request submission path
In request submission path it is possible that virtqueue is already gone due to driver->remove(). So do not access it in dev_dbg(). Use pr_debug() instead. If virtuqueue is gone, this will result in NULL pointer deference. Signed-off-by: Vivek Goyal --- fs/fuse/virtio_fs.c | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 40259368a6bd..01bbf2c0e144 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -888,10 +888,10 @@ __releases(fiq->waitq.lock) fs = fiq->priv; fc = fs->vqs[queue_id].fud->fc; - dev_dbg(&fs->vqs[queue_id].vq->vdev->dev, - "%s: opcode %u unique %#llx nodeid %#llx in.len %u out.len %u\n", - __func__, req->in.h.opcode, req->in.h.unique, req->in.h.nodeid, - req->in.h.len, fuse_len_args(req->out.numargs, req->out.args)); + pr_debug("%s: opcode %u unique %#llx nodeid %#llx in.len %u out.len %u" +"\n", __func__, req->in.h.opcode, req->in.h.unique, +req->in.h.nodeid, req->in.h.len, +fuse_len_args(req->out.numargs, req->out.args)); fpq = &fs->vqs[queue_id].fud->pq; spin_lock(&fpq->lock); -- 2.20.1 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH 09/18] virtiofs: Add an helper to start all the queues
This just marks are the queues are connected and ready to accept the request. Signed-off-by: Vivek Goyal --- fs/fuse/virtio_fs.c | 19 --- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index d5730a50b303..f2936daca39c 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -112,6 +112,19 @@ static void virtio_fs_drain_all_queues(struct virtio_fs *fs) } } +static void virtio_fs_start_all_queues(struct virtio_fs *fs) +{ + struct virtio_fs_vq *fsvq; + int i; + + for (i = 0; i < fs->nvqs; i++) { + fsvq = &fs->vqs[i]; + spin_lock(&fsvq->lock); + fsvq->connected = true; + spin_unlock(&fsvq->lock); + } +} + /* Add a new instance to the list or return -EEXIST if tag name exists*/ static int virtio_fs_add_instance(struct virtio_fs *fs) { @@ -483,10 +496,10 @@ static int virtio_fs_setup_vqs(struct virtio_device *vdev, if (ret < 0) goto out; - for (i = 0; i < fs->nvqs; i++) { + for (i = 0; i < fs->nvqs; i++) fs->vqs[i].vq = vqs[i]; - fs->vqs[i].connected = true; - } + + virtio_fs_start_all_queues(fs); out: kfree(names); kfree(callbacks); -- 2.20.1 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH 10/18] virtiofs: Do not use device managed mem for virtio_fs and virtio_fs_vq
These data structures should go away when virtio_fs object is going away. When deivce is going away, we need to just make sure virtqueues can go away and after that none of the code accesses vq and all the requests get error. So allocate memory for virtio_fs and virtio_fs_vq normally and free it at right time. This patch still frees up memory during device remove time. A later patch will make virtio_fs object reference counted and this memory will be freed when last reference to object is dropped. Signed-off-by: Vivek Goyal --- fs/fuse/virtio_fs.c | 13 - 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index f2936daca39c..1ea0f889e804 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -446,7 +446,7 @@ static int virtio_fs_setup_vqs(struct virtio_device *vdev, vq_callback_t **callbacks; const char **names; unsigned int i; - int ret; + int ret = 0; virtio_cread(vdev, struct virtio_fs_config, num_queues, &fs->num_queues); @@ -454,9 +454,7 @@ static int virtio_fs_setup_vqs(struct virtio_device *vdev, return -EINVAL; fs->nvqs = 1 + fs->num_queues; - - fs->vqs = devm_kcalloc(&vdev->dev, fs->nvqs, - sizeof(fs->vqs[VQ_HIPRIO]), GFP_KERNEL); + fs->vqs = kcalloc(fs->nvqs, sizeof(fs->vqs[VQ_HIPRIO]), GFP_KERNEL); if (!fs->vqs) return -ENOMEM; @@ -504,6 +502,8 @@ static int virtio_fs_setup_vqs(struct virtio_device *vdev, kfree(names); kfree(callbacks); kfree(vqs); + if (ret) + kfree(fs->vqs); return ret; } @@ -519,7 +519,7 @@ static int virtio_fs_probe(struct virtio_device *vdev) struct virtio_fs *fs; int ret; - fs = devm_kzalloc(&vdev->dev, sizeof(*fs), GFP_KERNEL); + fs = kzalloc(sizeof(*fs), GFP_KERNEL); if (!fs) return -ENOMEM; vdev->priv = fs; @@ -552,6 +552,7 @@ static int virtio_fs_probe(struct virtio_device *vdev) out: vdev->priv = NULL; + kfree(fs); return ret; } @@ -582,6 +583,8 @@ static void virtio_fs_remove(struct virtio_device *vdev) mutex_unlock(&virtio_fs_mutex); vdev->priv = NULL; + kfree(fs->vqs); + kfree(fs); } #ifdef CONFIG_PM_SLEEP -- 2.20.1 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH 17/18] virtiofs: Remove TODO to quiesce/end_requests
We now stop queues and drain all the pending requests from all virtqueues. So this is not a TODO anymore. Got rid of incrementing fc->dev_count as well. It did not seem meaningful for virtio_fs. Signed-off-by: Vivek Goyal --- fs/fuse/virtio_fs.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index c483482185b6..eadaea6eb8e2 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -208,7 +208,6 @@ static void virtio_fs_free_devs(struct virtio_fs *fs) if (!fsvq->fud) continue; - /* TODO need to quiesce/end_requests/decrement dev_count */ fuse_dev_free(fsvq->fud); fsvq->fud = NULL; } @@ -1022,7 +1021,6 @@ static int virtio_fs_fill_super(struct super_block *sb) if (i == VQ_REQUEST) continue; /* already initialized */ fuse_dev_install(fsvq->fud, fc); - atomic_inc(&fc->dev_count); } /* Previous unmount will stop all queues. Start these again */ -- 2.20.1 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH 11/18] virtiofs: stop and drain queues after sending DESTROY
During virtio_kill_sb() we first stop forget queue and drain it and then call fuse_kill_sb_anon(). This will result in sending DESTROY request to fuse server. Once finished, stop all the queues and drain one more time just to be sure and then free up the devices. Given drain queues will call flush_work() on various workers, remove this logic from virtio_free_devs(). Signed-off-by: Vivek Goyal --- fs/fuse/virtio_fs.c | 11 --- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 1ea0f889e804..a76bd5a04521 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -180,9 +180,6 @@ static void virtio_fs_free_devs(struct virtio_fs *fs) if (!fsvq->fud) continue; - flush_work(&fsvq->done_work); - flush_delayed_work(&fsvq->dispatch_work); - /* TODO need to quiesce/end_requests/decrement dev_count */ fuse_dev_free(fsvq->fud); fsvq->fud = NULL; @@ -994,6 +991,8 @@ static int virtio_fs_fill_super(struct super_block *sb) atomic_inc(&fc->dev_count); } + /* Previous unmount will stop all queues. Start these again */ + virtio_fs_start_all_queues(fs); fuse_send_init(fc, init_req); return 0; @@ -1026,6 +1025,12 @@ static void virtio_kill_sb(struct super_block *sb) virtio_fs_drain_all_queues(vfs); fuse_kill_sb_anon(sb); + + /* fuse_kill_sb_anon() must have sent destroy. Stop all queues +* and drain one more time and free fuse devices. +*/ + virtio_fs_stop_all_queues(vfs); + virtio_fs_drain_all_queues(vfs); virtio_fs_free_devs(vfs); } -- 2.20.1 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH 15/18] virtiofs: Make virtio_fs object refcounted
This object is used both by fuse_connection as well virt device. So make this object reference counted and that makes it easy to define life cycle of the object. Now deivce can be removed while filesystem is still mounted. This will cleanup all the virtqueues but virtio_fs object will still be around and will be cleaned when filesystem is unmounted and sb/fc drops its reference. Removing a device also stops all virt queues and any new reuqest gets error -ENOTCONN. All existing in flight requests are drained before ->remove returns. Signed-off-by: Vivek Goyal --- fs/fuse/virtio_fs.c | 52 + 1 file changed, 43 insertions(+), 9 deletions(-) diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 01bbf2c0e144..29ec2f5bbbe2 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -37,6 +37,7 @@ struct virtio_fs_vq { /* A virtio-fs device instance */ struct virtio_fs { + struct kref refcount; struct list_head list;/* on virtio_fs_instances */ char *tag; struct virtio_fs_vq *vqs; @@ -63,6 +64,27 @@ static inline struct fuse_pqueue *vq_to_fpq(struct virtqueue *vq) return &vq_to_fsvq(vq)->fud->pq; } +static void release_virtiofs_obj(struct kref *ref) +{ + struct virtio_fs *vfs = container_of(ref, struct virtio_fs, refcount); + + kfree(vfs->vqs); + kfree(vfs); +} + +static void virtiofs_put(struct virtio_fs *fs) +{ + mutex_lock(&virtio_fs_mutex); + kref_put(&fs->refcount, release_virtiofs_obj); + mutex_unlock(&virtio_fs_mutex); +} + +static void virtio_fs_put(struct fuse_iqueue *fiq) +{ + struct virtio_fs *vfs = fiq->priv; + virtiofs_put(vfs); +} + static void virtio_fs_drain_queue(struct virtio_fs_vq *fsvq) { WARN_ON(fsvq->in_flight < 0); @@ -156,8 +178,10 @@ static struct virtio_fs *virtio_fs_find_instance(const char *tag) mutex_lock(&virtio_fs_mutex); list_for_each_entry(fs, &virtio_fs_instances, list) { - if (strcmp(fs->tag, tag) == 0) + if (strcmp(fs->tag, tag) == 0) { + kref_get(&fs->refcount); goto found; + } } fs = NULL; /* not found */ @@ -519,6 +543,7 @@ static int virtio_fs_probe(struct virtio_device *vdev) fs = kzalloc(sizeof(*fs), GFP_KERNEL); if (!fs) return -ENOMEM; + kref_init(&fs->refcount); vdev->priv = fs; ret = virtio_fs_read_tag(vdev, fs); @@ -570,18 +595,18 @@ static void virtio_fs_remove(struct virtio_device *vdev) { struct virtio_fs *fs = vdev->priv; + mutex_lock(&virtio_fs_mutex); + list_del_init(&fs->list); + mutex_unlock(&virtio_fs_mutex); + virtio_fs_stop_all_queues(fs); virtio_fs_drain_all_queues(fs); vdev->config->reset(vdev); virtio_fs_cleanup_vqs(vdev, fs); - mutex_lock(&virtio_fs_mutex); - list_del(&fs->list); - mutex_unlock(&virtio_fs_mutex); - vdev->priv = NULL; - kfree(fs->vqs); - kfree(fs); + /* Put device reference on virtio_fs object */ + virtiofs_put(fs); } #ifdef CONFIG_PM_SLEEP @@ -932,6 +957,7 @@ const static struct fuse_iqueue_ops virtio_fs_fiq_ops = { .wake_forget_and_unlock = virtio_fs_wake_forget_and_unlock, .wake_interrupt_and_unlock = virtio_fs_wake_interrupt_and_unlock, .wake_pending_and_unlock= virtio_fs_wake_pending_and_unlock, + .put= virtio_fs_put, }; static int virtio_fs_fill_super(struct super_block *sb) @@ -1026,7 +1052,9 @@ static void virtio_kill_sb(struct super_block *sb) fuse_kill_sb_anon(sb); /* fuse_kill_sb_anon() must have sent destroy. Stop all queues -* and drain one more time and free fuse devices. +* and drain one more time and free fuse devices. Freeing fuse +* devices will drop their reference on fuse_conn and that in +* turn will drop its reference on virtio_fs object. */ virtio_fs_stop_all_queues(vfs); virtio_fs_drain_all_queues(vfs); @@ -1060,6 +1088,10 @@ static int virtio_fs_get_tree(struct fs_context *fsc) struct fuse_conn *fc; int err; + /* This gets a reference on virtio_fs object. This ptr gets installed +* in fc->iq->priv. Once fuse_conn is going away, it calls ->put() +* to drop the reference to this object. +*/ fs = virtio_fs_find_instance(fsc->source); if (!fs) { pr_info("virtio-fs: tag <%s> not found\n", fsc->source); @@ -1067,8 +1099,10 @@ static int virtio_fs_get_tree(struct fs_context *fsc) } fc = kzalloc(sizeof(struct fuse_conn), GFP_KERNEL); - if (!fc) + if (!fc) { + virtiofs_put(fs); return -ENOMEM; + } fuse_conn_init(fc, get_user_ns(current_user_ns()), &virtio_f
[PATCH 00/18] virtiofs: Fix various races and cleanups round 1
Hi, Michael Tsirkin pointed out issues w.r.t various locking related TODO items and races w.r.t device removal. In this first round of cleanups, I have taken care of most pressing issues. These patches apply on top of following. git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse.git#virtiofs-v4 I have tested these patches with mount/umount and device removal using qemu monitor. For example. virsh qemu-monitor-command --hmp vm9-f28 device_del myvirtiofs Now a mounted device can be removed and file system will return errors -ENOTCONN and one can unmount it. Miklos, if you are fine with the patches, I am fine if you fold these all into existing patch. I kept them separate so that review is easier. Any feedback or comments are welcome. Thanks Vivek Vivek Goyal (18): virtiofs: Remove request from processing list before calling end virtiofs: Check whether hiprio queue is connected at submission time virtiofs: Pass fsvq instead of vq as parameter to virtio_fs_enqueue_req virtiofs: Check connected state for VQ_REQUEST queue as well Maintain count of in flight requests for VQ_REQUEST queue virtiofs: ->remove should not clean virtiofs fuse devices virtiofs: Stop virtiofs queues when device is being removed virtiofs: Drain all pending requests during ->remove time virtiofs: Add an helper to start all the queues virtiofs: Do not use device managed mem for virtio_fs and virtio_fs_vq virtiofs: stop and drain queues after sending DESTROY virtiofs: Use virtio_fs_free_devs() in error path virtiofs: Do not access virtqueue in request submission path virtiofs: Add a fuse_iqueue operation to put() reference virtiofs: Make virtio_fs object refcounted virtiofs: Use virtio_fs_mutex for races w.r.t ->remove and mount path virtiofs: Remove TODO to quiesce/end_requests virtiofs: Remove TODO item from virtio_fs_free_devs() fs/fuse/fuse_i.h| 5 + fs/fuse/inode.c | 6 + fs/fuse/virtio_fs.c | 259 3 files changed, 198 insertions(+), 72 deletions(-) -- 2.20.1 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH 01/18] virtiofs: Remove request from processing list before calling end
In error path we are calling fuse_request_end() but we need to clear FR_SENT bit as well as remove request from processing queue. Otherwise fuse_request_end() triggers a warning as well as other issues show up. Signed-off-by: Vivek Goyal --- fs/fuse/virtio_fs.c | 4 1 file changed, 4 insertions(+) diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 197e79e536f9..a708ccb65662 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -826,6 +826,10 @@ __releases(fiq->waitq.lock) } req->out.h.error = ret; pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", ret); + spin_lock(&fpq->lock); + clear_bit(FR_SENT, &req->flags); + list_del_init(&req->list); + spin_unlock(&fpq->lock); fuse_request_end(fc, req); return; } -- 2.20.1 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH 02/18] virtiofs: Check whether hiprio queue is connected at submission time
For hiprio queue (forget requests), we are keeping a state in queue whether queue is connected or not. If queue is not connected, do not try to submit request and return error instead. As of now, we are checking for this state only in path where worker tries to submit forget after first attempt failed. Check this state even in the path when request is being submitted first time. Signed-off-by: Vivek Goyal --- fs/fuse/virtio_fs.c | 9 - 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index a708ccb65662..e9497b565dd8 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -577,9 +577,16 @@ __releases(fiq->waitq.lock) sg_init_one(&sg, forget, sizeof(*forget)); /* Enqueue the request */ + spin_lock(&fsvq->lock); + + if (!fsvq->connected) { + kfree(forget); + spin_unlock(&fsvq->lock); + goto out; + } + vq = fsvq->vq; dev_dbg(&vq->vdev->dev, "%s\n", __func__); - spin_lock(&fsvq->lock); ret = virtqueue_add_sgs(vq, sgs, 1, 0, forget, GFP_ATOMIC); if (ret < 0) { -- 2.20.1 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH 04/18] virtiofs: Check connected state for VQ_REQUEST queue as well
Right now we are checking ->connected state only for VQ_HIPRIO. Now we want to make use of this method for all queues. So check it for VQ_REQUEST as well. This will be helpful if device has been removed and virtqueue is gone. In that case ->connected will be false and request can't be submitted anymore and user space will see error -ENOTCONN. Signed-off-by: Vivek Goyal --- fs/fuse/virtio_fs.c | 6 ++ 1 file changed, 6 insertions(+) diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 9d30530e3ca9..c46dd4d284d6 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -755,6 +755,12 @@ static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, spin_lock(&fsvq->lock); + if (!fsvq->connected) { + spin_unlock(&fsvq->lock); + ret = -ENOTCONN; + goto out; + } + vq = fsvq->vq; ret = virtqueue_add_sgs(vq, sgs, out_sgs, in_sgs, req, GFP_ATOMIC); if (ret < 0) { -- 2.20.1 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH 07/18] virtiofs: Stop virtiofs queues when device is being removed
Stop all the virt queues when device is going away. This will ensure that no new requests are submitted to virtqueue and and request will end with error -ENOTCONN. Signed-off-by: Vivek Goyal --- fs/fuse/virtio_fs.c | 14 ++ 1 file changed, 14 insertions(+) diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index f68a25ca9e9d..90e7b2f345e5 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -493,10 +493,24 @@ static int virtio_fs_probe(struct virtio_device *vdev) return ret; } +static void virtio_fs_stop_all_queues(struct virtio_fs *fs) +{ + struct virtio_fs_vq *fsvq; + int i; + + for (i = 0; i < fs->nvqs; i++) { + fsvq = &fs->vqs[i]; + spin_lock(&fsvq->lock); + fsvq->connected = false; + spin_unlock(&fsvq->lock); + } +} + static void virtio_fs_remove(struct virtio_device *vdev) { struct virtio_fs *fs = vdev->priv; + virtio_fs_stop_all_queues(fs); vdev->config->reset(vdev); virtio_fs_cleanup_vqs(vdev, fs); -- 2.20.1 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH 08/18] virtiofs: Drain all pending requests during ->remove time
When device is going away, drain all pending requests. Signed-off-by: Vivek Goyal --- fs/fuse/virtio_fs.c | 83 - 1 file changed, 51 insertions(+), 32 deletions(-) diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 90e7b2f345e5..d5730a50b303 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -63,6 +63,55 @@ static inline struct fuse_pqueue *vq_to_fpq(struct virtqueue *vq) return &vq_to_fsvq(vq)->fud->pq; } +static void virtio_fs_drain_queue(struct virtio_fs_vq *fsvq) +{ + WARN_ON(fsvq->in_flight < 0); + + /* Wait for in flight requests to finish.*/ + while (1) { + spin_lock(&fsvq->lock); + if (!fsvq->in_flight) { + spin_unlock(&fsvq->lock); + break; + } + spin_unlock(&fsvq->lock); + usleep_range(1000, 2000); + } + + flush_work(&fsvq->done_work); + flush_delayed_work(&fsvq->dispatch_work); +} + +static inline void drain_hiprio_queued_reqs(struct virtio_fs_vq *fsvq) +{ + struct virtio_fs_forget *forget; + + spin_lock(&fsvq->lock); + while (1) { + forget = list_first_entry_or_null(&fsvq->queued_reqs, + struct virtio_fs_forget, list); + if (!forget) + break; + list_del(&forget->list); + kfree(forget); + } + spin_unlock(&fsvq->lock); +} + +static void virtio_fs_drain_all_queues(struct virtio_fs *fs) +{ + struct virtio_fs_vq *fsvq; + int i; + + for (i = 0; i < fs->nvqs; i++) { + fsvq = &fs->vqs[i]; + if (i == VQ_HIPRIO) + drain_hiprio_queued_reqs(fsvq); + + virtio_fs_drain_queue(fsvq); + } +} + /* Add a new instance to the list or return -EEXIST if tag name exists*/ static int virtio_fs_add_instance(struct virtio_fs *fs) { @@ -511,6 +560,7 @@ static void virtio_fs_remove(struct virtio_device *vdev) struct virtio_fs *fs = vdev->priv; virtio_fs_stop_all_queues(fs); + virtio_fs_drain_all_queues(fs); vdev->config->reset(vdev); virtio_fs_cleanup_vqs(vdev, fs); @@ -865,37 +915,6 @@ __releases(fiq->waitq.lock) } } -static void virtio_fs_flush_hiprio_queue(struct virtio_fs_vq *fsvq) -{ - struct virtio_fs_forget *forget; - - WARN_ON(fsvq->in_flight < 0); - - /* Go through pending forget requests and free them */ - spin_lock(&fsvq->lock); - while (1) { - forget = list_first_entry_or_null(&fsvq->queued_reqs, - struct virtio_fs_forget, list); - if (!forget) - break; - list_del(&forget->list); - kfree(forget); - } - - spin_unlock(&fsvq->lock); - - /* Wait for in flight requests to finish.*/ - while (1) { - spin_lock(&fsvq->lock); - if (!fsvq->in_flight) { - spin_unlock(&fsvq->lock); - break; - } - spin_unlock(&fsvq->lock); - usleep_range(1000, 2000); - } -} - const static struct fuse_iqueue_ops virtio_fs_fiq_ops = { .wake_forget_and_unlock = virtio_fs_wake_forget_and_unlock, .wake_interrupt_and_unlock = virtio_fs_wake_interrupt_and_unlock, @@ -988,7 +1007,7 @@ static void virtio_kill_sb(struct super_block *sb) spin_lock(&fsvq->lock); fsvq->connected = false; spin_unlock(&fsvq->lock); - virtio_fs_flush_hiprio_queue(fsvq); + virtio_fs_drain_all_queues(vfs); fuse_kill_sb_anon(sb); virtio_fs_free_devs(vfs); -- 2.20.1 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH 14/18] virtiofs: Add a fuse_iqueue operation to put() reference
Soon I will make virtio_fs object reference counted, where reference will be taken by device as well as by fuse_conn (fuse_conn->fuse_iqueue->fiq_priv). When fuse_connection is going away, it should put its reference on virtio_fs object. So add a fuse_iqueue method which can be used to call into virtio_fs to put the reference on the object (fiq_priv). Signed-off-by: Vivek Goyal --- fs/fuse/fuse_i.h | 5 + fs/fuse/inode.c | 6 ++ 2 files changed, 11 insertions(+) diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 85e2dcad68c1..04e2c000d63f 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -479,6 +479,11 @@ struct fuse_iqueue_ops { */ void (*wake_pending_and_unlock)(struct fuse_iqueue *fiq) __releases(fiq->waitq.lock); + + /** +* Put a reference on fiq_priv. +*/ + void (*put)(struct fuse_iqueue *fiq); }; /** /dev/fuse input queue operations */ diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 7fa0dcc6f565..70a433bdf01f 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -631,8 +631,14 @@ EXPORT_SYMBOL_GPL(fuse_conn_init); void fuse_conn_put(struct fuse_conn *fc) { if (refcount_dec_and_test(&fc->count)) { + struct fuse_iqueue *fiq = &fc->iq; + if (fc->destroy_req) fuse_request_free(fc->destroy_req); + if (fiq->priv && fiq->ops->put) { + fiq->ops->put(fiq); + fiq->priv = NULL; + } put_pid_ns(fc->pid_ns); put_user_ns(fc->user_ns); fc->release(fc); -- 2.20.1 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH 06/18] virtiofs: ->remove should not clean virtiofs fuse devices
We maintain a fuse device per virt queue. This fuse devices are allocated and installed during mount time and should be cleaned up when super block is going away. Device removal should not clean it. Device removal should stop queues and virtuques can go away. Signed-off-by: Vivek Goyal --- fs/fuse/virtio_fs.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 5df97dfee37d..f68a25ca9e9d 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -497,8 +497,6 @@ static void virtio_fs_remove(struct virtio_device *vdev) { struct virtio_fs *fs = vdev->priv; - virtio_fs_free_devs(fs); - vdev->config->reset(vdev); virtio_fs_cleanup_vqs(vdev, fs); -- 2.20.1 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH 12/18] virtiofs: Use virtio_fs_free_devs() in error path
We already have an helper to cleanup fuse devices. Use that instead of duplicating the code. Signed-off-by: Vivek Goyal --- fs/fuse/virtio_fs.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index a76bd5a04521..40259368a6bd 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -999,8 +999,7 @@ static int virtio_fs_fill_super(struct super_block *sb) err_free_init_req: fuse_request_free(init_req); err_free_fuse_devs: - for (i = 0; i < fs->nvqs; i++) - fuse_dev_free(fs->vqs[i].fud); + virtio_fs_free_devs(fs); err: return err; } -- 2.20.1 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH 03/18] virtiofs: Pass fsvq instead of vq as parameter to virtio_fs_enqueue_req
Pass fsvq instead of vq as parameter to virtio_fs_enqueue_req(). We will retrieve vq from fsvq under spin lock. Later in the patch series we will retrieve vq only if fsvq is still connected other vq might have been cleaned up by device ->remove code and we will return error. Signed-off-by: Vivek Goyal --- fs/fuse/virtio_fs.c | 9 + 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index e9497b565dd8..9d30530e3ca9 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -698,14 +698,15 @@ static unsigned int sg_init_fuse_args(struct scatterlist *sg, } /* Add a request to a virtqueue and kick the device */ -static int virtio_fs_enqueue_req(struct virtqueue *vq, struct fuse_req *req) +static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, +struct fuse_req *req) { /* requests need at least 4 elements */ struct scatterlist *stack_sgs[6]; struct scatterlist stack_sg[ARRAY_SIZE(stack_sgs)]; struct scatterlist **sgs = stack_sgs; struct scatterlist *sg = stack_sg; - struct virtio_fs_vq *fsvq; + struct virtqueue *vq; unsigned int argbuf_used = 0; unsigned int out_sgs = 0; unsigned int in_sgs = 0; @@ -752,9 +753,9 @@ static int virtio_fs_enqueue_req(struct virtqueue *vq, struct fuse_req *req) for (i = 0; i < total_sgs; i++) sgs[i] = &sg[i]; - fsvq = vq_to_fsvq(vq); spin_lock(&fsvq->lock); + vq = fsvq->vq; ret = virtqueue_add_sgs(vq, sgs, out_sgs, in_sgs, req, GFP_ATOMIC); if (ret < 0) { /* TODO handle full virtqueue */ @@ -824,7 +825,7 @@ __releases(fiq->waitq.lock) /* TODO check for FR_INTERRUPTED? */ retry: - ret = virtio_fs_enqueue_req(fs->vqs[queue_id].vq, req); + ret = virtio_fs_enqueue_req(&fs->vqs[queue_id], req); if (ret < 0) { if (ret == -ENOMEM || ret == -ENOSPC) { /* Virtqueue full. Retry submission */ -- 2.20.1 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH v4 15/16] virtio-fs: add virtiofs filesystem
On Tue, Sep 03, 2019 at 09:55:49AM -0400, Michael S. Tsirkin wrote: [..] > What's with all of the TODOs? Some of these are really scary, > looks like they need to be figured out before this is merged. Hi Michael, One of the issue I noticed is races w.r.t device removal and super block initialization. I am about to post a set of patches which take care of these races and also get rid of some of the scary TODOs. Other TODOs like suspend/restore, multiqueue support etc are improvements which we can do over a period of time. [..] > > +/* Per-virtqueue state */ > > +struct virtio_fs_vq { > > + spinlock_t lock; > > + struct virtqueue *vq; /* protected by ->lock */ > > + struct work_struct done_work; > > + struct list_head queued_reqs; > > + struct delayed_work dispatch_work; > > + struct fuse_dev *fud; > > + bool connected; > > + long in_flight; > > + char name[24]; > > I'd keep names somewhere separate as they are not used on data path. Ok, this sounds like a nice to have. Will take care of this once base patch gets merged. [..] > > +struct virtio_fs_forget { > > + struct fuse_in_header ih; > > + struct fuse_forget_in arg; > > These structures are all native endian. > > Passing them to host will make cross-endian setups painful to support, > and hardware implementations impossible. > > How about converting everything to LE? So looks like endianness issue is now resolved (going by the other emails). So I will not worry about it. [..] > > +/* Add a new instance to the list or return -EEXIST if tag name exists*/ > > +static int virtio_fs_add_instance(struct virtio_fs *fs) > > +{ > > + struct virtio_fs *fs2; > > + bool duplicate = false; > > + > > + mutex_lock(&virtio_fs_mutex); > > + > > + list_for_each_entry(fs2, &virtio_fs_instances, list) { > > + if (strcmp(fs->tag, fs2->tag) == 0) > > + duplicate = true; > > + } > > + > > + if (!duplicate) > > + list_add_tail(&fs->list, &virtio_fs_instances); > > > This is O(N^2) as it's presumably called for each istance. > Doesn't scale - please switch to a tree or such. This is O(N) and not O(N^2) right? Addition of device is O(N), search during mount is O(N). This is not a frequent event at all and number of virtiofs instances per guest are expected to be fairly small (say less than 10). So I really don't think there is any value in converting this into a tree (instead of a list). [..] > > +static void virtio_fs_free_devs(struct virtio_fs *fs) > > +{ > > + unsigned int i; > > + > > + /* TODO lock */ > > Doesn't inspire confidence, does it? Agreed. Getting rid of this in set of fixes I am about to post. > > > + > > + for (i = 0; i < fs->nvqs; i++) { > > + struct virtio_fs_vq *fsvq = &fs->vqs[i]; > > + > > + if (!fsvq->fud) > > + continue; > > + > > + flush_work(&fsvq->done_work); > > + flush_delayed_work(&fsvq->dispatch_work); > > + > > + /* TODO need to quiesce/end_requests/decrement dev_count */ > > Indeed. Won't this crash if we don't? Took care of this as well. [..] > > +static void virtio_fs_hiprio_dispatch_work(struct work_struct *work) > > +{ > > + struct virtio_fs_forget *forget; > > + struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, > > +dispatch_work.work); > > + struct virtqueue *vq = fsvq->vq; > > + struct scatterlist sg; > > + struct scatterlist *sgs[] = {&sg}; > > + bool notify; > > + int ret; > > + > > + pr_debug("virtio-fs: worker %s called.\n", __func__); > > + while (1) { > > + spin_lock(&fsvq->lock); > > + forget = list_first_entry_or_null(&fsvq->queued_reqs, > > + struct virtio_fs_forget, list); > > + if (!forget) { > > + spin_unlock(&fsvq->lock); > > + return; > > + } > > + > > + list_del(&forget->list); > > + if (!fsvq->connected) { > > + spin_unlock(&fsvq->lock); > > + kfree(forget); > > + continue; > > + } > > + > > + sg_init_one(&sg, forget, sizeof(*forget)); > > This passes to host a structure including "struct list_head list"; > > Not a good idea. Ok, host does not have to see "struct list_head list". Its needed for guest. Will look into getting rid of this. > > > > + > > + /* Enqueue the request */ > > + dev_dbg(&vq->vdev->dev, "%s\n", __func__); > > + ret = virtqueue_add_sgs(vq, sgs, 1, 0, forget, GFP_ATOMIC); > > > This is easier as add_outbuf. Will look into it. > > Also - why GFP_ATOMIC? Hmm..., may be it can be GFP_KERNEL. I don't see atomic context here. Will look into it. > > > + if (ret < 0) { > > + if (ret == -ENOMEM || ret == -ENOSPC) { > > + pr_debug("virtio-fs: Could not queue FORGET: >
Re: DANGER WILL ROBINSON, DANGER
On Fri, Aug 23, 2019 at 12:39:21PM +, Mircea CIRJALIU - MELIU wrote: > > On Thu, Aug 15, 2019 at 03:19:29PM -0400, Jerome Glisse wrote: > > > On Tue, Aug 13, 2019 at 02:01:35PM +0300, Adalbert Lazăr wrote: > > > > On Fri, 9 Aug 2019 09:24:44 -0700, Matthew Wilcox > > wrote: > > > > > On Fri, Aug 09, 2019 at 07:00:26PM +0300, Adalbert Lazăr wrote: > > > > > > +++ b/include/linux/page-flags.h > > > > > > @@ -417,8 +417,10 @@ PAGEFLAG(Idle, idle, PF_ANY) > > > > > > */ > > > > > > #define PAGE_MAPPING_ANON 0x1 > > > > > > #define PAGE_MAPPING_MOVABLE 0x2 > > > > > > +#define PAGE_MAPPING_REMOTE0x4 > > > > > > > > > > Uh. How do you know page->mapping would otherwise have bit 2 > > clear? > > > > > Who's guaranteeing that? > > > > > > > > > > This is an awfully big patch to the memory management code, buried > > > > > in the middle of a gigantic series which almost guarantees nobody > > > > > would look at it. I call shenanigans. > > > > > > > > > > > @@ -1021,7 +1022,7 @@ void page_move_anon_rmap(struct page > > *page, struct vm_area_struct *vma) > > > > > > * __page_set_anon_rmap - set up new anonymous rmap > > > > > > * @page: Page or Hugepage to add to rmap > > > > > > * @vma: VM area to add page to. > > > > > > - * @address: User virtual address of the mapping > > > > > > + * @address: User virtual address of the mapping > > > > > > > > > > And mixing in fluff changes like this is a real no-no. Try again. > > > > > > > > > > > > > No bad intentions, just overzealous. > > > > I didn't want to hide anything from our patches. > > > > Once we advance with the introspection patches related to KVM we'll > > > > be back with the remote mapping patch, split and cleaned. > > > > > > They are not bit left in struct page ! Looking at the patch it seems > > > you want to have your own pin count just for KVM. This is bad, we are > > > already trying to solve the GUP thing (see all various patchset about > > > GUP posted recently). > > > > > > You need to rethink how you want to achieve this. Why not simply a > > > remote read()/write() into the process memory ie KVMI would call an > > > ioctl that allow to read or write into a remote process memory like > > > ptrace() but on steroid ... > > > > > > Adding this whole big complex infrastructure without justification of > > > why we need to avoid round trip is just too much really. > > > > Thinking a bit more about this, you can achieve the same thing without > > adding a single line to any mm code. Instead of having mmap with > > PROT_NONE | MAP_LOCKED you have userspace mmap some kvm device > > file (i am assuming this is something you already have and can control the > > mmap callback). > > > > So now kernel side you have a vma with a vm_operations_struct under your > > control this means that everything you want to block mm wise from within > > the inspector process can be block through those call- backs > > (find_special_page() specificaly for which you have to return NULL all the > > time). > > > > To mirror target process memory you can use hmm_mirror, when you > > populate the inspector process page table you use insert_pfn() (mmap of > > the kvm device file must mark this vma as PFNMAP). > > > > By following the hmm_mirror API, anytime the target process has a change in > > its page table (ie virtual address -> page) you will get a callback and all > > you > > have to do is clear the page table within the inspector process and flush > > tlb > > (use zap_page_range). > > > > On page fault within the inspector process the fault callback of vm_ops will > > get call and from there you call hmm_mirror following its API. > > > > Oh also mark the vma with VM_WIPEONFORK to avoid any issue if the > > inspector process use fork() (you could support fork but then you would > > need to mark the vma as SHARED and use unmap_mapping_pages instead of > > zap_page_range). > > > > > > There everything you want to do with already upstream mm code. > > I'm the author of remote mapping, so I owe everybody some explanations. > My requirement was to map pages from one QEMU process to another QEMU > process (our inspector process works in a virtual machine of its own). So I > had > to implement a KSM-like page sharing between processes, where an anon page > from the target QEMU's working memory is promoted to a remote page and > mapped in the inspector QEMU's working memory (both anon VMAs). > The extra page flag is for differentiating the page for rmap walking. > > The mapping requests come at PAGE_SIZE granularity for random addresses > within the target/inspector QEMUs, so I couldn't do any linear mapping that > would keep things simpler. > > I have an extra patch that does remote mapping by mirroring an entire VMA > from the target process by way of a device file. This thing creates a > separate > mirror VMA in my inspector process (at the moment a QEMU), but then I > bumped into the KVM hva->gpa mapping, which
Re: [PATCH] drm/virtio: fix command submission with objects but without fence.
On Wed, Sep 4, 2019 at 10:23 PM Gerd Hoffmann wrote: > > On Wed, Sep 04, 2019 at 04:10:30PM -0700, Chia-I Wu wrote: > > On Wed, Sep 4, 2019 at 12:48 AM Gerd Hoffmann wrote: > > > > > > Only call virtio_gpu_array_add_fence if we actually have a fence. > > > > > > Fixes: da758d51968a ("drm/virtio: rework virtio_gpu_execbuffer_ioctl > > > fencing") > > > Signed-off-by: Gerd Hoffmann > > > --- > > > drivers/gpu/drm/virtio/virtgpu_vq.c | 9 + > > > 1 file changed, 5 insertions(+), 4 deletions(-) > > > > > > diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c > > > b/drivers/gpu/drm/virtio/virtgpu_vq.c > > > index 595fa6ec2d58..7fd2851f7b97 100644 > > > --- a/drivers/gpu/drm/virtio/virtgpu_vq.c > > > +++ b/drivers/gpu/drm/virtio/virtgpu_vq.c > > > @@ -339,11 +339,12 @@ static void > > > virtio_gpu_queue_fenced_ctrl_buffer(struct virtio_gpu_device *vgdev, > > > goto again; > > > } > > > > > > - if (fence) > > > + if (fence) { > > > virtio_gpu_fence_emit(vgdev, hdr, fence); > > > - if (vbuf->objs) { > > > - virtio_gpu_array_add_fence(vbuf->objs, &fence->f); > > > - virtio_gpu_array_unlock_resv(vbuf->objs); > > > + if (vbuf->objs) { > > > + virtio_gpu_array_add_fence(vbuf->objs, &fence->f); > > > + virtio_gpu_array_unlock_resv(vbuf->objs); > > > + } > > This leaks when fence == NULL and vbuf->objs != NULL (which can really > > happen IIRC... not at my desk to check). > > Yes, it can happen, for example when flushing dumb buffers. > > But I don't think we leak in this case. The code paths which don't need > a fence also do not call virtio_gpu_array_lock_resv(), so things are > balanced. The actual release of the objs happens in > virtio_gpu_dequeue_ctrl_func() via virtio_gpu_array_put_free_delayed(). I misread and thought this was in virtio_gpu_dequeue_ctrl_func. Sorry :( Reviewed-by: Chia-I Wu > > cheers, > Gerd > ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH 0/2] Revert and rework on the metadata accelreation
On Thu, Sep 05, 2019 at 08:27:34PM +0800, Jason Wang wrote: > Hi: > > Per request from Michael and Jason, the metadata accelreation is > reverted in this version and rework in next version. > > Please review. > > Thanks > > Jason Wang (2): > Revert "vhost: access vq metadata through kernel virtual address" > vhost: re-introducing metadata acceleration through kernel virtual > address There are a bunch of patches in the queue already that will help vhost, and I a working on one for next cycle that will help alot more too. I think you should apply the revert this cycle and rebase the other patch for next.. Jason ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: Xorg indefinitely hangs in kernelspace
On 05.09.19 10:14, Gerd Hoffmann wrote: > On Tue, Aug 06, 2019 at 09:00:10PM +0300, Jaak Ristioja wrote: >> Hello! >> >> I'm writing to report a crash in the QXL / DRM code in the Linux kernel. >> I originally filed the issue on LaunchPad and more details can be found >> there, although I doubt whether these details are useful. > > Any change with kernel 5.3-rc7 ? Didn't try. Did you change something? I could try, but I've done so before and every time this bug manifests itself with MAJOR.MINOR-rc# I get asked to try version MAJOR.(MINOR+1)-rc# so I guess I could as well give up? Alright, I'll install 5.3-rc7, but once more it might take some time for this bug to expose itself. One thing to note though, is that this occurred much more often with older kernels (or older versions of the Kubuntu desktop/Firefox in my VM), sometimes even several times in an hour of use. Best regards, J ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH 2/2] vhost: re-introducing metadata acceleration through kernel virtual address
This is a rework on the commit 7f466032dc9e ("vhost: access vq metadata through kernel virtual address"). It was noticed that the copy_to/from_user() friends that was used to access virtqueue metdata tends to be very expensive for dataplane implementation like vhost since it involves lots of software checks, speculation barriers, hardware feature toggling (e.g SMAP). The extra cost will be more obvious when transferring small packets since the time spent on metadata accessing become more significant. This patch tries to eliminate those overheads by accessing them through direct mapping of those pages. Invalidation callbacks is implemented for co-operation with general VM management (swap, KSM, THP or NUMA balancing). We will try to get the direct mapping of vq metadata before each round of packet processing if it doesn't exist. If we fail, we will simplely fallback to copy_to/from_user() friends. This invalidation, direct mapping access and set are synchronized through spinlock. This takes a step back from the original commit 7f466032dc9e ("vhost: access vq metadata through kernel virtual address") which tries to RCU which is suspicious and hard to be reviewed. This won't perform as well as RCU because of the atomic, this could be addressed by the future optimization. This method might does not work for high mem page which requires temporary mapping so we just fallback to normal copy_to/from_user() and may not for arch that has virtual tagged cache since extra cache flushing is needed to eliminate the alias. This will result complex logic and bad performance. For those archs, this patch simply go for copy_to/from_user() friends. This is done by ruling out kernel mapping codes through ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE. Note that this is only done when device IOTLB is not enabled. We could use similar method to optimize IOTLB in the future. Tests shows at most about 22% improvement on TX PPS when using virtio-user + vhost_net + xdp1 + TAP on 4.0GHz Kaby Lake. SMAP on | SMAP off Before: 4.9Mpps | 6.9Mpps After: 6.0Mpps | 7.5Mpps On a elder CPU Sandy Bridge without SMAP support. TX PPS doesn't see any difference. Cc: Andrea Arcangeli Cc: James Bottomley Cc: Christoph Hellwig Cc: David Miller Cc: Jerome Glisse Cc: Jason Gunthorpe Cc: linux...@kvack.org Cc: linux-arm-ker...@lists.infradead.org Cc: linux-par...@vger.kernel.org Signed-off-by: Jason Wang Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vhost.c | 551 +- drivers/vhost/vhost.h | 41 2 files changed, 589 insertions(+), 3 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 791562e03fe0..f98155f28f02 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -298,6 +298,182 @@ static void vhost_vq_meta_reset(struct vhost_dev *d) __vhost_vq_meta_reset(d->vqs[i]); } +#if VHOST_ARCH_CAN_ACCEL_UACCESS +static void vhost_map_unprefetch(struct vhost_map *map) +{ + kfree(map->pages); + kfree(map); +} + +static void vhost_set_map_dirty(struct vhost_virtqueue *vq, + struct vhost_map *map, int index) +{ + struct vhost_uaddr *uaddr = &vq->uaddrs[index]; + int i; + + if (uaddr->write) { + for (i = 0; i < map->npages; i++) + set_page_dirty(map->pages[i]); + } +} + +static void vhost_uninit_vq_maps(struct vhost_virtqueue *vq) +{ + struct vhost_map *map[VHOST_NUM_ADDRS]; + int i; + + spin_lock(&vq->mmu_lock); + for (i = 0; i < VHOST_NUM_ADDRS; i++) { + map[i] = vq->maps[i]; + if (map[i]) { + vhost_set_map_dirty(vq, map[i], i); + vq->maps[i] = NULL; + } + } + spin_unlock(&vq->mmu_lock); + + /* No need for synchronization since we are serialized with +* memory accessors (e.g vq mutex held). +*/ + + for (i = 0; i < VHOST_NUM_ADDRS; i++) + if (map[i]) + vhost_map_unprefetch(map[i]); + +} + +static void vhost_reset_vq_maps(struct vhost_virtqueue *vq) +{ + int i; + + vhost_uninit_vq_maps(vq); + for (i = 0; i < VHOST_NUM_ADDRS; i++) + vq->uaddrs[i].size = 0; +} + +static bool vhost_map_range_overlap(struct vhost_uaddr *uaddr, +unsigned long start, +unsigned long end) +{ + if (unlikely(!uaddr->size)) + return false; + + return !(end < uaddr->uaddr || start > uaddr->uaddr - 1 + uaddr->size); +} + +static void inline vhost_vq_access_map_begin(struct vhost_virtqueue *vq) +{ + spin_lock(&vq->mmu_lock); +} + +static void inline vhost_vq_access_map_end(struct vhost_virtqueue *vq) +{ + spin_unlock(&vq->mmu_lock); +} + +static int vhost_invalidate_vq_start(struct vhost_virtqueue *vq, +int index, +
[PATCH 0/2] Revert and rework on the metadata accelreation
Hi: Per request from Michael and Jason, the metadata accelreation is reverted in this version and rework in next version. Please review. Thanks Jason Wang (2): Revert "vhost: access vq metadata through kernel virtual address" vhost: re-introducing metadata acceleration through kernel virtual address drivers/vhost/vhost.c | 202 +- drivers/vhost/vhost.h | 8 +- 2 files changed, 123 insertions(+), 87 deletions(-) -- 2.19.1 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH 1/2] Revert "vhost: access vq metadata through kernel virtual address"
It was reported that metadata acceleration introduces several issues, so this patch reverts commit ff466032dc9e5a61217f22ea34b2df932786bbfc, 73f628ec9e6bcc45b77c53fe6d0c0ec55eaf82af and 0b4a7092ffe568a55bf8f3cefdf79ff666586d91. We will rework it on the next version. Cc: Jason Gunthorpe Signed-off-by: Jason Wang --- drivers/vhost/vhost.c | 515 +- drivers/vhost/vhost.h | 41 2 files changed, 3 insertions(+), 553 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 0536f8526359..791562e03fe0 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -298,160 +298,6 @@ static void vhost_vq_meta_reset(struct vhost_dev *d) __vhost_vq_meta_reset(d->vqs[i]); } -#if VHOST_ARCH_CAN_ACCEL_UACCESS -static void vhost_map_unprefetch(struct vhost_map *map) -{ - kfree(map->pages); - map->pages = NULL; - map->npages = 0; - map->addr = NULL; -} - -static void vhost_uninit_vq_maps(struct vhost_virtqueue *vq) -{ - struct vhost_map *map[VHOST_NUM_ADDRS]; - int i; - - spin_lock(&vq->mmu_lock); - for (i = 0; i < VHOST_NUM_ADDRS; i++) { - map[i] = rcu_dereference_protected(vq->maps[i], - lockdep_is_held(&vq->mmu_lock)); - if (map[i]) - rcu_assign_pointer(vq->maps[i], NULL); - } - spin_unlock(&vq->mmu_lock); - - synchronize_rcu(); - - for (i = 0; i < VHOST_NUM_ADDRS; i++) - if (map[i]) - vhost_map_unprefetch(map[i]); - -} - -static void vhost_reset_vq_maps(struct vhost_virtqueue *vq) -{ - int i; - - vhost_uninit_vq_maps(vq); - for (i = 0; i < VHOST_NUM_ADDRS; i++) - vq->uaddrs[i].size = 0; -} - -static bool vhost_map_range_overlap(struct vhost_uaddr *uaddr, -unsigned long start, -unsigned long end) -{ - if (unlikely(!uaddr->size)) - return false; - - return !(end < uaddr->uaddr || start > uaddr->uaddr - 1 + uaddr->size); -} - -static void vhost_invalidate_vq_start(struct vhost_virtqueue *vq, - int index, - unsigned long start, - unsigned long end) -{ - struct vhost_uaddr *uaddr = &vq->uaddrs[index]; - struct vhost_map *map; - int i; - - if (!vhost_map_range_overlap(uaddr, start, end)) - return; - - spin_lock(&vq->mmu_lock); - ++vq->invalidate_count; - - map = rcu_dereference_protected(vq->maps[index], - lockdep_is_held(&vq->mmu_lock)); - if (map) { - if (uaddr->write) { - for (i = 0; i < map->npages; i++) - set_page_dirty(map->pages[i]); - } - rcu_assign_pointer(vq->maps[index], NULL); - } - spin_unlock(&vq->mmu_lock); - - if (map) { - synchronize_rcu(); - vhost_map_unprefetch(map); - } -} - -static void vhost_invalidate_vq_end(struct vhost_virtqueue *vq, - int index, - unsigned long start, - unsigned long end) -{ - if (!vhost_map_range_overlap(&vq->uaddrs[index], start, end)) - return; - - spin_lock(&vq->mmu_lock); - --vq->invalidate_count; - spin_unlock(&vq->mmu_lock); -} - -static int vhost_invalidate_range_start(struct mmu_notifier *mn, - const struct mmu_notifier_range *range) -{ - struct vhost_dev *dev = container_of(mn, struct vhost_dev, -mmu_notifier); - int i, j; - - if (!mmu_notifier_range_blockable(range)) - return -EAGAIN; - - for (i = 0; i < dev->nvqs; i++) { - struct vhost_virtqueue *vq = dev->vqs[i]; - - for (j = 0; j < VHOST_NUM_ADDRS; j++) - vhost_invalidate_vq_start(vq, j, - range->start, - range->end); - } - - return 0; -} - -static void vhost_invalidate_range_end(struct mmu_notifier *mn, - const struct mmu_notifier_range *range) -{ - struct vhost_dev *dev = container_of(mn, struct vhost_dev, -mmu_notifier); - int i, j; - - for (i = 0; i < dev->nvqs; i++) { - struct vhost_virtqueue *vq = dev->vqs[i]; - - for (j = 0; j < VHOST_NUM_ADDRS; j++) - vhost_invalidate_vq_end(vq, j, - range->start, - range->end)
Re: [PATCH net-next] vsock/virtio: a better comment on credit update
From: "Michael S. Tsirkin" Date: Tue, 3 Sep 2019 03:38:16 -0400 > The comment we have is just repeating what the code does. > Include the *reason* for the condition instead. > > Cc: Stefano Garzarella > Signed-off-by: Michael S. Tsirkin Applied. ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: Xorg indefinitely hangs in kernelspace
On Tue, Aug 06, 2019 at 09:00:10PM +0300, Jaak Ristioja wrote: > Hello! > > I'm writing to report a crash in the QXL / DRM code in the Linux kernel. > I originally filed the issue on LaunchPad and more details can be found > there, although I doubt whether these details are useful. Any change with kernel 5.3-rc7 ? cheers, Gerd ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH 6/8] drm/qxl: switch to gem vma offset manager
Pass gem vma_offset_manager to ttm_bo_device_init(), so ttm uses it instead of its own embedded struct. This makes some gem functions (specifically drm_gem_object_lookup) work on ttm objects. Signed-off-by: Gerd Hoffmann --- drivers/gpu/drm/qxl/qxl_ttm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c index 69da0eea6e4c..cbc6c2ba8630 100644 --- a/drivers/gpu/drm/qxl/qxl_ttm.c +++ b/drivers/gpu/drm/qxl/qxl_ttm.c @@ -325,7 +325,7 @@ int qxl_ttm_init(struct qxl_device *qdev) r = ttm_bo_device_init(&qdev->mman.bdev, &qxl_bo_driver, qdev->ddev.anon_inode->i_mapping, - NULL, + qdev->ddev.vma_offset_manager, false); if (r) { DRM_ERROR("failed initializing buffer object driver(%d).\n", r); -- 2.18.1 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH 1/8] drm/ttm: turn ttm_bo_device.vma_manager into a pointer
Rename the embedded struct vma_offset_manager, new name is _vma_manager. ttm_bo_device.vma_manager changed to a pointer. The ttm_bo_device_init() function gets an additional vma_manager argument which allows to initialize ttm with a different vma manager. When passing NULL the embedded _vma_manager is used. All callers are updated to pass NULL, so the behavior doesn't change. Signed-off-by: Gerd Hoffmann --- include/drm/ttm/ttm_bo_driver.h | 8 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 1 + drivers/gpu/drm/drm_vram_mm_helper.c| 1 + drivers/gpu/drm/nouveau/nouveau_ttm.c | 1 + drivers/gpu/drm/qxl/qxl_ttm.c | 1 + drivers/gpu/drm/radeon/radeon_ttm.c | 1 + drivers/gpu/drm/ttm/ttm_bo.c| 13 + drivers/gpu/drm/ttm/ttm_bo_vm.c | 6 +++--- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 1 + 9 files changed, 24 insertions(+), 9 deletions(-) diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index e88e00c6cbf2..e365434f92b3 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -441,7 +441,8 @@ extern struct ttm_bo_global { * * @driver: Pointer to a struct ttm_bo_driver struct setup by the driver. * @man: An array of mem_type_managers. - * @vma_manager: Address space manager + * @vma_manager: Address space manager (pointer) + * @_vma_manager: Address space manager (enbedded) * lru_lock: Spinlock that protects the buffer+device lru lists and * ddestroy lists. * @dev_mapping: A pointer to the struct address_space representing the @@ -464,7 +465,8 @@ struct ttm_bo_device { /* * Protected by internal locks. */ - struct drm_vma_offset_manager vma_manager; + struct drm_vma_offset_manager *vma_manager; + struct drm_vma_offset_manager _vma_manager; /* * Protected by the global:lru lock. @@ -585,6 +587,7 @@ int ttm_bo_device_release(struct ttm_bo_device *bdev); * @glob: A pointer to an initialized struct ttm_bo_global. * @driver: A pointer to a struct ttm_bo_driver set up by the caller. * @mapping: The address space to use for this bo. + * @vma_manager: A pointer to a vma manager or NULL. * @file_page_offset: Offset into the device address space that is available * for buffer data. This ensures compatibility with other users of the * address space. @@ -596,6 +599,7 @@ int ttm_bo_device_release(struct ttm_bo_device *bdev); int ttm_bo_device_init(struct ttm_bo_device *bdev, struct ttm_bo_driver *driver, struct address_space *mapping, + struct drm_vma_offset_manager *vma_manager, bool need_dma32); /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index fb09314bcfd4..34ee5d725faf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1728,6 +1728,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) r = ttm_bo_device_init(&adev->mman.bdev, &amdgpu_bo_driver, adev->ddev->anon_inode->i_mapping, + NULL, adev->need_dma32); if (r) { DRM_ERROR("failed initializing buffer object driver(%d).\n", r); diff --git a/drivers/gpu/drm/drm_vram_mm_helper.c b/drivers/gpu/drm/drm_vram_mm_helper.c index c911781d6728..56fd1519eb35 100644 --- a/drivers/gpu/drm/drm_vram_mm_helper.c +++ b/drivers/gpu/drm/drm_vram_mm_helper.c @@ -172,6 +172,7 @@ int drm_vram_mm_init(struct drm_vram_mm *vmm, struct drm_device *dev, ret = ttm_bo_device_init(&vmm->bdev, &bo_driver, dev->anon_inode->i_mapping, +NULL, true); if (ret) return ret; diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c b/drivers/gpu/drm/nouveau/nouveau_ttm.c index f0daf958e03a..e67eb10843d1 100644 --- a/drivers/gpu/drm/nouveau/nouveau_ttm.c +++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c @@ -236,6 +236,7 @@ nouveau_ttm_init(struct nouveau_drm *drm) ret = ttm_bo_device_init(&drm->ttm.bdev, &nouveau_bo_driver, dev->anon_inode->i_mapping, +NULL, drm->client.mmu.dmabits <= 32 ? true : false); if (ret) { NV_ERROR(drm, "error initialising bo driver, %d\n", ret); diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c index 9b24514c75aa..69da0eea6e4c 100644 --- a/drivers/gpu/drm/qxl/qxl_ttm.c +++ b/drivers/gpu/drm/qxl/qxl_ttm.c @@ -325,6 +325,7 @@ int qxl_ttm_init(struct qxl_device *qdev) r = ttm_bo_device_init(&qdev->mman.bdev, &qxl_bo_driver, qdev->ddev.anon_inode