Re: Xorg indefinitely hangs in kernelspace

2019-09-05 Thread Hillf Danton


On Tue, 6 Aug 2019 21:00:10 +0300 From:   Jaak Ristioja 
> Hello!
> 
> I'm writing to report a crash in the QXL / DRM code in the Linux kernel.
> I originally filed the issue on LaunchPad and more details can be found
> there, although I doubt whether these details are useful.
> 
>   https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1813620
> 
> I first experienced these issues with:
> 
> * Ubuntu 18.04 (probably kernel 4.15.something)
> * Ubuntu 18.10 (kernel 4.18.0-13)
> * Ubuntu 19.04 (kernel 5.0.0-13-generic)
> * Ubuntu 19.04 (mainline kernel 5.1-rc7)
> * Ubuntu 19.04 (mainline kernel 5.2.0-050200rc1-generic)
> 
> Here is the crash output from dmesg:
> 
> [354073.713350] INFO: task Xorg:920 blocked for more than 120 seconds.
> [354073.717755]   Not tainted 5.2.0-050200rc1-generic #201905191930
> [354073.722277] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
> disables this message.
> [354073.738332] Xorg            D    0   920    854 0x00404004
> [354073.738334] Call Trace:
> [354073.738340]  __schedule+0x2ba/0x650
> [354073.738342]  schedule+0x2d/0x90
> [354073.738343]  schedule_preempt_disabled+0xe/0x10
> [354073.738345]  __ww_mutex_lock.isra.11+0x3e0/0x750
> [354073.738346]  __ww_mutex_lock_slowpath+0x16/0x20
> [354073.738347]  ww_mutex_lock+0x34/0x50
> [354073.738352]  ttm_eu_reserve_buffers+0x1f9/0x2e0 [ttm]
> [354073.738356]  qxl_release_reserve_list+0x67/0x150 [qxl]
> [354073.738358]  ? qxl_bo_pin+0xaa/0x190 [qxl]
> [354073.738359]  qxl_cursor_atomic_update+0x1b0/0x2e0 [qxl]
> [354073.738367]  drm_atomic_helper_commit_planes+0xb9/0x220 [drm_kms_helper]
> [354073.738371]  drm_atomic_helper_commit_tail+0x2b/0x70 [drm_kms_helper]
> [354073.738374]  commit_tail+0x67/0x70 [drm_kms_helper]
> [354073.738378]  drm_atomic_helper_commit+0x113/0x120 [drm_kms_helper]
> [354073.738390]  drm_atomic_commit+0x4a/0x50 [drm]
> [354073.738394]  drm_atomic_helper_update_plane+0xe9/0x100 [drm_kms_helper]
> [354073.738402]  __setplane_atomic+0xd3/0x120 [drm]
> [354073.738410]  drm_mode_cursor_universal+0x142/0x270 [drm]
> [354073.738418]  drm_mode_cursor_common+0xcb/0x220 [drm]
> [354073.738425]  ? drm_mode_cursor_ioctl+0x60/0x60 [drm]
> [354073.738432]  drm_mode_cursor2_ioctl+0xe/0x10 [drm]
> [354073.738438]  drm_ioctl_kernel+0xb0/0x100 [drm]
> [354073.738440]  ? ___sys_recvmsg+0x16c/0x200
> [354073.738445]  drm_ioctl+0x233/0x410 [drm]
> [354073.738452]  ? drm_mode_cursor_ioctl+0x60/0x60 [drm]
> [354073.738454]  ? timerqueue_add+0x57/0x90
> [354073.738456]  ? enqueue_hrtimer+0x3c/0x90
> [354073.738458]  do_vfs_ioctl+0xa9/0x640
> [354073.738459]  ? fput+0x13/0x20
> [354073.738461]  ? __sys_recvmsg+0x88/0xa0
> [354073.738462]  ksys_ioctl+0x67/0x90
> [354073.738463]  __x64_sys_ioctl+0x1a/0x20
> [354073.738465]  do_syscall_64+0x5a/0x140
> [354073.738467]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
> [354073.738468] RIP: 0033:0x7ffad14d3417
> [354073.738472] Code: Bad RIP value.
> [354073.738472] RSP: 002b:7ffdd5679978 EFLAGS: 3246 ORIG_RAX:
> 0010
> [354073.738473] RAX: ffda RBX: 56428a474610 RCX:
> 7ffad14d3417
> [354073.738474] RDX: 7ffdd56799b0 RSI: c02464bb RDI:
> 000e
> [354073.738474] RBP: 7ffdd56799b0 R08: 0040 R09:
> 0010
> [354073.738475] R10: 003f R11: 3246 R12:
> c02464bb
> [354073.738475] R13: 000e R14:  R15:
> 56428a4721d0
> [354073.738511] INFO: task kworker/1:0:27625 blocked for more than 120 
> seconds.
> [354073.745154]   Not tainted 5.2.0-050200rc1-generic #201905191930
> [354073.751900] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
> disables this message.
> [354073.762197] kworker/1:0     D    0 27625      2 0x80004000
> [354073.762205] Workqueue: events qxl_client_monitors_config_work_func [qxl]
> [354073.762206] Call Trace:
> [354073.762211]  __schedule+0x2ba/0x650
> [354073.762214]  schedule+0x2d/0x90
> [354073.762215]  schedule_preempt_disabled+0xe/0x10
> [354073.762216]  __ww_mutex_lock.isra.11+0x3e0/0x750
> [354073.762217]  ? __switch_to_asm+0x34/0x70
> [354073.762218]  ? __switch_to_asm+0x40/0x70
> [354073.762219]  ? __switch_to_asm+0x40/0x70
> [354073.762220]  __ww_mutex_lock_slowpath+0x16/0x20
> [354073.762221]  ww_mutex_lock+0x34/0x50
> [354073.762235]  drm_modeset_lock+0x35/0xb0 [drm]
> [354073.762243]  drm_modeset_lock_all_ctx+0x5d/0xe0 [drm]
> [354073.762251]  drm_modeset_lock_all+0x5e/0xb0 [drm]
> [354073.762252]  qxl_display_read_client_monitors_config+0x1e1/0x370 [qxl]
> [354073.762254]  qxl_client_monitors_config_work_func+0x15/0x20 [qxl]
> [354073.762256]  process_one_work+0x20f/0x410
> [354073.762257]  worker_thread+0x34/0x400
> [354073.762259]  kthread+0x120/0x140
> [354073.762260]  ? process_one_work+0x410/0x410
> [354073.762261]  ? __kthread_parkme+0x70/0x70
> [354073.762262]  ret_from_fork+0x35/0x40
> 

--- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
+++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
@@ -97,8 +97

Re: [PATCH v2] drm/virtio: Use vmalloc for command buffer allocations.

2019-09-05 Thread Gerd Hoffmann
> +/* How many bytes left in this page. */
> +static unsigned int rest_of_page(void *data)
> +{
> + return PAGE_SIZE - offset_in_page(data);
> +}

Not needed.

> +/* Create sg_table from a vmalloc'd buffer. */
> +static struct sg_table *vmalloc_to_sgt(char *data, uint32_t size, int 
> *sg_ents)
> +{
> + int nents, ret, s, i;
> + struct sg_table *sgt;
> + struct scatterlist *sg;
> + struct page *pg;
> +
> + *sg_ents = 0;
> +
> + sgt = kmalloc(sizeof(*sgt), GFP_KERNEL);
> + if (!sgt)
> + return NULL;
> +
> + nents = DIV_ROUND_UP(size, PAGE_SIZE) + 1;

Why +1?

> + ret = sg_alloc_table(sgt, nents, GFP_KERNEL);
> + if (ret) {
> + kfree(sgt);
> + return NULL;
> + }
> +
> + for_each_sg(sgt->sgl, sg, nents, i) {
> + pg = vmalloc_to_page(data);
> + if (!pg) {
> + sg_free_table(sgt);
> + kfree(sgt);
> + return NULL;
> + }
> +
> + s = rest_of_page(data);
> + if (s > size)
> + s = size;

vmalloc memory is page aligned, so:

s = min(PAGE_SIZE, size);

> + sg_set_page(sg, pg, s, offset_in_page(data));

Offset is always zero.

> +
> + size -= s;
> + data += s;
> + *sg_ents += 1;

sg_ents isn't used anywhere.

> +
> + if (size) {
> + sg_unmark_end(sg);
> + } else {
> + sg_mark_end(sg);
> + break;
> + }

That looks a bit strange.  I guess you need only one of the two because
the other is the default?
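
Putting these review points together, a minimal sketch of what the helper
could collapse to (illustrative only, not the final patch; min_t() is used
because the kernel's min() type-checks PAGE_SIZE, an unsigned long, against
the uint32_t size):

	static struct sg_table *vmalloc_to_sgt(char *data, uint32_t size,
					       int *sg_ents)
	{
		struct sg_table *sgt;
		struct scatterlist *sg;
		struct page *pg;
		uint32_t s;
		int nents, i;

		sgt = kmalloc(sizeof(*sgt), GFP_KERNEL);
		if (!sgt)
			return NULL;

		nents = DIV_ROUND_UP(size, PAGE_SIZE);	/* no +1 */
		if (sg_alloc_table(sgt, nents, GFP_KERNEL)) {
			kfree(sgt);
			return NULL;
		}

		*sg_ents = nents;
		for_each_sg(sgt->sgl, sg, nents, i) {
			pg = vmalloc_to_page(data);
			if (!pg) {
				sg_free_table(sgt);
				kfree(sgt);
				return NULL;
			}

			/* vmalloc memory is page aligned: offset always 0 */
			s = min_t(uint32_t, PAGE_SIZE, size);
			sg_set_page(sg, pg, s, 0);

			size -= s;
			data += s;
		}
		/* sg_alloc_table() already marked the last entry as the end
		 * and nents is exact, so no sg_mark_end()/sg_unmark_end()
		 * dance is needed. */

		return sgt;
	}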

>  static int virtio_gpu_queue_fenced_ctrl_buffer(struct virtio_gpu_device 
> *vgdev,
>  struct virtio_gpu_vbuffer *vbuf,
>  struct virtio_gpu_ctrl_hdr *hdr,
>  struct virtio_gpu_fence *fence)
>  {
>   struct virtqueue *vq = vgdev->ctrlq.vq;
> + struct scatterlist *vout = NULL, sg;
> + struct sg_table *sgt = NULL;
>   int rc;
> + int outcnt = 0;
> +
> + if (vbuf->data_size) {
> + if (is_vmalloc_addr(vbuf->data_buf)) {
> + sgt = vmalloc_to_sgt(vbuf->data_buf, vbuf->data_size,
> +  &outcnt);
> + if (!sgt)
> + return -ENOMEM;
> + vout = sgt->sgl;
> + } else {
> + sg_init_one(&sg, vbuf->data_buf, vbuf->data_size);
> + vout = &sg;
> + outcnt = 1;

outcnt must be set in both cases.

> +static int virtio_gpu_queue_ctrl_buffer(struct virtio_gpu_device *vgdev,
> + struct virtio_gpu_vbuffer *vbuf)
> +{
> + return virtio_gpu_queue_fenced_ctrl_buffer(vgdev, vbuf, NULL, NULL);
> +}

Changing virtio_gpu_queue_ctrl_buffer to call
virtio_gpu_queue_fenced_ctrl_buffer should be done in a separate patch.

cheers,
  Gerd



Re: [RFC PATCH] virtio_ring: Use DMA API if guest memory is encrypted

2019-09-05 Thread Michael S. Tsirkin
On Mon, Aug 12, 2019 at 02:15:32PM +0200, Christoph Hellwig wrote:
> On Sun, Aug 11, 2019 at 04:55:27AM -0400, Michael S. Tsirkin wrote:
> > On Sun, Aug 11, 2019 at 07:56:07AM +0200, Christoph Hellwig wrote:
> > > So we need a flag on the virtio device, exposed by the
> > > hypervisor (or hardware for hw virtio devices) that says:  hey, I'm real,
> > > don't take a shortcut.
> > 
> > The point here is that it's actually still not real. So we would still
> > use a physical address. However Linux decides that it wants extra
> > security by moving all data through the bounce buffer.  The distinction
> > made is that one can actually give device a physical address of the
> > bounce buffer.
> 
> Sure.  The problem is just that you keep piling hacks on top of hacks.
> We need the per-device flag anyway to properly support hardware virtio
> device in all circumstances.  Instead of coming up with another ad-hoc
> hack to force DMA use, implement that one proper bit and reuse it here.

The flag that you mention literally means "I am a real device" so for
example, you can use VFIO with it. And this device isn't a real one,
and you can't use VFIO with it, even though it's part of a POWER
system, which always has an IOMMU.



Or here's another way to put it: we have a broken device that can only
access physical addresses, not DMA addresses. But to enable SEV, Linux
requires the DMA API.  So we can still make it work if the DMA address
happens to be a physical address (not necessarily of the same page).


This is where dma_addr_is_a_phys_addr() is coming from: it tells us this
weird configuration can still work.  What are we going to do for SEV if
dma_addr_is_a_phys_addr does not apply? Fail probe I guess.


So the proposal is really to make things safe and to this end,
to add this in probe:

	if (sev_active() &&
	    !dma_addr_is_a_phys_addr(dev) &&
	    !virtio_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM))
		return -EINVAL;


the point being to prevent loading the driver where it would
corrupt guest memory. Put this way, any objections to adding
dma_addr_is_a_phys_addr to the DMA API?
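
For context, dma_addr_is_a_phys_addr() does not exist in the DMA API today;
it is the helper being proposed here. A minimal sketch of what it could look
like, assuming "DMA address == physical address" boils down to the device
using dma-direct (no IOMMU translation in the path):

	/* Hypothetical helper -- proposed above, not an existing DMA API
	 * function. */
	static inline bool dma_addr_is_a_phys_addr(struct device *dev)
	{
		/* NULL ops means dma-direct: DMA addresses are physical
		 * addresses (modulo a constant dma offset). */
		return dma_is_direct(get_dma_ops(dev));
	}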





-- 
MST


Re: [PATCH 2/2] vhost: re-introducing metadata acceleration through kernel virtual address

2019-09-05 Thread Hillf Danton


On Thu,  5 Sep 2019 20:27:36 +0800 From:   Jason Wang 
> 
> +static void vhost_set_map_dirty(struct vhost_virtqueue *vq,
> + struct vhost_map *map, int index)
> +{
> + struct vhost_uaddr *uaddr = &vq->uaddrs[index];
> + int i;
> +
> + if (uaddr->write) {
> + for (i = 0; i < map->npages; i++)
> + set_page_dirty(map->pages[i]);
> + }

Not sure we need to set page dirty under the page lock.
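
For reference, a minimal sketch of the alternative that comment hints at,
assuming the caller does not already hold the page lock
(set_page_dirty_lock() takes and drops it internally):

	if (uaddr->write) {
		for (i = 0; i < map->npages; i++)
			/* locks the page itself, unlike set_page_dirty() */
			set_page_dirty_lock(map->pages[i]);
	}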




[PATCH v2] drm/virtio: Use vmalloc for command buffer allocations.

2019-09-05 Thread David Riley
Userspace-requested command buffer allocations could be too large to
satisfy as a single contiguous allocation.  Use vmalloc if necessary to
satisfy those allocations.

Signed-off-by: David Riley 
---
 drivers/gpu/drm/virtio/virtgpu_ioctl.c |   4 +-
 drivers/gpu/drm/virtio/virtgpu_vq.c    | 114 ++++++++++++++++++++++++++++------
 2 files changed, 96 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c 
b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
index ac60be9b5c19..a8732a8af766 100644
--- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c
+++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
@@ -195,7 +195,7 @@ static int virtio_gpu_execbuffer_ioctl(struct drm_device 
*dev, void *data,
if (ret)
goto out_free;
 
-   buf = memdup_user(u64_to_user_ptr(exbuf->command), exbuf->size);
+   buf = vmemdup_user(u64_to_user_ptr(exbuf->command), exbuf->size);
if (IS_ERR(buf)) {
ret = PTR_ERR(buf);
goto out_unresv;
@@ -230,7 +230,7 @@ static int virtio_gpu_execbuffer_ioctl(struct drm_device 
*dev, void *data,
return 0;
 
 out_memdup:
-   kfree(buf);
+   kvfree(buf);
 out_unresv:
ttm_eu_backoff_reservation(&ticket, &validate_list);
 out_free:
diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c 
b/drivers/gpu/drm/virtio/virtgpu_vq.c
index 981ee16e3ee9..3ec89ae8478c 100644
--- a/drivers/gpu/drm/virtio/virtgpu_vq.c
+++ b/drivers/gpu/drm/virtio/virtgpu_vq.c
@@ -154,7 +154,7 @@ static void free_vbuf(struct virtio_gpu_device *vgdev,
 {
if (vbuf->resp_size > MAX_INLINE_RESP_SIZE)
kfree(vbuf->resp_buf);
-   kfree(vbuf->data_buf);
+   kvfree(vbuf->data_buf);
kmem_cache_free(vgdev->vbufs, vbuf);
 }
 
@@ -251,13 +251,70 @@ void virtio_gpu_dequeue_cursor_func(struct work_struct 
*work)
wake_up(&vgdev->cursorq.ack_queue);
 }
 
+/* How many bytes left in this page. */
+static unsigned int rest_of_page(void *data)
+{
+   return PAGE_SIZE - offset_in_page(data);
+}
+
+/* Create sg_table from a vmalloc'd buffer. */
+static struct sg_table *vmalloc_to_sgt(char *data, uint32_t size, int *sg_ents)
+{
+   int nents, ret, s, i;
+   struct sg_table *sgt;
+   struct scatterlist *sg;
+   struct page *pg;
+
+   *sg_ents = 0;
+
+   sgt = kmalloc(sizeof(*sgt), GFP_KERNEL);
+   if (!sgt)
+   return NULL;
+
+   nents = DIV_ROUND_UP(size, PAGE_SIZE) + 1;
+   ret = sg_alloc_table(sgt, nents, GFP_KERNEL);
+   if (ret) {
+   kfree(sgt);
+   return NULL;
+   }
+
+   for_each_sg(sgt->sgl, sg, nents, i) {
+   pg = vmalloc_to_page(data);
+   if (!pg) {
+   sg_free_table(sgt);
+   kfree(sgt);
+   return NULL;
+   }
+
+   s = rest_of_page(data);
+   if (s > size)
+   s = size;
+
+   sg_set_page(sg, pg, s, offset_in_page(data));
+
+   size -= s;
+   data += s;
+   *sg_ents += 1;
+
+   if (size) {
+   sg_unmark_end(sg);
+   } else {
+   sg_mark_end(sg);
+   break;
+   }
+   }
+
+   return sgt;
+}
+
 static int virtio_gpu_queue_ctrl_buffer_locked(struct virtio_gpu_device *vgdev,
-  struct virtio_gpu_vbuffer *vbuf)
+  struct virtio_gpu_vbuffer *vbuf,
+  struct scatterlist *vout)
__releases(&vgdev->ctrlq.qlock)
__acquires(&vgdev->ctrlq.qlock)
 {
struct virtqueue *vq = vgdev->ctrlq.vq;
-   struct scatterlist *sgs[3], vcmd, vout, vresp;
+   struct scatterlist *sgs[3], vcmd, vresp;
int outcnt = 0, incnt = 0;
int ret;
 
@@ -268,9 +325,8 @@ static int virtio_gpu_queue_ctrl_buffer_locked(struct 
virtio_gpu_device *vgdev,
sgs[outcnt + incnt] = &vcmd;
outcnt++;
 
-   if (vbuf->data_size) {
-   sg_init_one(&vout, vbuf->data_buf, vbuf->data_size);
-   sgs[outcnt + incnt] = &vout;
+   if (vout) {
+   sgs[outcnt + incnt] = vout;
outcnt++;
}
 
@@ -299,24 +355,30 @@ static int virtio_gpu_queue_ctrl_buffer_locked(struct 
virtio_gpu_device *vgdev,
return ret;
 }
 
-static int virtio_gpu_queue_ctrl_buffer(struct virtio_gpu_device *vgdev,
-   struct virtio_gpu_vbuffer *vbuf)
-{
-   int rc;
-
-   spin_lock(&vgdev->ctrlq.qlock);
-   rc = virtio_gpu_queue_ctrl_buffer_locked(vgdev, vbuf);
-   spin_unlock(&vgdev->ctrlq.qlock);
-   return rc;
-}
-
 static int virtio_gpu_queue_fenced_ctrl_buffer(struct virtio_gpu_device *vgdev,
   struct virtio_gpu_vbuffer *vbuf,

[PATCH 16/18] virtiofs: Use virtio_fs_mutex for races w.r.t ->remove and mount path

2019-09-05 Thread Vivek Goyal
It is possible that a mount is in progress and the device is being removed
at the same time. Use virtio_fs_mutex to avoid races.

This also takes care of a bunch of races and removes some TODO items.

Signed-off-by: Vivek Goyal 
---
 fs/fuse/virtio_fs.c | 32 ++++++++++++++++++++++----------
 1 file changed, 22 insertions(+), 10 deletions(-)

diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index 29ec2f5bbbe2..c483482185b6 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -13,7 +13,9 @@
 #include 
 #include "fuse_i.h"
 
-/* List of virtio-fs device instances and a lock for the list */
+/* List of virtio-fs device instances and a lock for the list. Also provides
+ * mutual exclusion in device removal and mounting path
+ */
 static DEFINE_MUTEX(virtio_fs_mutex);
 static LIST_HEAD(virtio_fs_instances);
 
@@ -72,17 +74,19 @@ static void release_virtiofs_obj(struct kref *ref)
kfree(vfs);
 }
 
+/* Make sure virtio_fs_mutex is held */
 static void virtiofs_put(struct virtio_fs *fs)
 {
-   mutex_lock(&virtio_fs_mutex);
kref_put(&fs->refcount, release_virtiofs_obj);
-   mutex_unlock(&virtio_fs_mutex);
 }
 
 static void virtio_fs_put(struct fuse_iqueue *fiq)
 {
struct virtio_fs *vfs = fiq->priv;
+
+   mutex_lock(&virtio_fs_mutex);
virtiofs_put(vfs);
+   mutex_unlock(&virtio_fs_mutex);
 }
 
 static void virtio_fs_drain_queue(struct virtio_fs_vq *fsvq)
@@ -596,9 +600,8 @@ static void virtio_fs_remove(struct virtio_device *vdev)
struct virtio_fs *fs = vdev->priv;
 
mutex_lock(&virtio_fs_mutex);
+   /* This device is going away. No one should get new reference */
list_del_init(&fs->list);
-   mutex_unlock(&virtio_fs_mutex);
-
virtio_fs_stop_all_queues(fs);
virtio_fs_drain_all_queues(fs);
vdev->config->reset(vdev);
@@ -607,6 +610,7 @@ static void virtio_fs_remove(struct virtio_device *vdev)
vdev->priv = NULL;
/* Put device reference on virtio_fs object */
virtiofs_put(fs);
+   mutex_unlock(&virtio_fs_mutex);
 }
 
 #ifdef CONFIG_PM_SLEEP
@@ -978,10 +982,15 @@ static int virtio_fs_fill_super(struct super_block *sb)
.no_force_umount = true,
};
 
-   /* TODO lock */
-   if (fs->vqs[VQ_REQUEST].fud) {
-   pr_err("virtio-fs: device already in use\n");
-   err = -EBUSY;
+   mutex_lock(&virtio_fs_mutex);
+
+   /* After holding mutex, make sure virtiofs device is still there.
+* Though we are holding a reference to it, driver ->remove might
+* still have cleaned up virtual queues. In that case bail out.
+*/
+   err = -EINVAL;
+   if (list_empty(&fs->list)) {
+   pr_info("virtio-fs: tag <%s> not found\n", fs->tag);
goto err;
}
 
@@ -1007,7 +1016,6 @@ static int virtio_fs_fill_super(struct super_block *sb)
 
fc = fs->vqs[VQ_REQUEST].fud->fc;
 
-   /* TODO take fuse_mutex around this loop? */
for (i = 0; i < fs->nvqs; i++) {
struct virtio_fs_vq *fsvq = &fs->vqs[i];
 
@@ -1020,6 +1028,7 @@ static int virtio_fs_fill_super(struct super_block *sb)
/* Previous unmount will stop all queues. Start these again */
virtio_fs_start_all_queues(fs);
fuse_send_init(fc, init_req);
+   mutex_unlock(&virtio_fs_mutex);
return 0;
 
 err_free_init_req:
@@ -1027,6 +1036,7 @@ static int virtio_fs_fill_super(struct super_block *sb)
 err_free_fuse_devs:
virtio_fs_free_devs(fs);
 err:
+   mutex_unlock(&virtio_fs_mutex);
return err;
 }
 
@@ -1100,7 +1110,9 @@ static int virtio_fs_get_tree(struct fs_context *fsc)
 
fc = kzalloc(sizeof(struct fuse_conn), GFP_KERNEL);
if (!fc) {
+   mutex_lock(&virtio_fs_mutex);
virtiofs_put(fs);
+   mutex_unlock(&virtio_fs_mutex);
return -ENOMEM;
}
 
-- 
2.20.1



[PATCH 05/18] Maintain count of in flight requests for VQ_REQUEST queue

2019-09-05 Thread Vivek Goyal
As of now we maintain this count only for VQ_HIPRIO. Maintain it for
VQ_REQUEST as well so that later it can be used to drain the VQ_REQUEST
queue.

Signed-off-by: Vivek Goyal 
---
 fs/fuse/virtio_fs.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index c46dd4d284d6..5df97dfee37d 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -360,6 +360,9 @@ static void virtio_fs_requests_done_work(struct work_struct 
*work)
spin_unlock(&fpq->lock);
 
fuse_request_end(fc, req);
+   spin_lock(&fsvq->lock);
+   fsvq->in_flight--;
+   spin_unlock(&fsvq->lock);
}
 }
 
@@ -769,6 +772,7 @@ static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
goto out;
}
 
+   fsvq->in_flight++;
notify = virtqueue_kick_prepare(vq);
 
spin_unlock(&fsvq->lock);
-- 
2.20.1



[PATCH 18/18] virtiofs: Remove TODO item from virtio_fs_free_devs()

2019-09-05 Thread Vivek Goyal
virtio_fs_free_devs() is now called from ->kill_sb(). By this time
all device queues have been quiesced. I am assuming that while
->kill_sb() is in progress, another mount instance will wait for
it to finish (sb->s_umount mutex provides mutual exclusion).

W.r.t. the ->remove path, we should be fine as we are not touching vdev
or virtqueues. And we have a reference on the virtio_fs object, so we know
the rest of the data structures are valid.

So I can't see the need for any additional locking yet.

Signed-off-by: Vivek Goyal 
---
 fs/fuse/virtio_fs.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index eadaea6eb8e2..61aa3eba7b22 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -200,8 +200,6 @@ static void virtio_fs_free_devs(struct virtio_fs *fs)
 {
unsigned int i;
 
-   /* TODO lock */
-
for (i = 0; i < fs->nvqs; i++) {
struct virtio_fs_vq *fsvq = &fs->vqs[i];
 
-- 
2.20.1



[PATCH 13/18] virtiofs: Do not access virtqueue in request submission path

2019-09-05 Thread Vivek Goyal
In the request submission path it is possible that the virtqueue is already
gone due to driver ->remove(). So do not access it in dev_dbg(); use
pr_debug() instead.

If the virtqueue is gone, dev_dbg() would result in a NULL pointer
dereference.

Signed-off-by: Vivek Goyal 
---
 fs/fuse/virtio_fs.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index 40259368a6bd..01bbf2c0e144 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -888,10 +888,10 @@ __releases(fiq->waitq.lock)
fs = fiq->priv;
fc = fs->vqs[queue_id].fud->fc;
 
-   dev_dbg(&fs->vqs[queue_id].vq->vdev->dev,
-   "%s: opcode %u unique %#llx nodeid %#llx in.len %u out.len 
%u\n",
-   __func__, req->in.h.opcode, req->in.h.unique, req->in.h.nodeid,
-   req->in.h.len, fuse_len_args(req->out.numargs, req->out.args));
+   pr_debug("%s: opcode %u unique %#llx nodeid %#llx in.len %u out.len %u"
+"\n", __func__, req->in.h.opcode, req->in.h.unique,
+req->in.h.nodeid, req->in.h.len,
+fuse_len_args(req->out.numargs, req->out.args));
 
fpq = &fs->vqs[queue_id].fud->pq;
spin_lock(&fpq->lock);
-- 
2.20.1



[PATCH 09/18] virtiofs: Add a helper to start all the queues

2019-09-05 Thread Vivek Goyal
This just marks all the queues as connected and ready to accept requests.

Signed-off-by: Vivek Goyal 
---
 fs/fuse/virtio_fs.c | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index d5730a50b303..f2936daca39c 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -112,6 +112,19 @@ static void virtio_fs_drain_all_queues(struct virtio_fs 
*fs)
}
 }
 
+static void virtio_fs_start_all_queues(struct virtio_fs *fs)
+{
+   struct virtio_fs_vq *fsvq;
+   int i;
+
+   for (i = 0; i < fs->nvqs; i++) {
+   fsvq = &fs->vqs[i];
+   spin_lock(&fsvq->lock);
+   fsvq->connected = true;
+   spin_unlock(&fsvq->lock);
+   }
+}
+
 /* Add a new instance to the list or return -EEXIST if tag name exists*/
 static int virtio_fs_add_instance(struct virtio_fs *fs)
 {
@@ -483,10 +496,10 @@ static int virtio_fs_setup_vqs(struct virtio_device *vdev,
if (ret < 0)
goto out;
 
-   for (i = 0; i < fs->nvqs; i++) {
+   for (i = 0; i < fs->nvqs; i++)
fs->vqs[i].vq = vqs[i];
-   fs->vqs[i].connected = true;
-   }
+
+   virtio_fs_start_all_queues(fs);
 out:
kfree(names);
kfree(callbacks);
-- 
2.20.1



[PATCH 10/18] virtiofs: Do not use device managed mem for virtio_fs and virtio_fs_vq

2019-09-05 Thread Vivek Goyal
These data structures should go away when the virtio_fs object is going
away. When the device is going away, we just need to make sure the
virtqueues can go away, and after that none of the code accesses the vq
and all the requests get an error.

So allocate memory for virtio_fs and virtio_fs_vq normally and free it
at right time.

This patch still frees up memory during device remove time. A later patch
will make the virtio_fs object reference counted and this memory will be
freed when the last reference to the object is dropped.

Signed-off-by: Vivek Goyal 
---
 fs/fuse/virtio_fs.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index f2936daca39c..1ea0f889e804 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -446,7 +446,7 @@ static int virtio_fs_setup_vqs(struct virtio_device *vdev,
vq_callback_t **callbacks;
const char **names;
unsigned int i;
-   int ret;
+   int ret = 0;
 
virtio_cread(vdev, struct virtio_fs_config, num_queues,
 &fs->num_queues);
@@ -454,9 +454,7 @@ static int virtio_fs_setup_vqs(struct virtio_device *vdev,
return -EINVAL;
 
fs->nvqs = 1 + fs->num_queues;
-
-   fs->vqs = devm_kcalloc(&vdev->dev, fs->nvqs,
-   sizeof(fs->vqs[VQ_HIPRIO]), GFP_KERNEL);
+   fs->vqs = kcalloc(fs->nvqs, sizeof(fs->vqs[VQ_HIPRIO]), GFP_KERNEL);
if (!fs->vqs)
return -ENOMEM;
 
@@ -504,6 +502,8 @@ static int virtio_fs_setup_vqs(struct virtio_device *vdev,
kfree(names);
kfree(callbacks);
kfree(vqs);
+   if (ret)
+   kfree(fs->vqs);
return ret;
 }
 
@@ -519,7 +519,7 @@ static int virtio_fs_probe(struct virtio_device *vdev)
struct virtio_fs *fs;
int ret;
 
-   fs = devm_kzalloc(&vdev->dev, sizeof(*fs), GFP_KERNEL);
+   fs = kzalloc(sizeof(*fs), GFP_KERNEL);
if (!fs)
return -ENOMEM;
vdev->priv = fs;
@@ -552,6 +552,7 @@ static int virtio_fs_probe(struct virtio_device *vdev)
 
 out:
vdev->priv = NULL;
+   kfree(fs);
return ret;
 }
 
@@ -582,6 +583,8 @@ static void virtio_fs_remove(struct virtio_device *vdev)
mutex_unlock(&virtio_fs_mutex);
 
vdev->priv = NULL;
+   kfree(fs->vqs);
+   kfree(fs);
 }
 
 #ifdef CONFIG_PM_SLEEP
-- 
2.20.1



[PATCH 17/18] virtiofs: Remove TODO to quiesce/end_requests

2019-09-05 Thread Vivek Goyal
We now stop queues and drain all the pending requests from all virtqueues.
So this is not a TODO anymore.

Got rid of incrementing fc->dev_count as well. It did not seem meaningful
for virtio_fs.

Signed-off-by: Vivek Goyal 
---
 fs/fuse/virtio_fs.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index c483482185b6..eadaea6eb8e2 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -208,7 +208,6 @@ static void virtio_fs_free_devs(struct virtio_fs *fs)
if (!fsvq->fud)
continue;
 
-   /* TODO need to quiesce/end_requests/decrement dev_count */
fuse_dev_free(fsvq->fud);
fsvq->fud = NULL;
}
@@ -1022,7 +1021,6 @@ static int virtio_fs_fill_super(struct super_block *sb)
if (i == VQ_REQUEST)
continue; /* already initialized */
fuse_dev_install(fsvq->fud, fc);
-   atomic_inc(&fc->dev_count);
}
 
/* Previous unmount will stop all queues. Start these again */
-- 
2.20.1



[PATCH 11/18] virtiofs: stop and drain queues after sending DESTROY

2019-09-05 Thread Vivek Goyal
During virtio_kill_sb() we first stop the forget queue and drain it, and
then call fuse_kill_sb_anon(). This will result in sending a DESTROY
request to the fuse server. Once finished, stop all the queues and drain
them one more time just to be sure, and then free up the devices.

Given that draining the queues will call flush_work() on various workers,
remove this logic from virtio_fs_free_devs().

Signed-off-by: Vivek Goyal 
---
 fs/fuse/virtio_fs.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index 1ea0f889e804..a76bd5a04521 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -180,9 +180,6 @@ static void virtio_fs_free_devs(struct virtio_fs *fs)
if (!fsvq->fud)
continue;
 
-   flush_work(&fsvq->done_work);
-   flush_delayed_work(&fsvq->dispatch_work);
-
/* TODO need to quiesce/end_requests/decrement dev_count */
fuse_dev_free(fsvq->fud);
fsvq->fud = NULL;
@@ -994,6 +991,8 @@ static int virtio_fs_fill_super(struct super_block *sb)
atomic_inc(&fc->dev_count);
}
 
+   /* Previous unmount will stop all queues. Start these again */
+   virtio_fs_start_all_queues(fs);
fuse_send_init(fc, init_req);
return 0;
 
@@ -1026,6 +1025,12 @@ static void virtio_kill_sb(struct super_block *sb)
virtio_fs_drain_all_queues(vfs);
 
fuse_kill_sb_anon(sb);
+
+   /* fuse_kill_sb_anon() must have sent destroy. Stop all queues
+* and drain one more time and free fuse devices.
+*/
+   virtio_fs_stop_all_queues(vfs);
+   virtio_fs_drain_all_queues(vfs);
virtio_fs_free_devs(vfs);
 }
 
-- 
2.20.1



[PATCH 15/18] virtiofs: Make virtio_fs object refcounted

2019-09-05 Thread Vivek Goyal
This object is used both by the fuse connection as well as the virtio
device. So make this object reference counted; that makes it easy to define
the life cycle of the object.

Now the device can be removed while the filesystem is still mounted. This
will clean up all the virtqueues, but the virtio_fs object will still be
around and will be cleaned up when the filesystem is unmounted and sb/fc
drops its reference.

Removing a device also stops all virt queues and any new request gets
error -ENOTCONN. All existing in-flight requests are drained before
->remove returns.

Signed-off-by: Vivek Goyal 
---
 fs/fuse/virtio_fs.c | 52 +++++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 43 insertions(+), 9 deletions(-)

diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index 01bbf2c0e144..29ec2f5bbbe2 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -37,6 +37,7 @@ struct virtio_fs_vq {
 
 /* A virtio-fs device instance */
 struct virtio_fs {
+   struct kref refcount;
struct list_head list;/* on virtio_fs_instances */
char *tag;
struct virtio_fs_vq *vqs;
@@ -63,6 +64,27 @@ static inline struct fuse_pqueue *vq_to_fpq(struct virtqueue 
*vq)
return &vq_to_fsvq(vq)->fud->pq;
 }
 
+static void release_virtiofs_obj(struct kref *ref)
+{
+   struct virtio_fs *vfs = container_of(ref, struct virtio_fs, refcount);
+
+   kfree(vfs->vqs);
+   kfree(vfs);
+}
+
+static void virtiofs_put(struct virtio_fs *fs)
+{
+   mutex_lock(&virtio_fs_mutex);
+   kref_put(&fs->refcount, release_virtiofs_obj);
+   mutex_unlock(&virtio_fs_mutex);
+}
+
+static void virtio_fs_put(struct fuse_iqueue *fiq)
+{
+   struct virtio_fs *vfs = fiq->priv;
+   virtiofs_put(vfs);
+}
+
 static void virtio_fs_drain_queue(struct virtio_fs_vq *fsvq)
 {
WARN_ON(fsvq->in_flight < 0);
@@ -156,8 +178,10 @@ static struct virtio_fs *virtio_fs_find_instance(const 
char *tag)
mutex_lock(&virtio_fs_mutex);
 
list_for_each_entry(fs, &virtio_fs_instances, list) {
-   if (strcmp(fs->tag, tag) == 0)
+   if (strcmp(fs->tag, tag) == 0) {
+   kref_get(&fs->refcount);
goto found;
+   }
}
 
fs = NULL; /* not found */
@@ -519,6 +543,7 @@ static int virtio_fs_probe(struct virtio_device *vdev)
fs = kzalloc(sizeof(*fs), GFP_KERNEL);
if (!fs)
return -ENOMEM;
+   kref_init(&fs->refcount);
vdev->priv = fs;
 
ret = virtio_fs_read_tag(vdev, fs);
@@ -570,18 +595,18 @@ static void virtio_fs_remove(struct virtio_device *vdev)
 {
struct virtio_fs *fs = vdev->priv;
 
+   mutex_lock(&virtio_fs_mutex);
+   list_del_init(&fs->list);
+   mutex_unlock(&virtio_fs_mutex);
+
virtio_fs_stop_all_queues(fs);
virtio_fs_drain_all_queues(fs);
vdev->config->reset(vdev);
virtio_fs_cleanup_vqs(vdev, fs);
 
-   mutex_lock(&virtio_fs_mutex);
-   list_del(&fs->list);
-   mutex_unlock(&virtio_fs_mutex);
-
vdev->priv = NULL;
-   kfree(fs->vqs);
-   kfree(fs);
+   /* Put device reference on virtio_fs object */
+   virtiofs_put(fs);
 }
 
 #ifdef CONFIG_PM_SLEEP
@@ -932,6 +957,7 @@ const static struct fuse_iqueue_ops virtio_fs_fiq_ops = {
.wake_forget_and_unlock = virtio_fs_wake_forget_and_unlock,
.wake_interrupt_and_unlock  = virtio_fs_wake_interrupt_and_unlock,
.wake_pending_and_unlock= virtio_fs_wake_pending_and_unlock,
+   .put= virtio_fs_put,
 };
 
 static int virtio_fs_fill_super(struct super_block *sb)
@@ -1026,7 +1052,9 @@ static void virtio_kill_sb(struct super_block *sb)
fuse_kill_sb_anon(sb);
 
/* fuse_kill_sb_anon() must have sent destroy. Stop all queues
-* and drain one more time and free fuse devices.
+* and drain one more time and free fuse devices. Freeing fuse
+* devices will drop their reference on fuse_conn and that in
+* turn will drop its reference on virtio_fs object.
 */
virtio_fs_stop_all_queues(vfs);
virtio_fs_drain_all_queues(vfs);
@@ -1060,6 +1088,10 @@ static int virtio_fs_get_tree(struct fs_context *fsc)
struct fuse_conn *fc;
int err;
 
+   /* This gets a reference on virtio_fs object. This ptr gets installed
+* in fc->iq->priv. Once fuse_conn is going away, it calls ->put()
+* to drop the reference to this object.
+*/
fs = virtio_fs_find_instance(fsc->source);
if (!fs) {
pr_info("virtio-fs: tag <%s> not found\n", fsc->source);
@@ -1067,8 +1099,10 @@ static int virtio_fs_get_tree(struct fs_context *fsc)
}
 
fc = kzalloc(sizeof(struct fuse_conn), GFP_KERNEL);
-   if (!fc)
+   if (!fc) {
+   virtiofs_put(fs);
return -ENOMEM;
+   }
 
fuse_conn_init(fc, get_user_ns(current_user_ns()), &virtio_f

[PATCH 00/18] virtiofs: Fix various races and cleanups round 1

2019-09-05 Thread Vivek Goyal
Hi,

Michael Tsirkin pointed out issues w.r.t. various locking-related TODO
items and races w.r.t. device removal.

In this first round of cleanups, I have taken care of most pressing
issues.

These patches apply on top of following.

git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse.git#virtiofs-v4

I have tested these patches with mount/umount and device removal using
the qemu monitor. For example:

virsh qemu-monitor-command --hmp vm9-f28 device_del myvirtiofs

Now a mounted device can be removed, the filesystem will return
-ENOTCONN errors, and one can unmount it.

Miklos, if you are fine with the patches, I am fine if you fold these
all into the existing patch. I kept them separate so that review is easier.

Any feedback or comments are welcome.

Thanks
Vivek


Vivek Goyal (18):
  virtiofs: Remove request from processing list before calling end
  virtiofs: Check whether hiprio queue is connected at submission time
  virtiofs: Pass fsvq instead of vq as parameter to
virtio_fs_enqueue_req
  virtiofs: Check connected state for VQ_REQUEST queue as well
  Maintain count of in flight requests for VQ_REQUEST queue
  virtiofs: ->remove should not clean virtiofs fuse devices
  virtiofs: Stop virtiofs queues when device is being removed
  virtiofs: Drain all pending requests during ->remove time
  virtiofs: Add a helper to start all the queues
  virtiofs: Do not use device managed mem for virtio_fs and virtio_fs_vq
  virtiofs: stop and drain queues after sending DESTROY
  virtiofs: Use virtio_fs_free_devs() in error path
  virtiofs: Do not access virtqueue in request submission path
  virtiofs: Add a fuse_iqueue operation to put() reference
  virtiofs: Make virtio_fs object refcounted
  virtiofs: Use virtio_fs_mutex for races w.r.t ->remove and mount path
  virtiofs: Remove TODO to quiesce/end_requests
  virtiofs: Remove TODO item from virtio_fs_free_devs()

 fs/fuse/fuse_i.h    |   5 +
 fs/fuse/inode.c     |   6 +
 fs/fuse/virtio_fs.c | 259 ++++++++++++++++++++++++++++++++++++++---------------
 3 files changed, 198 insertions(+), 72 deletions(-)

-- 
2.20.1



[PATCH 01/18] virtiofs: Remove request from processing list before calling end

2019-09-05 Thread Vivek Goyal
In the error path we are calling fuse_request_end(), but we need to clear
the FR_SENT bit as well as remove the request from the processing queue
first. Otherwise fuse_request_end() triggers a warning and other issues
show up.

Signed-off-by: Vivek Goyal 
---
 fs/fuse/virtio_fs.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index 197e79e536f9..a708ccb65662 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -826,6 +826,10 @@ __releases(fiq->waitq.lock)
}
req->out.h.error = ret;
pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", ret);
+   spin_lock(&fpq->lock);
+   clear_bit(FR_SENT, &req->flags);
+   list_del_init(&req->list);
+   spin_unlock(&fpq->lock);
fuse_request_end(fc, req);
return;
}
-- 
2.20.1



[PATCH 02/18] virtiofs: Check whether hiprio queue is connected at submission time

2019-09-05 Thread Vivek Goyal
For the hiprio queue (forget requests), we keep a state in the queue saying
whether the queue is connected or not. If the queue is not connected, do not
try to submit the request and return an error instead.

As of now, we check this state only in the path where the worker tries to
submit a forget after the first attempt failed. Check this state also in
the path where the request is being submitted the first time.

Signed-off-by: Vivek Goyal 
---
 fs/fuse/virtio_fs.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index a708ccb65662..e9497b565dd8 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -577,9 +577,16 @@ __releases(fiq->waitq.lock)
sg_init_one(&sg, forget, sizeof(*forget));
 
/* Enqueue the request */
+   spin_lock(&fsvq->lock);
+
+   if (!fsvq->connected) {
+   kfree(forget);
+   spin_unlock(&fsvq->lock);
+   goto out;
+   }
+
vq = fsvq->vq;
dev_dbg(&vq->vdev->dev, "%s\n", __func__);
-   spin_lock(&fsvq->lock);
 
ret = virtqueue_add_sgs(vq, sgs, 1, 0, forget, GFP_ATOMIC);
if (ret < 0) {
-- 
2.20.1



[PATCH 04/18] virtiofs: Check connected state for VQ_REQUEST queue as well

2019-09-05 Thread Vivek Goyal
Right now we are checking ->connected state only for VQ_HIPRIO. Now we want
to make use of this method for all queues. So check it for VQ_REQUEST as
well.

This will be helpful if the device has been removed and the virtqueue is
gone. In that case ->connected will be false, the request can't be submitted
anymore, and user space will see error -ENOTCONN.

Signed-off-by: Vivek Goyal 
---
 fs/fuse/virtio_fs.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index 9d30530e3ca9..c46dd4d284d6 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -755,6 +755,12 @@ static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
 
spin_lock(&fsvq->lock);
 
+   if (!fsvq->connected) {
+   spin_unlock(&fsvq->lock);
+   ret = -ENOTCONN;
+   goto out;
+   }
+
vq = fsvq->vq;
ret = virtqueue_add_sgs(vq, sgs, out_sgs, in_sgs, req, GFP_ATOMIC);
if (ret < 0) {
-- 
2.20.1



[PATCH 07/18] virtiofs: Stop virtiofs queues when device is being removed

2019-09-05 Thread Vivek Goyal
Stop all the virt queues when the device is going away. This will ensure
that no new requests are submitted to the virtqueues and requests will end
with error -ENOTCONN.

Signed-off-by: Vivek Goyal 
---
 fs/fuse/virtio_fs.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index f68a25ca9e9d..90e7b2f345e5 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -493,10 +493,24 @@ static int virtio_fs_probe(struct virtio_device *vdev)
return ret;
 }
 
+static void virtio_fs_stop_all_queues(struct virtio_fs *fs)
+{
+   struct virtio_fs_vq *fsvq;
+   int i;
+
+   for (i = 0; i < fs->nvqs; i++) {
+   fsvq = &fs->vqs[i];
+   spin_lock(&fsvq->lock);
+   fsvq->connected = false;
+   spin_unlock(&fsvq->lock);
+   }
+}
+
 static void virtio_fs_remove(struct virtio_device *vdev)
 {
struct virtio_fs *fs = vdev->priv;
 
+   virtio_fs_stop_all_queues(fs);
vdev->config->reset(vdev);
virtio_fs_cleanup_vqs(vdev, fs);
 
-- 
2.20.1



[PATCH 08/18] virtiofs: Drain all pending requests during ->remove time

2019-09-05 Thread Vivek Goyal
When device is going away, drain all pending requests.

Signed-off-by: Vivek Goyal 
---
 fs/fuse/virtio_fs.c | 83 +++++++++++++++++++++++++++++++++---------------------
 1 file changed, 51 insertions(+), 32 deletions(-)

diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index 90e7b2f345e5..d5730a50b303 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -63,6 +63,55 @@ static inline struct fuse_pqueue *vq_to_fpq(struct virtqueue 
*vq)
return &vq_to_fsvq(vq)->fud->pq;
 }
 
+static void virtio_fs_drain_queue(struct virtio_fs_vq *fsvq)
+{
+   WARN_ON(fsvq->in_flight < 0);
+
+   /* Wait for in flight requests to finish.*/
+   while (1) {
+   spin_lock(&fsvq->lock);
+   if (!fsvq->in_flight) {
+   spin_unlock(&fsvq->lock);
+   break;
+   }
+   spin_unlock(&fsvq->lock);
+   usleep_range(1000, 2000);
+   }
+
+   flush_work(&fsvq->done_work);
+   flush_delayed_work(&fsvq->dispatch_work);
+}
+
+static inline void drain_hiprio_queued_reqs(struct virtio_fs_vq *fsvq)
+{
+   struct virtio_fs_forget *forget;
+
+   spin_lock(&fsvq->lock);
+   while (1) {
+   forget = list_first_entry_or_null(&fsvq->queued_reqs,
+   struct virtio_fs_forget, list);
+   if (!forget)
+   break;
+   list_del(&forget->list);
+   kfree(forget);
+   }
+   spin_unlock(&fsvq->lock);
+}
+
+static void virtio_fs_drain_all_queues(struct virtio_fs *fs)
+{
+   struct virtio_fs_vq *fsvq;
+   int i;
+
+   for (i = 0; i < fs->nvqs; i++) {
+   fsvq = &fs->vqs[i];
+   if (i == VQ_HIPRIO)
+   drain_hiprio_queued_reqs(fsvq);
+
+   virtio_fs_drain_queue(fsvq);
+   }
+}
+
 /* Add a new instance to the list or return -EEXIST if tag name exists*/
 static int virtio_fs_add_instance(struct virtio_fs *fs)
 {
@@ -511,6 +560,7 @@ static void virtio_fs_remove(struct virtio_device *vdev)
struct virtio_fs *fs = vdev->priv;
 
virtio_fs_stop_all_queues(fs);
+   virtio_fs_drain_all_queues(fs);
vdev->config->reset(vdev);
virtio_fs_cleanup_vqs(vdev, fs);
 
@@ -865,37 +915,6 @@ __releases(fiq->waitq.lock)
}
 }
 
-static void virtio_fs_flush_hiprio_queue(struct virtio_fs_vq *fsvq)
-{
-   struct virtio_fs_forget *forget;
-
-   WARN_ON(fsvq->in_flight < 0);
-
-   /* Go through pending forget requests and free them */
-   spin_lock(&fsvq->lock);
-   while (1) {
-   forget = list_first_entry_or_null(&fsvq->queued_reqs,
-   struct virtio_fs_forget, list);
-   if (!forget)
-   break;
-   list_del(&forget->list);
-   kfree(forget);
-   }
-
-   spin_unlock(&fsvq->lock);
-
-   /* Wait for in flight requests to finish.*/
-   while (1) {
-   spin_lock(&fsvq->lock);
-   if (!fsvq->in_flight) {
-   spin_unlock(&fsvq->lock);
-   break;
-   }
-   spin_unlock(&fsvq->lock);
-   usleep_range(1000, 2000);
-   }
-}
-
 const static struct fuse_iqueue_ops virtio_fs_fiq_ops = {
.wake_forget_and_unlock = virtio_fs_wake_forget_and_unlock,
.wake_interrupt_and_unlock  = virtio_fs_wake_interrupt_and_unlock,
@@ -988,7 +1007,7 @@ static void virtio_kill_sb(struct super_block *sb)
spin_lock(&fsvq->lock);
fsvq->connected = false;
spin_unlock(&fsvq->lock);
-   virtio_fs_flush_hiprio_queue(fsvq);
+   virtio_fs_drain_all_queues(vfs);
 
fuse_kill_sb_anon(sb);
virtio_fs_free_devs(vfs);
-- 
2.20.1



[PATCH 14/18] virtiofs: Add a fuse_iqueue operation to put() reference

2019-09-05 Thread Vivek Goyal
Soon I will make the virtio_fs object reference counted, where a reference
will be taken by the device as well as by fuse_conn
(fuse_conn->fuse_iqueue->fiq_priv).

When the fuse connection is going away, it should put its reference on the
virtio_fs object.

So add a fuse_iqueue method which can be used to call into virtio_fs to
put the reference on the object (fiq_priv).

Signed-off-by: Vivek Goyal 
---
 fs/fuse/fuse_i.h | 5 +++++
 fs/fuse/inode.c  | 6 ++++++
 2 files changed, 11 insertions(+)

diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 85e2dcad68c1..04e2c000d63f 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -479,6 +479,11 @@ struct fuse_iqueue_ops {
 */
void (*wake_pending_and_unlock)(struct fuse_iqueue *fiq)
__releases(fiq->waitq.lock);
+
+   /**
+* Put a reference on fiq_priv.
+*/
+   void (*put)(struct fuse_iqueue *fiq);
 };
 
 /** /dev/fuse input queue operations */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 7fa0dcc6f565..70a433bdf01f 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -631,8 +631,14 @@ EXPORT_SYMBOL_GPL(fuse_conn_init);
 void fuse_conn_put(struct fuse_conn *fc)
 {
if (refcount_dec_and_test(&fc->count)) {
+   struct fuse_iqueue *fiq = &fc->iq;
+
if (fc->destroy_req)
fuse_request_free(fc->destroy_req);
+   if (fiq->priv && fiq->ops->put) {
+   fiq->ops->put(fiq);
+   fiq->priv = NULL;
+   }
put_pid_ns(fc->pid_ns);
put_user_ns(fc->user_ns);
fc->release(fc);
-- 
2.20.1



[PATCH 06/18] virtiofs: ->remove should not clean virtiofs fuse devices

2019-09-05 Thread Vivek Goyal
We maintain a fuse device per virt queue. These fuse devices are allocated
and installed during mount time and should be cleaned up when the super
block is going away. Device removal should not clean them up; it should
only stop the queues so that the virtqueues can go away.

Signed-off-by: Vivek Goyal 
---
 fs/fuse/virtio_fs.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index 5df97dfee37d..f68a25ca9e9d 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -497,8 +497,6 @@ static void virtio_fs_remove(struct virtio_device *vdev)
 {
struct virtio_fs *fs = vdev->priv;
 
-   virtio_fs_free_devs(fs);
-
vdev->config->reset(vdev);
virtio_fs_cleanup_vqs(vdev, fs);
 
-- 
2.20.1



[PATCH 12/18] virtiofs: Use virtio_fs_free_devs() in error path

2019-09-05 Thread Vivek Goyal
We already have a helper to clean up fuse devices. Use that instead of
duplicating the code.

Signed-off-by: Vivek Goyal 
---
 fs/fuse/virtio_fs.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index a76bd5a04521..40259368a6bd 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -999,8 +999,7 @@ static int virtio_fs_fill_super(struct super_block *sb)
 err_free_init_req:
fuse_request_free(init_req);
 err_free_fuse_devs:
-   for (i = 0; i < fs->nvqs; i++)
-   fuse_dev_free(fs->vqs[i].fud);
+   virtio_fs_free_devs(fs);
 err:
return err;
 }
-- 
2.20.1



[PATCH 03/18] virtiofs: Pass fsvq instead of vq as parameter to virtio_fs_enqueue_req

2019-09-05 Thread Vivek Goyal
Pass fsvq instead of vq as a parameter to virtio_fs_enqueue_req(). We will
retrieve the vq from the fsvq under a spin lock.

Later in the patch series we will retrieve the vq only if the fsvq is still
connected; otherwise the vq might have been cleaned up by the device
->remove code and we will return an error.

Signed-off-by: Vivek Goyal 
---
 fs/fuse/virtio_fs.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index e9497b565dd8..9d30530e3ca9 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -698,14 +698,15 @@ static unsigned int sg_init_fuse_args(struct scatterlist 
*sg,
 }
 
 /* Add a request to a virtqueue and kick the device */
-static int virtio_fs_enqueue_req(struct virtqueue *vq, struct fuse_req *req)
+static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
+struct fuse_req *req)
 {
/* requests need at least 4 elements */
struct scatterlist *stack_sgs[6];
struct scatterlist stack_sg[ARRAY_SIZE(stack_sgs)];
struct scatterlist **sgs = stack_sgs;
struct scatterlist *sg = stack_sg;
-   struct virtio_fs_vq *fsvq;
+   struct virtqueue *vq;
unsigned int argbuf_used = 0;
unsigned int out_sgs = 0;
unsigned int in_sgs = 0;
@@ -752,9 +753,9 @@ static int virtio_fs_enqueue_req(struct virtqueue *vq, 
struct fuse_req *req)
for (i = 0; i < total_sgs; i++)
sgs[i] = &sg[i];
 
-   fsvq = vq_to_fsvq(vq);
spin_lock(&fsvq->lock);
 
+   vq = fsvq->vq;
ret = virtqueue_add_sgs(vq, sgs, out_sgs, in_sgs, req, GFP_ATOMIC);
if (ret < 0) {
/* TODO handle full virtqueue */
@@ -824,7 +825,7 @@ __releases(fiq->waitq.lock)
/* TODO check for FR_INTERRUPTED? */
 
 retry:
-   ret = virtio_fs_enqueue_req(fs->vqs[queue_id].vq, req);
+   ret = virtio_fs_enqueue_req(&fs->vqs[queue_id], req);
if (ret < 0) {
if (ret == -ENOMEM || ret == -ENOSPC) {
/* Virtqueue full. Retry submission */
-- 
2.20.1



Re: [PATCH v4 15/16] virtio-fs: add virtiofs filesystem

2019-09-05 Thread Vivek Goyal
On Tue, Sep 03, 2019 at 09:55:49AM -0400, Michael S. Tsirkin wrote:
[..]
> What's with all of the TODOs? Some of these are really scary,
> looks like they need to be figured out before this is merged.

Hi Michael,

One of the issues I noticed is races w.r.t. device removal and super
block initialization. I am about to post a set of patches which
take care of these races and also get rid of some of the scary
TODOs. Other TODOs like suspend/restore, multiqueue support, etc.,
are improvements which we can do over a period of time.

[..]
> > +/* Per-virtqueue state */
> > +struct virtio_fs_vq {
> > +   spinlock_t lock;
> > +   struct virtqueue *vq; /* protected by ->lock */
> > +   struct work_struct done_work;
> > +   struct list_head queued_reqs;
> > +   struct delayed_work dispatch_work;
> > +   struct fuse_dev *fud;
> > +   bool connected;
> > +   long in_flight;
> > +   char name[24];
> 
> I'd keep names somewhere separate as they are not used on data path.

Ok, this sounds like a nice-to-have. Will take care of this once the base
patch gets merged.

[..]
> > +struct virtio_fs_forget {
> > +   struct fuse_in_header ih;
> > +   struct fuse_forget_in arg;
> 
> These structures are all native endian.
> 
> Passing them to host will make cross-endian setups painful to support,
> and hardware implementations impossible.
> 
> How about converting everything to LE?

So it looks like the endianness issue is now resolved (going by the other
emails), so I will not worry about it.

[..]
> > +/* Add a new instance to the list or return -EEXIST if tag name exists*/
> > +static int virtio_fs_add_instance(struct virtio_fs *fs)
> > +{
> > +   struct virtio_fs *fs2;
> > +   bool duplicate = false;
> > +
> > +   mutex_lock(&virtio_fs_mutex);
> > +
> > +   list_for_each_entry(fs2, &virtio_fs_instances, list) {
> > +   if (strcmp(fs->tag, fs2->tag) == 0)
> > +   duplicate = true;
> > +   }
> > +
> > +   if (!duplicate)
> > +   list_add_tail(&fs->list, &virtio_fs_instances);
> 
> 
> This is O(N^2) as it's presumably called for each istance.
> Doesn't scale - please switch to a tree or such.

This is O(N) and not O(N^2), right? Addition of a device is O(N), and the
search during mount is O(N).

This is not a frequent event at all, and the number of virtiofs instances
per guest is expected to be fairly small (say, less than 10). So I
really don't think there is any value in converting this into a tree
(instead of a list).

[..]
> > +static void virtio_fs_free_devs(struct virtio_fs *fs)
> > +{
> > +   unsigned int i;
> > +
> > +   /* TODO lock */
> 
> Doesn't inspire confidence, does it?

Agreed. Getting rid of this in set of fixes I am about to post.

> 
> > +
> > +   for (i = 0; i < fs->nvqs; i++) {
> > +   struct virtio_fs_vq *fsvq = &fs->vqs[i];
> > +
> > +   if (!fsvq->fud)
> > +   continue;
> > +
> > +   flush_work(&fsvq->done_work);
> > +   flush_delayed_work(&fsvq->dispatch_work);
> > +
> > +   /* TODO need to quiesce/end_requests/decrement dev_count */
> 
> Indeed. Won't this crash if we don't?

Took care of this as well.

[..]
> > +static void virtio_fs_hiprio_dispatch_work(struct work_struct *work)
> > +{
> > +   struct virtio_fs_forget *forget;
> > +   struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
> > +dispatch_work.work);
> > +   struct virtqueue *vq = fsvq->vq;
> > +   struct scatterlist sg;
> > +   struct scatterlist *sgs[] = {&sg};
> > +   bool notify;
> > +   int ret;
> > +
> > +   pr_debug("virtio-fs: worker %s called.\n", __func__);
> > +   while (1) {
> > +   spin_lock(&fsvq->lock);
> > +   forget = list_first_entry_or_null(&fsvq->queued_reqs,
> > +   struct virtio_fs_forget, list);
> > +   if (!forget) {
> > +   spin_unlock(&fsvq->lock);
> > +   return;
> > +   }
> > +
> > +   list_del(&forget->list);
> > +   if (!fsvq->connected) {
> > +   spin_unlock(&fsvq->lock);
> > +   kfree(forget);
> > +   continue;
> > +   }
> > +
> > +   sg_init_one(&sg, forget, sizeof(*forget));
> 
> This passes to host a structure including "struct list_head list";
> 
> Not a good idea.

Ok, the host does not have to see "struct list_head list". It's needed for
the guest. Will look into getting rid of this.

> 
> 
> > +
> > +   /* Enqueue the request */
> > +   dev_dbg(&vq->vdev->dev, "%s\n", __func__);
> > +   ret = virtqueue_add_sgs(vq, sgs, 1, 0, forget, GFP_ATOMIC);
> 
> 
> This is easier as add_outbuf.

Will look into it.

> 
> Also - why GFP_ATOMIC?

Hmm..., maybe it can be GFP_KERNEL. I don't see atomic context here. Will
look into it.

> 
> > +   if (ret < 0) {
> > +   if (ret == -ENOMEM || ret == -ENOSPC) {
> > +   pr_debug("virtio-fs: Could not queue FORGET: 
> 

Re: DANGER WILL ROBINSON, DANGER

2019-09-05 Thread Jerome Glisse
On Fri, Aug 23, 2019 at 12:39:21PM +, Mircea CIRJALIU - MELIU wrote:
> > On Thu, Aug 15, 2019 at 03:19:29PM -0400, Jerome Glisse wrote:
> > > On Tue, Aug 13, 2019 at 02:01:35PM +0300, Adalbert Lazăr wrote:
> > > > On Fri, 9 Aug 2019 09:24:44 -0700, Matthew Wilcox wrote:
> > > > > On Fri, Aug 09, 2019 at 07:00:26PM +0300, Adalbert Lazăr wrote:
> > > > > > +++ b/include/linux/page-flags.h
> > > > > > @@ -417,8 +417,10 @@ PAGEFLAG(Idle, idle, PF_ANY)
> > > > > >   */
> > > > > >  #define PAGE_MAPPING_ANON  0x1
> > > > > >  #define PAGE_MAPPING_MOVABLE   0x2
> > > > > > +#define PAGE_MAPPING_REMOTE0x4
> > > > >
> > > > > Uh.  How do you know page->mapping would otherwise have bit 2 clear?
> > > > > Who's guaranteeing that?
> > > > >
> > > > > This is an awfully big patch to the memory management code, buried
> > > > > in the middle of a gigantic series which almost guarantees nobody
> > > > > would look at it.  I call shenanigans.
> > > > >
> > > > > > @@ -1021,7 +1022,7 @@ void page_move_anon_rmap(struct page *page, struct vm_area_struct *vma)
> > > > > >   * __page_set_anon_rmap - set up new anonymous rmap
> > > > > >   * @page:  Page or Hugepage to add to rmap
> > > > > >   * @vma:   VM area to add page to.
> > > > > > - * @address:   User virtual address of the mapping
> > > > > > + * @address:   User virtual address of the mapping
> > > > >
> > > > > And mixing in fluff changes like this is a real no-no.  Try again.
> > > > >
> > > >
> > > > No bad intentions, just overzealous.
> > > > I didn't want to hide anything from our patches.
> > > > Once we advance with the introspection patches related to KVM we'll
> > > > be back with the remote mapping patch, split and cleaned.
> > >
> > > There are no bits left in struct page! Looking at the patch it seems
> > > you want to have your own pin count just for KVM. This is bad, we are
> > > already trying to solve the GUP thing (see all various patchset about
> > > GUP posted recently).
> > >
> > > You need to rethink how you want to achieve this. Why not simply a
> > > remote read()/write() into the process memory ie KVMI would call an
> > > ioctl that allow to read or write into a remote process memory like
> > > ptrace() but on steroid ...
> > >
> > > Adding this whole big complex infrastructure without justification of
> > > why we need to avoid round trip is just too much really.
> > 
> > Thinking a bit more about this, you can achieve the same thing without
> > adding a single line to any mm code. Instead of having mmap with
> > PROT_NONE | MAP_LOCKED you have userspace mmap some kvm device
> > file (i am assuming this is something you already have and can control the
> > mmap callback).
> > 
> > So now kernel side you have a vma with a vm_operations_struct under your
> > control; this means that everything you want to block mm-wise from within
> > the inspector process can be blocked through those callbacks
> > (find_special_page() specifically, for which you have to return NULL all
> > the time).
> > 
> > To mirror target process memory you can use hmm_mirror, when you
> > populate the inspector process page table you use insert_pfn() (mmap of
> > the kvm device file must mark this vma as PFNMAP).
> > 
> > By following the hmm_mirror API, anytime the target process has a change in
> > its page table (ie virtual address -> page) you will get a callback and all 
> > you
> > have to do is clear the page table within the inspector process and flush 
> > tlb
> > (use zap_page_range).
> > 
> > On page fault within the inspector process the fault callback of vm_ops will
> > get call and from there you call hmm_mirror following its API.
> > 
> > Oh also mark the vma with VM_WIPEONFORK to avoid any issue if the
> > inspector process use fork() (you could support fork but then you would
> > need to mark the vma as SHARED and use unmap_mapping_pages instead of
> > zap_page_range).
> > 
> > 
> > There everything you want to do with already upstream mm code.
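
A minimal sketch of the vma shape described above (all function names are
hypothetical, and inspector_mirror_lookup_pfn() stands in for the hmm_mirror
lookup; assumes a 5.3-era kernel where vmf_insert_pfn() and zap_page_range()
are available):

	static struct page *inspector_find_special_page(struct vm_area_struct *vma,
							unsigned long addr)
	{
		/* Never hand a struct page to the core mm for this vma. */
		return NULL;
	}

	static vm_fault_t inspector_fault(struct vm_fault *vmf)
	{
		unsigned long pfn;

		/* Resolve the target-process page through the hmm_mirror
		 * (hypothetical helper), then map it as a raw pfn; the vma
		 * is VM_PFNMAP, so no struct page is exposed. */
		pfn = inspector_mirror_lookup_pfn(vmf->vma, vmf->address);
		return vmf_insert_pfn(vmf->vma, vmf->address, pfn);
	}

	static const struct vm_operations_struct inspector_vm_ops = {
		.fault		   = inspector_fault,
		.find_special_page = inspector_find_special_page,
	};

	static int inspector_mmap(struct file *file, struct vm_area_struct *vma)
	{
		/* PFNMAP keeps the core mm away; WIPEONFORK handles fork(). */
		vma->vm_flags |= VM_PFNMAP | VM_WIPEONFORK;
		vma->vm_ops = &inspector_vm_ops;
		return 0;
	}

	/* Called from the hmm_mirror sync callback when the target's page
	 * table changes: drop the mirrored range and flush the TLB. */
	static void inspector_invalidate(struct vm_area_struct *vma,
					 unsigned long start, unsigned long end)
	{
		zap_page_range(vma, start, end - start);
	}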
> 
> I'm the author of remote mapping, so I owe everybody some explanations.
> My requirement was to map pages from one QEMU process to another QEMU
> process (our inspector process works in a virtual machine of its own). So I
> had to implement a KSM-like page sharing between processes, where an anon
> page from the target QEMU's working memory is promoted to a remote page
> and mapped in the inspector QEMU's working memory (both anon VMAs).
> The extra page flag is for differentiating the page for rmap walking.
> 
> The mapping requests come at PAGE_SIZE granularity for random addresses
> within the target/inspector QEMUs, so I couldn't do any linear mapping
> that would keep things simpler.
> 
> I have an extra patch that does remote mapping by mirroring an entire VMA
> from the target process by way of a device file. This thing creates a
> separate mirror VMA in my inspector process (at the moment a QEMU), but
> then I bumped into the KVM hva->gpa mapping, which 

Re: [PATCH] drm/virtio: fix command submission with objects but without fence.

2019-09-05 Thread Chia-I Wu
On Wed, Sep 4, 2019 at 10:23 PM Gerd Hoffmann  wrote:
>
> On Wed, Sep 04, 2019 at 04:10:30PM -0700, Chia-I Wu wrote:
> > On Wed, Sep 4, 2019 at 12:48 AM Gerd Hoffmann  wrote:
> > >
> > > Only call virtio_gpu_array_add_fence if we actually have a fence.
> > >
> > > Fixes: da758d51968a ("drm/virtio: rework virtio_gpu_execbuffer_ioctl fencing")
> > > Signed-off-by: Gerd Hoffmann 
> > > ---
> > >  drivers/gpu/drm/virtio/virtgpu_vq.c | 9 +
> > >  1 file changed, 5 insertions(+), 4 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c
> > > index 595fa6ec2d58..7fd2851f7b97 100644
> > > --- a/drivers/gpu/drm/virtio/virtgpu_vq.c
> > > +++ b/drivers/gpu/drm/virtio/virtgpu_vq.c
> > > @@ -339,11 +339,12 @@ static void virtio_gpu_queue_fenced_ctrl_buffer(struct virtio_gpu_device *vgdev,
> > > goto again;
> > > }
> > >
> > > -   if (fence)
> > > +   if (fence) {
> > > virtio_gpu_fence_emit(vgdev, hdr, fence);
> > > -   if (vbuf->objs) {
> > > -   virtio_gpu_array_add_fence(vbuf->objs, &fence->f);
> > > -   virtio_gpu_array_unlock_resv(vbuf->objs);
> > > +   if (vbuf->objs) {
> > > +   virtio_gpu_array_add_fence(vbuf->objs, &fence->f);
> > > +   virtio_gpu_array_unlock_resv(vbuf->objs);
> > > +   }
> > This leaks when fence == NULL and vbuf->objs != NULL (which can really
> > happen IIRC... not at my desk to check).
>
> Yes, it can happen, for example when flushing dumb buffers.
>
> But I don't think we leak in this case.  The code paths which don't need
> a fence also do not call virtio_gpu_array_lock_resv(), so things are
> balanced.  The actual release of the objs happens in
> virtio_gpu_dequeue_ctrl_func() via virtio_gpu_array_put_free_delayed().
I misread and thought this was in virtio_gpu_dequeue_ctrl_func.  Sorry :(

Reviewed-by: Chia-I Wu 



>
> cheers,
>   Gerd
>


Re: [PATCH 0/2] Revert and rework on the metadata acceleration

2019-09-05 Thread Jason Gunthorpe
On Thu, Sep 05, 2019 at 08:27:34PM +0800, Jason Wang wrote:
> Hi:
> 
> Per request from Michael and Jason, the metadata acceleration is
> reverted in this version and reworked in the next version.
> 
> Please review.
> 
> Thanks
> 
> Jason Wang (2):
>   Revert "vhost: access vq metadata through kernel virtual address"
>   vhost: re-introducing metadata acceleration through kernel virtual
> address

There are a bunch of patches in the queue already that will help
vhost, and I am working on one for the next cycle that will help a lot
more too.

I think you should apply the revert this cycle and rebase the other
patch for the next.

Jason


Re: Xorg indefinitely hangs in kernelspace

2019-09-05 Thread Jaak Ristioja
On 05.09.19 10:14, Gerd Hoffmann wrote:
> On Tue, Aug 06, 2019 at 09:00:10PM +0300, Jaak Ristioja wrote:
>> Hello!
>>
>> I'm writing to report a crash in the QXL / DRM code in the Linux kernel.
>> I originally filed the issue on LaunchPad and more details can be found
>> there, although I doubt whether these details are useful.
> 
> Any change with kernel 5.3-rc7 ?

I didn't try. Did you change something? I could try, but I've done so
before, and every time this bug manifests itself with MAJOR.MINOR-rc# I
get asked to try version MAJOR.(MINOR+1)-rc#, so I guess I might as well
give up?

Alright, I'll install 5.3-rc7, but once more it might take some time for
this bug to expose itself.

One thing to note, though, is that this occurred much more often with
older kernels (or older versions of the Kubuntu desktop/Firefox in my
VM), sometimes even several times within an hour of use.


Best regards,
J


[PATCH 2/2] vhost: re-introducing metadata acceleration through kernel virtual address

2019-09-05 Thread Jason Wang
This is a rework of commit 7f466032dc9e ("vhost: access vq
metadata through kernel virtual address").

It was noticed that the copy_to/from_user() friends that were used to
access virtqueue metadata tend to be very expensive for a dataplane
implementation like vhost, since they involve lots of software checks,
speculation barriers, and hardware feature toggling (e.g. SMAP). The
extra cost is more obvious when transferring small packets, since the
time spent on metadata access becomes more significant.

This patch tries to eliminate those overheads by accessing the metadata
through a direct kernel mapping of those pages. Invalidation callbacks
are implemented for cooperation with general VM management (swap, KSM,
THP, or NUMA balancing). We try to get the direct mapping of vq
metadata before each round of packet processing if it doesn't exist
yet. If that fails, we simply fall back to the copy_to/from_user()
friends.

Invalidation, direct-mapping access, and map setup are synchronized
through a spinlock. This takes a step back from the original commit
7f466032dc9e ("vhost: access vq metadata through kernel virtual
address"), which tried to use RCU in a way that is suspicious and hard
to review. This won't perform as well as RCU because of the atomic
operations, but that could be addressed by future optimization.

This method does not work for highmem pages, which require temporary
mappings, so for those we simply fall back to the normal
copy_to/from_user(). It may also not work for architectures with
virtually tagged caches, since extra cache flushing would be needed to
eliminate aliases, which would result in complex logic and bad
performance. For those architectures, this patch simply goes with the
copy_to/from_user() friends. This is done by ruling out the kernel
mapping code through ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE.

Note that this is only done when the device IOTLB is not enabled. We
could use a similar method to optimize the IOTLB in the future.

Tests show at most about a 22% improvement in TX PPS when using
virtio-user + vhost_net + xdp1 + TAP on a 4.0GHz Kaby Lake:

        SMAP on | SMAP off
Before: 4.9Mpps | 6.9Mpps
After:  6.0Mpps | 7.5Mpps

On an older Sandy Bridge CPU without SMAP support, TX PPS doesn't show
any difference.
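
In outline, each metadata access under this scheme does the following
(a sketch reusing names visible in the diff below; the helper shape and
fallback path are assumptions, not the patch itself):

static int vhost_get_avail_idx_fast(struct vhost_virtqueue *vq,
				    __virtio16 *idx)
{
	struct vhost_map *map;

	spin_lock(&vq->mmu_lock);	/* synchronizes with invalidation */
	map = vq->maps[VHOST_ADDR_AVAIL];
	if (likely(map)) {
		struct vring_avail *avail = map->addr;

		*idx = avail->idx;	/* direct kernel-VA access */
		spin_unlock(&vq->mmu_lock);
		return 0;
	}
	spin_unlock(&vq->mmu_lock);

	/* No mapping (e.g. just invalidated): fall back to uaccess. */
	return __get_user(*idx, &vq->avail->idx);
}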

Cc: Andrea Arcangeli 
Cc: James Bottomley 
Cc: Christoph Hellwig 
Cc: David Miller 
Cc: Jerome Glisse 
Cc: Jason Gunthorpe 
Cc: linux...@kvack.org
Cc: linux-arm-ker...@lists.infradead.org
Cc: linux-par...@vger.kernel.org
Signed-off-by: Jason Wang 
Signed-off-by: Michael S. Tsirkin 
---
 drivers/vhost/vhost.c | 551 +-
 drivers/vhost/vhost.h |  41 
 2 files changed, 589 insertions(+), 3 deletions(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 791562e03fe0..f98155f28f02 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -298,6 +298,182 @@ static void vhost_vq_meta_reset(struct vhost_dev *d)
__vhost_vq_meta_reset(d->vqs[i]);
 }
 
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+static void vhost_map_unprefetch(struct vhost_map *map)
+{
+   kfree(map->pages);
+   kfree(map);
+}
+
+static void vhost_set_map_dirty(struct vhost_virtqueue *vq,
+   struct vhost_map *map, int index)
+{
+   struct vhost_uaddr *uaddr = &vq->uaddrs[index];
+   int i;
+
+   if (uaddr->write) {
+   for (i = 0; i < map->npages; i++)
+   set_page_dirty(map->pages[i]);
+   }
+}
+
+static void vhost_uninit_vq_maps(struct vhost_virtqueue *vq)
+{
+   struct vhost_map *map[VHOST_NUM_ADDRS];
+   int i;
+
+   spin_lock(&vq->mmu_lock);
+   for (i = 0; i < VHOST_NUM_ADDRS; i++) {
+   map[i] = vq->maps[i];
+   if (map[i]) {
+   vhost_set_map_dirty(vq, map[i], i);
+   vq->maps[i] = NULL;
+   }
+   }
+   spin_unlock(&vq->mmu_lock);
+
+   /* No need for synchronization since we are serialized with
+* memory accessors (e.g vq mutex held).
+*/
+
+   for (i = 0; i < VHOST_NUM_ADDRS; i++)
+   if (map[i])
+   vhost_map_unprefetch(map[i]);
+
+}
+
+static void vhost_reset_vq_maps(struct vhost_virtqueue *vq)
+{
+   int i;
+
+   vhost_uninit_vq_maps(vq);
+   for (i = 0; i < VHOST_NUM_ADDRS; i++)
+   vq->uaddrs[i].size = 0;
+}
+
+static bool vhost_map_range_overlap(struct vhost_uaddr *uaddr,
+unsigned long start,
+unsigned long end)
+{
+   if (unlikely(!uaddr->size))
+   return false;
+
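+   /* True iff [start, end] overlaps [uaddr, uaddr + size - 1]. The
+    * range end is written as "uaddr - 1 + size" so that uaddr + size
+    * cannot overflow at the top of the address space. */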
+   return !(end < uaddr->uaddr || start > uaddr->uaddr - 1 + uaddr->size);
+}
+
+static inline void vhost_vq_access_map_begin(struct vhost_virtqueue *vq)
+{
+   spin_lock(&vq->mmu_lock);
+}
+
+static inline void vhost_vq_access_map_end(struct vhost_virtqueue *vq)
+{
+   spin_unlock(&vq->mmu_lock);
+}
+
+static int vhost_invalidate_vq_start(struct vhost_virtqueue *vq,
+int index,
+   

[PATCH 0/2] Revert and rework on the metadata acceleration

2019-09-05 Thread Jason Wang
Hi:

Per request from Michael and Jason, the metadata acceleration is
reverted in this version and reworked in the next version.

Please review.

Thanks

Jason Wang (2):
  Revert "vhost: access vq metadata through kernel virtual address"
  vhost: re-introducing metadata acceleration through kernel virtual
address

 drivers/vhost/vhost.c | 202 +-
 drivers/vhost/vhost.h |   8 +-
 2 files changed, 123 insertions(+), 87 deletions(-)

-- 
2.19.1



[PATCH 1/2] Revert "vhost: access vq metadata through kernel virtual address"

2019-09-05 Thread Jason Wang
It was reported that the metadata acceleration introduces several issues,
so this patch reverts commits ff466032dc9e5a61217f22ea34b2df932786bbfc,
73f628ec9e6bcc45b77c53fe6d0c0ec55eaf82af and
0b4a7092ffe568a55bf8f3cefdf79ff666586d91.

We will rework it in the next version.

Cc: Jason Gunthorpe 
Signed-off-by: Jason Wang 
---
 drivers/vhost/vhost.c | 515 +-
 drivers/vhost/vhost.h |  41 
 2 files changed, 3 insertions(+), 553 deletions(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 0536f8526359..791562e03fe0 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -298,160 +298,6 @@ static void vhost_vq_meta_reset(struct vhost_dev *d)
__vhost_vq_meta_reset(d->vqs[i]);
 }
 
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
-static void vhost_map_unprefetch(struct vhost_map *map)
-{
-   kfree(map->pages);
-   map->pages = NULL;
-   map->npages = 0;
-   map->addr = NULL;
-}
-
-static void vhost_uninit_vq_maps(struct vhost_virtqueue *vq)
-{
-   struct vhost_map *map[VHOST_NUM_ADDRS];
-   int i;
-
-   spin_lock(&vq->mmu_lock);
-   for (i = 0; i < VHOST_NUM_ADDRS; i++) {
-   map[i] = rcu_dereference_protected(vq->maps[i],
- lockdep_is_held(&vq->mmu_lock));
-   if (map[i])
-   rcu_assign_pointer(vq->maps[i], NULL);
-   }
-   spin_unlock(&vq->mmu_lock);
-
-   synchronize_rcu();
-
-   for (i = 0; i < VHOST_NUM_ADDRS; i++)
-   if (map[i])
-   vhost_map_unprefetch(map[i]);
-
-}
-
-static void vhost_reset_vq_maps(struct vhost_virtqueue *vq)
-{
-   int i;
-
-   vhost_uninit_vq_maps(vq);
-   for (i = 0; i < VHOST_NUM_ADDRS; i++)
-   vq->uaddrs[i].size = 0;
-}
-
-static bool vhost_map_range_overlap(struct vhost_uaddr *uaddr,
-unsigned long start,
-unsigned long end)
-{
-   if (unlikely(!uaddr->size))
-   return false;
-
-   return !(end < uaddr->uaddr || start > uaddr->uaddr - 1 + uaddr->size);
-}
-
-static void vhost_invalidate_vq_start(struct vhost_virtqueue *vq,
- int index,
- unsigned long start,
- unsigned long end)
-{
-   struct vhost_uaddr *uaddr = &vq->uaddrs[index];
-   struct vhost_map *map;
-   int i;
-
-   if (!vhost_map_range_overlap(uaddr, start, end))
-   return;
-
-   spin_lock(&vq->mmu_lock);
-   ++vq->invalidate_count;
-
-   map = rcu_dereference_protected(vq->maps[index],
-   lockdep_is_held(&vq->mmu_lock));
-   if (map) {
-   if (uaddr->write) {
-   for (i = 0; i < map->npages; i++)
-   set_page_dirty(map->pages[i]);
-   }
-   rcu_assign_pointer(vq->maps[index], NULL);
-   }
-   spin_unlock(&vq->mmu_lock);
-
-   if (map) {
-   synchronize_rcu();
-   vhost_map_unprefetch(map);
-   }
-}
-
-static void vhost_invalidate_vq_end(struct vhost_virtqueue *vq,
-   int index,
-   unsigned long start,
-   unsigned long end)
-{
-   if (!vhost_map_range_overlap(&vq->uaddrs[index], start, end))
-   return;
-
-   spin_lock(&vq->mmu_lock);
-   --vq->invalidate_count;
-   spin_unlock(&vq->mmu_lock);
-}
-
-static int vhost_invalidate_range_start(struct mmu_notifier *mn,
-   const struct mmu_notifier_range *range)
-{
-   struct vhost_dev *dev = container_of(mn, struct vhost_dev,
-mmu_notifier);
-   int i, j;
-
-   if (!mmu_notifier_range_blockable(range))
-   return -EAGAIN;
-
-   for (i = 0; i < dev->nvqs; i++) {
-   struct vhost_virtqueue *vq = dev->vqs[i];
-
-   for (j = 0; j < VHOST_NUM_ADDRS; j++)
-   vhost_invalidate_vq_start(vq, j,
- range->start,
- range->end);
-   }
-
-   return 0;
-}
-
-static void vhost_invalidate_range_end(struct mmu_notifier *mn,
-  const struct mmu_notifier_range *range)
-{
-   struct vhost_dev *dev = container_of(mn, struct vhost_dev,
-mmu_notifier);
-   int i, j;
-
-   for (i = 0; i < dev->nvqs; i++) {
-   struct vhost_virtqueue *vq = dev->vqs[i];
-
-   for (j = 0; j < VHOST_NUM_ADDRS; j++)
-   vhost_invalidate_vq_end(vq, j,
-   range->start,
-   range->end)

Re: [PATCH net-next] vsock/virtio: a better comment on credit update

2019-09-05 Thread David Miller
From: "Michael S. Tsirkin" 
Date: Tue, 3 Sep 2019 03:38:16 -0400

> The comment we have is just repeating what the code does.
> Include the *reason* for the condition instead.
> 
> Cc: Stefano Garzarella 
> Signed-off-by: Michael S. Tsirkin 

Applied.


Re: Xorg indefinitely hangs in kernelspace

2019-09-05 Thread Gerd Hoffmann
On Tue, Aug 06, 2019 at 09:00:10PM +0300, Jaak Ristioja wrote:
> Hello!
> 
> I'm writing to report a crash in the QXL / DRM code in the Linux kernel.
> I originally filed the issue on LaunchPad and more details can be found
> there, although I doubt whether these details are useful.

Any change with kernel 5.3-rc7 ?

cheers,
  Gerd



[PATCH 6/8] drm/qxl: switch to gem vma offset manager

2019-09-05 Thread Gerd Hoffmann
Pass the gem vma_offset_manager to ttm_bo_device_init(), so ttm uses it
instead of its own embedded struct.  This makes some gem functions
(specifically drm_gem_object_lookup) work on ttm objects; a short
illustration follows.
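
For instance, a driver path can now resolve a ttm-backed object through
the common gem helper (an illustrative sketch, not code from this series;
the function name is hypothetical):

	static int qxl_lookup_example(struct drm_file *file_priv, u32 handle)
	{
		struct drm_gem_object *obj;

		/* With ttm sharing the gem vma_offset_manager, this
		 * lookup also covers qxl's ttm-backed objects. */
		obj = drm_gem_object_lookup(file_priv, handle);
		if (!obj)
			return -ENOENT;

		/* e.g. read the fake mmap offset via the shared manager:
		 * u64 off = drm_vma_node_offset_addr(&obj->vma_node); */

		drm_gem_object_put_unlocked(obj);
		return 0;
	}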

Signed-off-by: Gerd Hoffmann 
---
 drivers/gpu/drm/qxl/qxl_ttm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c
index 69da0eea6e4c..cbc6c2ba8630 100644
--- a/drivers/gpu/drm/qxl/qxl_ttm.c
+++ b/drivers/gpu/drm/qxl/qxl_ttm.c
@@ -325,7 +325,7 @@ int qxl_ttm_init(struct qxl_device *qdev)
r = ttm_bo_device_init(&qdev->mman.bdev,
   &qxl_bo_driver,
   qdev->ddev.anon_inode->i_mapping,
-  NULL,
+  qdev->ddev.vma_offset_manager,
   false);
if (r) {
DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
-- 
2.18.1



[PATCH 1/8] drm/ttm: turn ttm_bo_device.vma_manager into a pointer

2019-09-05 Thread Gerd Hoffmann
Rename the embedded struct drm_vma_offset_manager; its new name is
_vma_manager.  ttm_bo_device.vma_manager is changed to a pointer.

The ttm_bo_device_init() function gets an additional vma_manager
argument, which allows initializing ttm with a different vma manager.
When NULL is passed, the embedded _vma_manager is used.

All callers are updated to pass NULL, so the behavior doesn't change.
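
The selection this implies inside ttm_bo_device_init() is a simple
fallback, roughly as follows (a sketch of the assumed hunk; the actual
ttm_bo.c change is not included in this excerpt):

	if (vma_manager) {
		/* caller-provided manager, e.g. the gem one */
		bdev->vma_manager = vma_manager;
	} else {
		drm_vma_offset_manager_init(&bdev->_vma_manager,
					    DRM_FILE_PAGE_OFFSET_START,
					    DRM_FILE_PAGE_OFFSET_SIZE);
		bdev->vma_manager = &bdev->_vma_manager; /* embedded default */
	}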

Signed-off-by: Gerd Hoffmann 
---
 include/drm/ttm/ttm_bo_driver.h |  8 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c |  1 +
 drivers/gpu/drm/drm_vram_mm_helper.c|  1 +
 drivers/gpu/drm/nouveau/nouveau_ttm.c   |  1 +
 drivers/gpu/drm/qxl/qxl_ttm.c   |  1 +
 drivers/gpu/drm/radeon/radeon_ttm.c |  1 +
 drivers/gpu/drm/ttm/ttm_bo.c| 13 +
 drivers/gpu/drm/ttm/ttm_bo_vm.c |  6 +++---
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c |  1 +
 9 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index e88e00c6cbf2..e365434f92b3 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -441,7 +441,8 @@ extern struct ttm_bo_global {
  *
  * @driver: Pointer to a struct ttm_bo_driver struct setup by the driver.
  * @man: An array of mem_type_managers.
- * @vma_manager: Address space manager
+ * @vma_manager: Address space manager (pointer)
+ * @_vma_manager: Address space manager (embedded)
  * lru_lock: Spinlock that protects the buffer+device lru lists and
  * ddestroy lists.
  * @dev_mapping: A pointer to the struct address_space representing the
@@ -464,7 +465,8 @@ struct ttm_bo_device {
/*
 * Protected by internal locks.
 */
-   struct drm_vma_offset_manager vma_manager;
+   struct drm_vma_offset_manager *vma_manager;
+   struct drm_vma_offset_manager _vma_manager;
 
/*
 * Protected by the global:lru lock.
@@ -585,6 +587,7 @@ int ttm_bo_device_release(struct ttm_bo_device *bdev);
  * @glob: A pointer to an initialized struct ttm_bo_global.
  * @driver: A pointer to a struct ttm_bo_driver set up by the caller.
  * @mapping: The address space to use for this bo.
+ * @vma_manager: A pointer to a vma manager or NULL.
  * @file_page_offset: Offset into the device address space that is available
  * for buffer data. This ensures compatibility with other users of the
  * address space.
@@ -596,6 +599,7 @@ int ttm_bo_device_release(struct ttm_bo_device *bdev);
 int ttm_bo_device_init(struct ttm_bo_device *bdev,
   struct ttm_bo_driver *driver,
   struct address_space *mapping,
+  struct drm_vma_offset_manager *vma_manager,
   bool need_dma32);
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index fb09314bcfd4..34ee5d725faf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1728,6 +1728,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
r = ttm_bo_device_init(&adev->mman.bdev,
   &amdgpu_bo_driver,
   adev->ddev->anon_inode->i_mapping,
+  NULL,
   adev->need_dma32);
if (r) {
DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
diff --git a/drivers/gpu/drm/drm_vram_mm_helper.c b/drivers/gpu/drm/drm_vram_mm_helper.c
index c911781d6728..56fd1519eb35 100644
--- a/drivers/gpu/drm/drm_vram_mm_helper.c
+++ b/drivers/gpu/drm/drm_vram_mm_helper.c
@@ -172,6 +172,7 @@ int drm_vram_mm_init(struct drm_vram_mm *vmm, struct drm_device *dev,
 
ret = ttm_bo_device_init(&vmm->bdev, &bo_driver,
 dev->anon_inode->i_mapping,
+NULL,
 true);
if (ret)
return ret;
diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c b/drivers/gpu/drm/nouveau/nouveau_ttm.c
index f0daf958e03a..e67eb10843d1 100644
--- a/drivers/gpu/drm/nouveau/nouveau_ttm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c
@@ -236,6 +236,7 @@ nouveau_ttm_init(struct nouveau_drm *drm)
ret = ttm_bo_device_init(&drm->ttm.bdev,
  &nouveau_bo_driver,
  dev->anon_inode->i_mapping,
+NULL,
  drm->client.mmu.dmabits <= 32 ? true : false);
if (ret) {
NV_ERROR(drm, "error initialising bo driver, %d\n", ret);
diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c
index 9b24514c75aa..69da0eea6e4c 100644
--- a/drivers/gpu/drm/qxl/qxl_ttm.c
+++ b/drivers/gpu/drm/qxl/qxl_ttm.c
@@ -325,6 +325,7 @@ int qxl_ttm_init(struct qxl_device *qdev)
r = ttm_bo_device_init(&qdev->mman.bdev,
   &qxl_bo_driver,
   qdev->ddev.anon_inode