Hi, Maarten,

As you know, I have been having my doubts about this change.
To me it seems insane to be forced to read the fence pointer under a
reservation, simply because when you try to take the reservation, another
process may already hold it, and there is a substantial chance that that
process is also waiting for idle while holding the reservation.

In essence, to read the fence pointer there is a large chance you will
first have to wait for idle. That's why the pointer is protected by a
separate spinlock in the first place.
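
Roughly, the scenario I worry about looks like this (a sketch only;
error handling and surrounding context omitted):

        /* Process A: holds the reservation while it waits for idle. */
        ttm_bo_reserve(bo, false, false, false, 0);
        ttm_bo_wait(bo, false, false, false);  /* may sleep for a long time */
        ttm_bo_unreserve(bo);

        /* Process B: only wants the fence pointer. With this change it
         * must reserve first, so it blocks behind A's wait for idle.
         * Today it can simply do:
         */
        spin_lock(&bo->bdev->fence_lock);
        fence = bo->sync_obj ? driver->sync_obj_ref(bo->sync_obj) : NULL;
        spin_unlock(&bo->bdev->fence_lock);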

So even if this change might not affect current drivers much, it's a change
to the TTM API that leads to what is, IMHO, a very poor design.

/Thomas


On 10/12/2012 05:23 PM, Maarten Lankhorst wrote:
> With the nouveau calls fixed, there were only 2 places left that used
> fence_lock without a reservation; those are fixed in this patch:
>
> ttm_bo_cleanup_refs_or_queue is fixed by simply doing things the other
> way around: reserve first, then check the fence.
>
> ttm_bo_cleanup_refs is fixed by taking a reservation first, then taking a
> pointer to the fence object, and backing off from the reservation if
> waiting has to be performed.
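>
> In pseudo-code (a rough sketch; see the ttm_bo.c hunks below for the
> real version), the new ttm_bo_cleanup_refs flow is:
>
>       retry:
>               spin_lock(&glob->lru_lock);
>               ret = ttm_bo_reserve_locked(bo, interruptible, no_wait_reserve, false, 0);
>               ...
>               ret = ttm_bo_wait(bo, false, false, true); /* no_wait_gpu */
>               if (ret == -EBUSY) {
>                       sync_obj = driver->sync_obj_ref(bo->sync_obj);
>                       /* unreserve and drop lru_lock */
>                       ret = driver->sync_obj_wait(sync_obj, false, interruptible);
>                       driver->sync_obj_unref(&sync_obj);
>                       if (!ret)
>                               goto retry;
>               }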
>
> Signed-off-by: Maarten Lankhorst <maarten.lankhorst at canonical.com>
> ---
> This patch should be applied only after all the previous patches I sent are
> applied, since it depends on sync_obj_arg removal (kinda; it probably fails
> to apply otherwise), uses ttm_bo_is_reserved, and depends on the nouveau
> wait behavior being fixed.
>
>   drivers/gpu/drm/nouveau/nouveau_bo.c   |    4 -
>   drivers/gpu/drm/nouveau/nouveau_gem.c  |   15 +---
>   drivers/gpu/drm/radeon/radeon_object.c |    2 -
>   drivers/gpu/drm/ttm/ttm_bo.c           |  124 ++++++++++++--------------------
>   drivers/gpu/drm/ttm/ttm_bo_util.c      |    3 -
>   drivers/gpu/drm/ttm/ttm_bo_vm.c        |    5 -
>   drivers/gpu/drm/ttm/ttm_execbuf_util.c |    2 -
>   drivers/gpu/drm/vmwgfx/vmwgfx_drv.c    |    3 -
>   include/drm/ttm/ttm_bo_api.h           |   12 +--
>   include/drm/ttm/ttm_bo_driver.h        |    3 -
>   10 files changed, 52 insertions(+), 121 deletions(-)
>
> diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
> index cee7448..9749c45 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_bo.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
> @@ -1449,10 +1449,8 @@ nouveau_bo_fence(struct nouveau_bo *nvbo, struct nouveau_fence *fence)
>       if (likely(fence))
>               nouveau_fence_ref(fence);
>   
> -     spin_lock(&nvbo->bo.bdev->fence_lock);
>       old_fence = nvbo->bo.sync_obj;
>       nvbo->bo.sync_obj = fence;
> -     spin_unlock(&nvbo->bo.bdev->fence_lock);
>   
>       nouveau_fence_unref(&old_fence);
>   }
> @@ -1552,9 +1550,7 @@ nouveau_bo_vma_del(struct nouveau_bo *nvbo, struct nouveau_vma *vma)
>       if (vma->node) {
>               if (nvbo->bo.mem.mem_type != TTM_PL_SYSTEM) {
>                       ttm_bo_reserve(&nvbo->bo, false, false, false, 0);
> -                     spin_lock(&nvbo->bo.bdev->fence_lock);
>                       ttm_bo_wait(&nvbo->bo, false, false, false);
> -                     spin_unlock(&nvbo->bo.bdev->fence_lock);
>                       ttm_bo_unreserve(&nvbo->bo);
>                       nouveau_vm_unmap(vma);
>               }
> diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
> index 6d8391d..eaa700a 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_gem.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
> @@ -391,18 +391,11 @@ retry:
>   static int
>   validate_sync(struct nouveau_channel *chan, struct nouveau_bo *nvbo)
>   {
> -     struct nouveau_fence *fence = NULL;
> +     struct nouveau_fence *fence = nvbo->bo.sync_obj;
>       int ret = 0;
>   
> -     spin_lock(&nvbo->bo.bdev->fence_lock);
> -     if (nvbo->bo.sync_obj)
> -             fence = nouveau_fence_ref(nvbo->bo.sync_obj);
> -     spin_unlock(&nvbo->bo.bdev->fence_lock);
> -
> -     if (fence) {
> +     if (fence)
>               ret = nouveau_fence_sync(fence, chan);
> -             nouveau_fence_unref(&fence);
> -     }
>   
>       return ret;
>   }
> @@ -614,9 +607,7 @@ nouveau_gem_pushbuf_reloc_apply(struct drm_device *dev,
>                               data |= r->vor;
>               }
>   
> -             spin_lock(&nvbo->bo.bdev->fence_lock);
>               ret = ttm_bo_wait(&nvbo->bo, false, false, false);
> -             spin_unlock(&nvbo->bo.bdev->fence_lock);
>               if (ret) {
>                       NV_ERROR(drm, "reloc wait_idle failed: %d\n", ret);
>                       break;
> @@ -848,11 +839,9 @@ nouveau_gem_ioctl_cpu_prep(struct drm_device *dev, void *data,
>   
>       ret = ttm_bo_reserve(&nvbo->bo, true, false, false, 0);
>       if (!ret) {
> -             spin_lock(&nvbo->bo.bdev->fence_lock);
>               ret = ttm_bo_wait(&nvbo->bo, true, true, true);
>               if (!no_wait && ret)
>                       fence = nouveau_fence_ref(nvbo->bo.sync_obj);
> -             spin_unlock(&nvbo->bo.bdev->fence_lock);
>   
>               ttm_bo_unreserve(&nvbo->bo);
>       }
> diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
> index 808c444..df430e4 100644
> --- a/drivers/gpu/drm/radeon/radeon_object.c
> +++ b/drivers/gpu/drm/radeon/radeon_object.c
> @@ -613,12 +613,10 @@ int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type, bool no_wait)
>       r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, 0);
>       if (unlikely(r != 0))
>               return r;
> -     spin_lock(&bo->tbo.bdev->fence_lock);
>       if (mem_type)
>               *mem_type = bo->tbo.mem.mem_type;
>       if (bo->tbo.sync_obj)
>               r = ttm_bo_wait(&bo->tbo, true, true, no_wait);
> -     spin_unlock(&bo->tbo.bdev->fence_lock);
>       ttm_bo_unreserve(&bo->tbo);
>       return r;
>   }
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index f6d7026..69fc29b 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -498,28 +498,23 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
>   {
>       struct ttm_bo_device *bdev = bo->bdev;
>       struct ttm_bo_global *glob = bo->glob;
> -     struct ttm_bo_driver *driver;
> +     struct ttm_bo_driver *driver = bdev->driver;
>       void *sync_obj = NULL;
>       int put_count;
>       int ret;
>   
> -     spin_lock(&bdev->fence_lock);
> -     (void) ttm_bo_wait(bo, false, false, true);
> -     if (!bo->sync_obj) {
> -
> -             spin_lock(&glob->lru_lock);
> -
> -             /**
> -              * Lock inversion between bo:reserve and bdev::fence_lock here,
> -              * but that's OK, since we're only trylocking.
> -              */
> -
> -             ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
> +     spin_lock(&glob->lru_lock);
> +     ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
> +     if (!ret) {
> +             ret = ttm_bo_wait(bo, false, false, true);
>   
> -             if (unlikely(ret == -EBUSY))
> +             if (unlikely(ret == -EBUSY)) {
> +                     sync_obj = driver->sync_obj_ref(bo->sync_obj);
> +                     atomic_set(&bo->reserved, 0);
> +                     wake_up_all(&bo->event_queue);
>                       goto queue;
> +             }
>   
> -             spin_unlock(&bdev->fence_lock);
>               put_count = ttm_bo_del_from_lru(bo);
>   
>               spin_unlock(&glob->lru_lock);
> @@ -528,18 +523,11 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
>               ttm_bo_list_ref_sub(bo, put_count, true);
>   
>               return;
> -     } else {
> -             spin_lock(&glob->lru_lock);
>       }
>   queue:
> -     driver = bdev->driver;
> -     if (bo->sync_obj)
> -             sync_obj = driver->sync_obj_ref(bo->sync_obj);
> -
>       kref_get(&bo->list_kref);
>       list_add_tail(&bo->ddestroy, &bdev->ddestroy);
>       spin_unlock(&glob->lru_lock);
> -     spin_unlock(&bdev->fence_lock);
>   
>       if (sync_obj) {
>               driver->sync_obj_flush(sync_obj);
> @@ -565,25 +553,15 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>                              bool no_wait_gpu)
>   {
>       struct ttm_bo_device *bdev = bo->bdev;
> +     struct ttm_bo_driver *driver = bdev->driver;
>       struct ttm_bo_global *glob = bo->glob;
>       int put_count;
>       int ret = 0;
> +     void *sync_obj;
>   
>   retry:
> -     spin_lock(&bdev->fence_lock);
> -     ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
> -     spin_unlock(&bdev->fence_lock);
> -
> -     if (unlikely(ret != 0))
> -             return ret;
> -
>       spin_lock(&glob->lru_lock);
>   
> -     if (unlikely(list_empty(&bo->ddestroy))) {
> -             spin_unlock(&glob->lru_lock);
> -             return 0;
> -     }
> -
>       ret = ttm_bo_reserve_locked(bo, interruptible,
>                                   no_wait_reserve, false, 0);
>   
> @@ -592,18 +570,37 @@ retry:
>               return ret;
>       }
>   
> -     /**
> -      * We can re-check for sync object without taking
> -      * the bo::lock since setting the sync object requires
> -      * also bo::reserved. A busy object at this point may
> -      * be caused by another thread recently starting an accelerated
> -      * eviction.
> -      */
> +     if (unlikely(list_empty(&bo->ddestroy))) {
> +             atomic_set(&bo->reserved, 0);
> +             wake_up_all(&bo->event_queue);
> +             spin_unlock(&glob->lru_lock);
> +             return 0;
> +     }
> +     ret = ttm_bo_wait(bo, false, false, true);
> +
> +     if (ret) {
> +             if (no_wait_gpu) {
> +                     atomic_set(&bo->reserved, 0);
> +                     wake_up_all(&bo->event_queue);
> +                     spin_unlock(&glob->lru_lock);
> +                     return ret;
> +             }
>   
> -     if (unlikely(bo->sync_obj)) {
> +             /**
> +              * Take a reference to the fence and unreserve. If the wait
> +              * was successful and no new sync_obj was attached,
> +              * ttm_bo_wait on retry will return 0 and end the loop.
> +              */
> +
> +             sync_obj = driver->sync_obj_ref(bo->sync_obj);
>               atomic_set(&bo->reserved, 0);
>               wake_up_all(&bo->event_queue);
>               spin_unlock(&glob->lru_lock);
> +
> +             ret = driver->sync_obj_wait(sync_obj, false, interruptible);
> +             driver->sync_obj_unref(&sync_obj);
> +             if (ret)
> +                     return ret;
>               goto retry;
>       }
>   
> @@ -735,9 +732,7 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, bool interruptible,
>       struct ttm_placement placement;
>       int ret = 0;
>   
> -     spin_lock(&bdev->fence_lock);
>       ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
> -     spin_unlock(&bdev->fence_lock);
>   
>       if (unlikely(ret != 0)) {
>               if (ret != -ERESTARTSYS) {
> @@ -1054,7 +1049,6 @@ int ttm_bo_move_buffer(struct ttm_buffer_object *bo,
>   {
>       int ret = 0;
>       struct ttm_mem_reg mem;
> -     struct ttm_bo_device *bdev = bo->bdev;
>   
>       BUG_ON(!ttm_bo_is_reserved(bo));
>   
> @@ -1063,9 +1057,7 @@ int ttm_bo_move_buffer(struct ttm_buffer_object *bo,
>        * Have the driver move function wait for idle when necessary,
>        * instead of doing it here.
>        */
> -     spin_lock(&bdev->fence_lock);
>       ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
> -     spin_unlock(&bdev->fence_lock);
>       if (ret)
>               return ret;
>       mem.num_pages = bo->num_pages;
> @@ -1554,7 +1546,6 @@ int ttm_bo_device_init(struct ttm_bo_device *bdev,
>       bdev->glob = glob;
>       bdev->need_dma32 = need_dma32;
>       bdev->val_seq = 0;
> -     spin_lock_init(&bdev->fence_lock);
>       mutex_lock(&glob->device_list_mutex);
>       list_add_tail(&bdev->device_list, &glob->device_list);
>       mutex_unlock(&glob->device_list_mutex);
> @@ -1690,52 +1681,33 @@ int ttm_bo_wait(struct ttm_buffer_object *bo,
>               bool lazy, bool interruptible, bool no_wait)
>   {
>       struct ttm_bo_driver *driver = bo->bdev->driver;
> -     struct ttm_bo_device *bdev = bo->bdev;
> -     void *sync_obj;
>       int ret = 0;
>   
> +     WARN_ON_ONCE(!ttm_bo_is_reserved(bo));
> +
>       if (likely(bo->sync_obj == NULL))
>               return 0;
>   
> -     while (bo->sync_obj) {
> -
> +     if (bo->sync_obj) {
>               if (driver->sync_obj_signaled(bo->sync_obj)) {
>                       void *tmp_obj = bo->sync_obj;
>                       bo->sync_obj = NULL;
>                       clear_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags);
> -                     spin_unlock(&bdev->fence_lock);
>                       driver->sync_obj_unref(&tmp_obj);
> -                     spin_lock(&bdev->fence_lock);
> -                     continue;
> +                     return 0;
>               }
>   
>               if (no_wait)
>                       return -EBUSY;
>   
> -             sync_obj = driver->sync_obj_ref(bo->sync_obj);
> -             spin_unlock(&bdev->fence_lock);
> -             ret = driver->sync_obj_wait(sync_obj,
> +             ret = driver->sync_obj_wait(bo->sync_obj,
>                                           lazy, interruptible);
>               if (unlikely(ret != 0)) {
> -                     driver->sync_obj_unref(&sync_obj);
> -                     spin_lock(&bdev->fence_lock);
>                       return ret;
>               }
> -             spin_lock(&bdev->fence_lock);
> -             if (likely(bo->sync_obj == sync_obj)) {
> -                     void *tmp_obj = bo->sync_obj;
> -                     bo->sync_obj = NULL;
> -                     clear_bit(TTM_BO_PRIV_FLAG_MOVING,
> -                               &bo->priv_flags);
> -                     spin_unlock(&bdev->fence_lock);
> -                     driver->sync_obj_unref(&sync_obj);
> -                     driver->sync_obj_unref(&tmp_obj);
> -                     spin_lock(&bdev->fence_lock);
> -             } else {
> -                     spin_unlock(&bdev->fence_lock);
> -                     driver->sync_obj_unref(&sync_obj);
> -                     spin_lock(&bdev->fence_lock);
> -             }
> +             driver->sync_obj_unref(&bo->sync_obj);
> +             clear_bit(TTM_BO_PRIV_FLAG_MOVING,
> +                       &bo->priv_flags);
>       }
>       return 0;
>   }
> @@ -1799,9 +1771,7 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink)
>        * Wait for GPU, then move to system cached.
>        */
>   
> -     spin_lock(&bo->bdev->fence_lock);
>       ret = ttm_bo_wait(bo, false, false, false);
> -     spin_unlock(&bo->bdev->fence_lock);
>   
>       if (unlikely(ret != 0))
>               goto out;
> diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
> index d80d1e8..84d6e01 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo_util.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
> @@ -622,7 +622,6 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
>       struct ttm_buffer_object *ghost_obj;
>       void *tmp_obj = NULL;
>   
> -     spin_lock(&bdev->fence_lock);
>       if (bo->sync_obj) {
>               tmp_obj = bo->sync_obj;
>               bo->sync_obj = NULL;
> @@ -630,7 +629,6 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
>       bo->sync_obj = driver->sync_obj_ref(sync_obj);
>       if (evict) {
>               ret = ttm_bo_wait(bo, false, false, false);
> -             spin_unlock(&bdev->fence_lock);
>               if (tmp_obj)
>                       driver->sync_obj_unref(&tmp_obj);
>               if (ret)
> @@ -653,7 +651,6 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
>                */
>   
>               set_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags);
> -             spin_unlock(&bdev->fence_lock);
>               if (tmp_obj)
>                       driver->sync_obj_unref(&tmp_obj);
>   
> diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
> index a877813..55718c2 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
> @@ -122,17 +122,14 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
>        * move.
>        */
>   
> -     spin_lock(&bdev->fence_lock);
>       if (test_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags)) {
>               ret = ttm_bo_wait(bo, false, true, false);
> -             spin_unlock(&bdev->fence_lock);
>               if (unlikely(ret != 0)) {
>                       retval = (ret != -ERESTARTSYS) ?
>                           VM_FAULT_SIGBUS : VM_FAULT_NOPAGE;
>                       goto out_unlock;
>               }
> -     } else
> -             spin_unlock(&bdev->fence_lock);
> +     }
>   
>       ret = ttm_mem_io_lock(man, true);
>       if (unlikely(ret != 0)) {
> diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
> index 721886e..51b5e97 100644
> --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
> +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
> @@ -207,7 +206,6 @@ void ttm_eu_fence_buffer_objects(struct list_head *list, void *sync_obj)
>       driver = bdev->driver;
>       glob = bo->glob;
>   
> -     spin_lock(&bdev->fence_lock);
>       spin_lock(&glob->lru_lock);
>   
>       list_for_each_entry(entry, list, head) {
> @@ -218,7 +217,6 @@ void ttm_eu_fence_buffer_objects(struct list_head *list, void *sync_obj)
>               entry->reserved = false;
>       }
>       spin_unlock(&glob->lru_lock);
> -     spin_unlock(&bdev->fence_lock);
>   
>       list_for_each_entry(entry, list, head) {
>               if (entry->old_sync_obj)
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
> index ed3c1e7..e038c9a 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
> @@ -251,13 +251,10 @@ static void vmw_dummy_query_bo_prepare(struct vmw_private *dev_priv)
>       volatile SVGA3dQueryResult *result;
>       bool dummy;
>       int ret;
> -     struct ttm_bo_device *bdev = &dev_priv->bdev;
>       struct ttm_buffer_object *bo = dev_priv->dummy_query_bo;
>   
>       ttm_bo_reserve(bo, false, false, false, 0);
> -     spin_lock(&bdev->fence_lock);
>       ret = ttm_bo_wait(bo, false, false, false);
> -     spin_unlock(&bdev->fence_lock);
>       if (unlikely(ret != 0))
>               (void) vmw_fallback_wait(dev_priv, false, true, 0, false,
>                                        10*HZ);
> diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
> index 816d9b1..6c69224 100644
> --- a/include/drm/ttm/ttm_bo_api.h
> +++ b/include/drm/ttm/ttm_bo_api.h
> @@ -222,6 +222,8 @@ struct ttm_buffer_object {
>       struct file *persistent_swap_storage;
>       struct ttm_tt *ttm;
>       bool evicted;
> +     void *sync_obj;
> +     unsigned long priv_flags;
>   
>       /**
>        * Members protected by the bdev::lru_lock.
> @@ -242,16 +244,6 @@ struct ttm_buffer_object {
>       atomic_t reserved;
>   
>       /**
> -      * Members protected by struct buffer_object_device::fence_lock
> -      * In addition, setting sync_obj to anything else
> -      * than NULL requires bo::reserved to be held. This allows for
> -      * checking NULL while reserved but not holding the mentioned lock.
> -      */
> -
> -     void *sync_obj;
> -     unsigned long priv_flags;
> -
> -     /**
>        * Members protected by the bdev::vm_lock
>        */
>   
> diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
> index 0c8c3b5..aac101b 100644
> --- a/include/drm/ttm/ttm_bo_driver.h
> +++ b/include/drm/ttm/ttm_bo_driver.h
> @@ -515,8 +515,6 @@ struct ttm_bo_global {
>    *
>    * @driver: Pointer to a struct ttm_bo_driver struct setup by the driver.
>    * @man: An array of mem_type_managers.
> - * @fence_lock: Protects the synchronizing members on *all* bos belonging
> - * to this device.
>    * @addr_space_mm: Range manager for the device address space.
>    * lru_lock: Spinlock that protects the buffer+device lru lists and
>    * ddestroy lists.
> @@ -539,7 +537,6 @@ struct ttm_bo_device {
>       struct ttm_bo_driver *driver;
>       rwlock_t vm_lock;
>       struct ttm_mem_type_manager man[TTM_NUM_MEM_TYPES];
> -     spinlock_t fence_lock;
>       /*
>        * Protected by the vm lock.
>        */
>
> _______________________________________________
> dri-devel mailing list
> dri-devel at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/dri-devel


