Scanout bos which are dmabuf-backed in system RAM
and imported via prime do not update their content
with new rendering from the render offload gpu
after they have been flipped onto the scanout for
the first time. The reason is that at preparation
of the first flip they get pinned into VRAM and
unpinned at some later point, but they stay in the
VRAM memory domain, so updates to the system RAM
dmabuf object by the exporting render offload gpu
don't lead to updates of the content in VRAM - the
displayed image becomes stale.

For prime-imported dmabufs we solve this by first
pinning the bo into GTT, which resets the bo's
memory domain back to GTT, and then unpinning it
again, so that the followup pinning into VRAM will
actually upload an up-to-date display buffer from
the dmabuf's GTT backing store.

During the pinning into GTT, we skip the actual data move
from VRAM to GTT to avoid a needless bo copy of stale
image data.
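
Roughly, the flip preparation sequence for such a bo then
becomes the following (a simplified sketch of the
nouveau_crtc_page_flip() hunk below, using the existing
nouveau_bo_pin()/nouveau_bo_unpin() helpers, not a verbatim
excerpt):

  /* Prime-imported scanout bo that is not currently pinned? */
  if (new_bo->prime_imported && !new_bo->pin_refcnt) {
          /* Pseudo-move back to GTT: resets the memory domain,
           * while nouveau_dmabuf_skip_op() turns the actual
           * VRAM -> GTT data copy into a no-op.
           */
          if (nouveau_bo_pin(new_bo, TTM_PL_FLAG_TT, false) == 0)
                  nouveau_bo_unpin(new_bo);
  }

  /* The regular pin into VRAM now uploads fresh dmabuf content
   * from the GTT backing store instead of reusing stale data.
   */
  ret = nouveau_bo_pin(new_bo, TTM_PL_FLAG_VRAM, true);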

Signed-off-by: Mario Kleiner <mario.kleiner.de at gmail.com>
---
 drivers/gpu/drm/nouveau/nouveau_bo.c      | 35 +++++++++++++++++++++++++++++--
 drivers/gpu/drm/nouveau/nouveau_bo.h      |  1 +
 drivers/gpu/drm/nouveau/nouveau_display.c | 17 +++++++++++++++
 drivers/gpu/drm/nouveau/nouveau_prime.c   |  1 +
 4 files changed, 52 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 6190035..87052e4 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -38,6 +38,18 @@
 #include "nouveau_ttm.h"
 #include "nouveau_gem.h"

+static inline bool nouveau_dmabuf_skip_op(struct ttm_buffer_object *bo,
+                                         struct ttm_mem_reg *new_mem)
+{
+       struct nouveau_bo *nvbo = nouveau_bo(bo);
+
+       /*
+        * Return true if an expensive operation as part of a dmabuf
+        * bo copy from VRAM to GTT can be skipped on this bo.
+        */
+       return nvbo->prime_imported && new_mem && new_mem->mem_type == TTM_PL_TT;
+}
+
 /*
  * NV10-NV40 tiling helpers
  */
@@ -1026,13 +1038,15 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, bool intr,
        struct nouveau_channel *chan = drm->ttm.chan;
        struct nouveau_cli *cli = (void *)chan->user.client;
        struct nouveau_fence *fence;
+       bool skip_prime = !evict && nouveau_dmabuf_skip_op(bo, new_mem);
        int ret;

        /* create temporary vmas for the transfer and attach them to the
         * old nvkm_mem node, these will get cleaned up after ttm has
         * destroyed the ttm_mem_reg
         */
-       if (drm->device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
+       if (drm->device.info.family >= NV_DEVICE_INFO_V0_TESLA &&
+           !skip_prime) {
                ret = nouveau_bo_move_prep(drm, bo, new_mem);
                if (ret)
                        return ret;
@@ -1041,7 +1055,21 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, bool intr,
        mutex_lock_nested(&cli->mutex, SINGLE_DEPTH_NESTING);
        ret = nouveau_fence_sync(nouveau_bo(bo), chan, true, intr);
        if (ret == 0) {
-               ret = drm->ttm.move(chan, bo, &bo->mem, new_mem);
+               /*
+                * For prime-imported dmabufs which are page-flipped to the
+                * display as scanout bo's and thereby pinned into VRAM, we
+                * need to do a pseudo-move back into GTT memory domain once
+                * they are replaced by a new scanout bo. This enforces an
+                * update to the new content from dmabuf storage at the next
+                * flip; otherwise we'd display a stale image. The move back
+                * into GTT goes through most administrative steps of a real
+                * bo move, but we skip the actual copy of the now stale old
+                * image data from VRAM back to GTT dmabuf backing to save a
+                * useless copy.
+                */
+               if (!skip_prime)
+                       ret = drm->ttm.move(chan, bo, &bo->mem, new_mem);
+
                if (ret == 0) {
                        ret = nouveau_fence_new(chan, false, &fence);
                        if (ret == 0) {
@@ -1202,6 +1230,9 @@ nouveau_bo_move_ntfy(struct ttm_buffer_object *bo, struct ttm_mem_reg *new_mem)
        if (bo->destroy != nouveau_bo_del_ttm)
                return;

+       if (nouveau_dmabuf_skip_op(bo, new_mem))
+               return;
+
        list_for_each_entry(vma, &nvbo->vma_list, head) {
                if (new_mem && new_mem->mem_type != TTM_PL_SYSTEM &&
                              (new_mem->mem_type == TTM_PL_VRAM ||
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.h b/drivers/gpu/drm/nouveau/nouveau_bo.h
index e423609..4e415e0 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.h
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.h
@@ -39,6 +39,7 @@ struct nouveau_bo {
        int pin_refcnt;

        struct ttm_bo_kmap_obj dma_buf_vmap;
+       bool prime_imported;
 };

 static inline struct nouveau_bo *
diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c
index afbf557..bb49159 100644
--- a/drivers/gpu/drm/nouveau/nouveau_display.c
+++ b/drivers/gpu/drm/nouveau/nouveau_display.c
@@ -736,6 +736,22 @@ nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb,
                return -ENOMEM;

        if (new_bo != old_bo) {
+               /* Is this a scanout buffer from an imported prime dmabuf? */
+               if (new_bo->prime_imported && !new_bo->pin_refcnt) {
+                       /*
+                        * Pretend it "moved out" of VRAM, so a fresh copy of
+                        * new dmabuf content from export gpu gets reuploaded
+                        * from GTT backing store when pinning into VRAM.
+                        */
+                       DRM_DEBUG_PRIME("Flip to prime imported dmabuf %p\n",
+                                       new_bo);
+                       if (nouveau_bo_pin(new_bo, TTM_PL_FLAG_TT, false))
+                               DRM_ERROR("Fail gtt pin imported buf %p\n",
+                                         new_bo);
+                       else
+                               nouveau_bo_unpin(new_bo);
+               }
+
                ret = nouveau_bo_pin(new_bo, TTM_PL_FLAG_VRAM, true);
                if (ret)
                        goto fail_free;
@@ -808,6 +824,7 @@ nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb,
        ttm_bo_unreserve(&old_bo->bo);
        if (old_bo != new_bo)
                nouveau_bo_unpin(old_bo);
+
        nouveau_fence_unref(&fence);
        return 0;

diff --git a/drivers/gpu/drm/nouveau/nouveau_prime.c b/drivers/gpu/drm/nouveau/nouveau_prime.c
index a0a9704..2bd76f6 100644
--- a/drivers/gpu/drm/nouveau/nouveau_prime.c
+++ b/drivers/gpu/drm/nouveau/nouveau_prime.c
@@ -75,6 +75,7 @@ struct drm_gem_object *nouveau_gem_prime_import_sg_table(struct drm_device *dev,
                return ERR_PTR(ret);

        nvbo->valid_domains = NOUVEAU_GEM_DOMAIN_GART;
+       nvbo->prime_imported = true;

        /* Initialize the embedded gem-object. We return a single gem-reference
         * to the caller, instead of a normal nouveau_bo ttm reference. */
-- 
2.7.0
