This blit was previously performed using two large vmaps, one of which was teared down and remapped on each blit. Use the more resource- conserving TTM cpu blit instead.
The blit is used in boundary-box computing mode which makes it possible to minimize the bounding box used in host operations. Signed-off-by: Thomas Hellstrom <thellst...@vmware.com> Reviewed-by: Brian Paul <bri...@vmware.com> --- drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c | 23 ++++++++ drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 1 + drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 50 ++++++++++------ drivers/gpu/drm/vmwgfx/vmwgfx_kms.h | 4 +- drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c | 5 +- drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c | 105 +++++++++++---------------------- 6 files changed, 97 insertions(+), 91 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c index 22231bc..2fe099a 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c @@ -185,6 +185,22 @@ static const struct ttm_place evictable_placement_flags[] = { } }; +static const struct ttm_place nonfixed_placement_flags[] = { + { + .fpfn = 0, + .lpfn = 0, + .flags = TTM_PL_FLAG_SYSTEM | TTM_PL_FLAG_CACHED + }, { + .fpfn = 0, + .lpfn = 0, + .flags = VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED + }, { + .fpfn = 0, + .lpfn = 0, + .flags = VMW_PL_FLAG_MOB | TTM_PL_FLAG_CACHED + } +}; + struct ttm_placement vmw_evictable_placement = { .num_placement = 4, .placement = evictable_placement_flags, @@ -213,6 +229,13 @@ struct ttm_placement vmw_mob_ne_placement = { .busy_placement = &mob_ne_placement_flags }; +struct ttm_placement vmw_nonfixed_placement = { + .num_placement = 3, + .placement = nonfixed_placement_flags, + .num_busy_placement = 1, + .busy_placement = &sys_placement_flags +}; + struct vmw_ttm_tt { struct ttm_dma_tt dma_ttm; struct vmw_private *dev_priv; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 99e7e426..62e567a 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -767,6 +767,7 @@ extern struct ttm_placement vmw_evictable_placement; extern struct ttm_placement vmw_srf_placement; extern struct ttm_placement vmw_mob_placement; extern struct ttm_placement vmw_mob_ne_placement; +extern struct ttm_placement vmw_nonfixed_placement; extern struct ttm_bo_driver vmw_bo_driver; extern int vmw_dma_quiescent(struct drm_device *dev); extern int vmw_bo_map_dma(struct ttm_buffer_object *bo); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index 96737d5..a62d0f8 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -692,9 +692,6 @@ vmw_du_plane_duplicate_state(struct drm_plane *plane) return NULL; vps->pinned = 0; - - /* Mapping is managed by prepare_fb/cleanup_fb */ - memset(&vps->host_map, 0, sizeof(vps->host_map)); vps->cpp = 0; /* Each ref counted resource needs to be acquired again */ @@ -756,11 +753,6 @@ vmw_du_plane_destroy_state(struct drm_plane *plane, /* Should have been freed by cleanup_fb */ - if (vps->host_map.virtual) { - DRM_ERROR("Host mapping not freed\n"); - ttm_bo_kunmap(&vps->host_map); - } - if (vps->surf) vmw_surface_unreference(&vps->surf); @@ -1139,12 +1131,14 @@ static const struct drm_framebuffer_funcs vmw_framebuffer_dmabuf_funcs = { }; /** - * Pin the dmabuffer to the start of vram. + * Pin the dmabuffer in a location suitable for access by the + * display system. */ static int vmw_framebuffer_pin(struct vmw_framebuffer *vfb) { struct vmw_private *dev_priv = vmw_priv(vfb->base.dev); struct vmw_dma_buffer *buf; + struct ttm_placement *placement; int ret; buf = vfb->dmabuf ? vmw_framebuffer_to_vfbd(&vfb->base)->buffer : @@ -1161,12 +1155,24 @@ static int vmw_framebuffer_pin(struct vmw_framebuffer *vfb) break; case vmw_du_screen_object: case vmw_du_screen_target: - if (vfb->dmabuf) - return vmw_dmabuf_pin_in_vram_or_gmr(dev_priv, buf, - false); + if (vfb->dmabuf) { + if (dev_priv->capabilities & SVGA_CAP_3D) { + /* + * Use surface DMA to get content to + * sreen target surface. + */ + placement = &vmw_vram_gmr_placement; + } else { + /* Use CPU blit. */ + placement = &vmw_sys_placement; + } + } else { + /* Use surface / image update */ + placement = &vmw_mob_placement; + } - return vmw_dmabuf_pin_in_placement(dev_priv, buf, - &vmw_mob_placement, false); + return vmw_dmabuf_pin_in_placement(dev_priv, buf, placement, + false); default: return -EINVAL; } @@ -2429,14 +2435,21 @@ int vmw_kms_helper_dirty(struct vmw_private *dev_priv, int vmw_kms_helper_buffer_prepare(struct vmw_private *dev_priv, struct vmw_dma_buffer *buf, bool interruptible, - bool validate_as_mob) + bool validate_as_mob, + bool for_cpu_blit) { + struct ttm_operation_ctx ctx = { + .interruptible = interruptible, + .no_wait_gpu = false}; struct ttm_buffer_object *bo = &buf->base; int ret; ttm_bo_reserve(bo, false, false, NULL); - ret = vmw_validate_single_buffer(dev_priv, bo, interruptible, - validate_as_mob); + if (for_cpu_blit) + ret = ttm_bo_validate(bo, &vmw_nonfixed_placement, &ctx); + else + ret = vmw_validate_single_buffer(dev_priv, bo, interruptible, + validate_as_mob); if (ret) ttm_bo_unreserve(bo); @@ -2548,7 +2561,8 @@ int vmw_kms_helper_resource_prepare(struct vmw_resource *res, if (res->backup) { ret = vmw_kms_helper_buffer_prepare(res->dev_priv, res->backup, interruptible, - res->dev_priv->has_mob); + res->dev_priv->has_mob, + false); if (ret) goto out_unreserve; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h index 42b0f15..4e8749a 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h @@ -177,7 +177,6 @@ struct vmw_plane_state { int pinned; /* For CPU Blit */ - struct ttm_bo_kmap_obj host_map; unsigned int cpp; }; @@ -289,7 +288,8 @@ int vmw_kms_helper_dirty(struct vmw_private *dev_priv, int vmw_kms_helper_buffer_prepare(struct vmw_private *dev_priv, struct vmw_dma_buffer *buf, bool interruptible, - bool validate_as_mob); + bool validate_as_mob, + bool for_cpu_blit); void vmw_kms_helper_buffer_revert(struct vmw_dma_buffer *buf); void vmw_kms_helper_buffer_finish(struct vmw_private *dev_priv, struct drm_file *file_priv, diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c index 7164724..8a43ba8 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c @@ -1032,7 +1032,7 @@ int vmw_kms_sou_do_dmabuf_dirty(struct vmw_private *dev_priv, int ret; ret = vmw_kms_helper_buffer_prepare(dev_priv, buf, interruptible, - false); + false, false); if (ret) return ret; @@ -1130,7 +1130,8 @@ int vmw_kms_sou_readback(struct vmw_private *dev_priv, struct vmw_kms_dirty dirty; int ret; - ret = vmw_kms_helper_buffer_prepare(dev_priv, buf, true, false); + ret = vmw_kms_helper_buffer_prepare(dev_priv, buf, true, false, + false); if (ret) return ret; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c index 6de2874..8eec889 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c @@ -114,7 +114,6 @@ struct vmw_screen_target_display_unit { bool defined; /* For CPU Blit */ - struct ttm_bo_kmap_obj host_map; unsigned int cpp; }; @@ -639,10 +638,9 @@ static void vmw_stdu_dmabuf_cpu_commit(struct vmw_kms_dirty *dirty) container_of(dirty->unit, typeof(*stdu), base); s32 width, height; s32 src_pitch, dst_pitch; - u8 *src, *dst; - bool not_used; - struct ttm_bo_kmap_obj guest_map; - int ret; + struct ttm_buffer_object *src_bo, *dst_bo; + u32 src_offset, dst_offset; + struct vmw_diff_cpy diff = VMW_CPU_BLIT_DIFF_INITIALIZER(stdu->cpp); if (!dirty->num_hits) return; @@ -653,57 +651,38 @@ static void vmw_stdu_dmabuf_cpu_commit(struct vmw_kms_dirty *dirty) if (width == 0 || height == 0) return; - ret = ttm_bo_kmap(&ddirty->buf->base, 0, ddirty->buf->base.num_pages, - &guest_map); - if (ret) { - DRM_ERROR("Failed mapping framebuffer for blit: %d\n", - ret); - goto out_cleanup; - } - - /* Assume we are blitting from Host (display_srf) to Guest (dmabuf) */ - src_pitch = stdu->display_srf->base_size.width * stdu->cpp; - src = ttm_kmap_obj_virtual(&stdu->host_map, ¬_used); - src += ddirty->top * src_pitch + ddirty->left * stdu->cpp; - - dst_pitch = ddirty->pitch; - dst = ttm_kmap_obj_virtual(&guest_map, ¬_used); - dst += ddirty->fb_top * dst_pitch + ddirty->fb_left * stdu->cpp; + /* Assume we are blitting from Guest (dmabuf) to Host (display_srf) */ + dst_pitch = stdu->display_srf->base_size.width * stdu->cpp; + dst_bo = &stdu->display_srf->res.backup->base; + dst_offset = ddirty->top * dst_pitch + ddirty->left * stdu->cpp; + src_pitch = ddirty->pitch; + src_bo = &ddirty->buf->base; + src_offset = ddirty->fb_top * src_pitch + ddirty->fb_left * stdu->cpp; - /* Figure out the real direction */ - if (ddirty->transfer == SVGA3D_WRITE_HOST_VRAM) { - u8 *tmp; - s32 tmp_pitch; - - tmp = src; - tmp_pitch = src_pitch; - - src = dst; - src_pitch = dst_pitch; - - dst = tmp; - dst_pitch = tmp_pitch; + /* Swap src and dst if the assumption was wrong. */ + if (ddirty->transfer != SVGA3D_WRITE_HOST_VRAM) { + swap(dst_pitch, src_pitch); + swap(dst_bo, src_bo); + swap(src_offset, dst_offset); } - /* CPU Blit */ - while (height-- > 0) { - memcpy(dst, src, width * stdu->cpp); - dst += dst_pitch; - src += src_pitch; - } + (void) vmw_bo_cpu_blit(dst_bo, dst_offset, dst_pitch, + src_bo, src_offset, src_pitch, + width * stdu->cpp, height, &diff); - if (ddirty->transfer == SVGA3D_WRITE_HOST_VRAM) { + if (ddirty->transfer == SVGA3D_WRITE_HOST_VRAM && + drm_rect_visible(&diff.rect)) { struct vmw_private *dev_priv; struct vmw_stdu_update *cmd; struct drm_clip_rect region; int ret; /* We are updating the actual surface, not a proxy */ - region.x1 = ddirty->left; - region.x2 = ddirty->right; - region.y1 = ddirty->top; - region.y2 = ddirty->bottom; + region.x1 = diff.rect.x1; + region.x2 = diff.rect.x2; + region.y1 = diff.rect.y1; + region.y2 = diff.rect.y2; ret = vmw_kms_update_proxy( (struct vmw_resource *) &stdu->display_srf->res, (const struct drm_clip_rect *) ®ion, 1, 1); @@ -720,13 +699,12 @@ static void vmw_stdu_dmabuf_cpu_commit(struct vmw_kms_dirty *dirty) } vmw_stdu_populate_update(cmd, stdu->base.unit, - ddirty->left, ddirty->right, - ddirty->top, ddirty->bottom); + region.x1, region.x2, + region.y1, region.y2); vmw_fifo_commit(dev_priv, sizeof(*cmd)); } - ttm_bo_kunmap(&guest_map); out_cleanup: ddirty->left = ddirty->top = ddirty->fb_left = ddirty->fb_top = S32_MAX; ddirty->right = ddirty->bottom = S32_MIN; @@ -772,9 +750,15 @@ int vmw_kms_stdu_dma(struct vmw_private *dev_priv, container_of(vfb, struct vmw_framebuffer_dmabuf, base)->buffer; struct vmw_stdu_dirty ddirty; int ret; + bool cpu_blit = !(dev_priv->capabilities & SVGA_CAP_3D); + /* + * VMs without 3D support don't have the surface DMA command and + * we'll be using a CPU blit, and the framebuffer should be moved out + * of VRAM. + */ ret = vmw_kms_helper_buffer_prepare(dev_priv, buf, interruptible, - false); + false, cpu_blit); if (ret) return ret; @@ -793,8 +777,8 @@ int vmw_kms_stdu_dma(struct vmw_private *dev_priv, if (to_surface) ddirty.base.fifo_reserve_size += sizeof(struct vmw_stdu_update); - /* 2D VMs cannot use SVGA_3D_CMD_SURFACE_DMA so do CPU blit instead */ - if (!(dev_priv->capabilities & SVGA_CAP_3D)) { + + if (cpu_blit) { ddirty.base.fifo_commit = vmw_stdu_dmabuf_cpu_commit; ddirty.base.clip = vmw_stdu_dmabuf_cpu_clip; ddirty.base.fifo_reserve_size = 0; @@ -1071,9 +1055,6 @@ vmw_stdu_primary_plane_cleanup_fb(struct drm_plane *plane, { struct vmw_plane_state *vps = vmw_plane_state_to_vps(old_state); - if (vps->host_map.virtual) - ttm_bo_kunmap(&vps->host_map); - if (vps->surf) WARN_ON(!vps->pinned); @@ -1235,24 +1216,11 @@ vmw_stdu_primary_plane_prepare_fb(struct drm_plane *plane, * so cache these mappings */ if (vps->content_fb_type == SEPARATE_DMA && - !(dev_priv->capabilities & SVGA_CAP_3D)) { - ret = ttm_bo_kmap(&vps->surf->res.backup->base, 0, - vps->surf->res.backup->base.num_pages, - &vps->host_map); - if (ret) { - DRM_ERROR("Failed to map display buffer to CPU\n"); - goto out_srf_unpin; - } - + !(dev_priv->capabilities & SVGA_CAP_3D)) vps->cpp = new_fb->pitches[0] / new_fb->width; - } return 0; -out_srf_unpin: - vmw_resource_unpin(&vps->surf->res); - vps->pinned--; - out_srf_unref: vmw_surface_unreference(&vps->surf); return ret; @@ -1296,7 +1264,6 @@ vmw_stdu_primary_plane_atomic_update(struct drm_plane *plane, stdu->display_srf = vps->surf; stdu->content_fb_type = vps->content_fb_type; stdu->cpp = vps->cpp; - memcpy(&stdu->host_map, &vps->host_map, sizeof(vps->host_map)); vclips.x = crtc->x; vclips.y = crtc->y; -- 2.7.4 _______________________________________________ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel