On 06/09/2021 17:55, Thomas Hellström wrote:
Just evict unpinned objects to system memory. For pinned LMEM objects,
make a backup system object and blit the contents to that.

Backup is performed in three steps:
1: Opportunistically evict evictable objects using the gpu blitter.
2: After gt idle, evict evictable objects using the gpu blitter. This will
be modified in an upcoming patch to back up pinned objects that are not
used by the blitter itself.
3: Back up remaining pinned objects using memcpy.

Also move uC suspend to after 2) to make sure we have a functional GuC
during 2) if using GuC submission.

v2:
- Major refactor to make sure the gem_exec_suspend@hang-SX subtests work, and
that suspend/resume works with a slightly modified GuC submission enabling
patch series.

Signed-off-by: Thomas Hellström <thomas.hellst...@linux.intel.com>
<snip>
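
To make the three-step sequencing in the commit message concrete, here is a
rough sketch of how the suspend path could drive these helpers. This is not
part of the quoted patch: the lmem_suspend() wrapper, the
lmem_suspend_sequence() caller and the exact call sites are assumptions for
illustration only.

static int lmem_suspend(struct drm_i915_private *i915, bool allow_gpu,
			bool backup_pinned)
{
	struct intel_memory_region *mr;
	int id, err = 0;

	/* Back up (or evict) the objects of every local-memory region. */
	for_each_memory_region(mr, i915, id) {
		if (mr->type != INTEL_MEMORY_LOCAL)
			continue;
		err = i915_ttm_backup_region(mr, allow_gpu, backup_pinned);
		if (err)
			break;
	}

	return err;
}

static void lmem_suspend_sequence(struct drm_i915_private *i915)
{
	/* 1: Opportunistic eviction while the GPU is still fully up. */
	lmem_suspend(i915, true, false);

	/* ... idle the GT here ... */

	/* 2: Evict what is left using the blitter; GuC still functional. */
	lmem_suspend(i915, true, false);

	/* ... suspend uC only after step 2 ... */

	/* 3: memcpy backup of whatever is still pinned. */
	lmem_suspend(i915, false, true);
}
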
+
+static int i915_ttm_backup(struct i915_gem_apply_to_region *apply,
+			   struct drm_i915_gem_object *obj)
+{
+	struct i915_gem_ttm_pm_apply *pm_apply =
+		container_of(apply, typeof(*pm_apply), base);
+	struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
+	struct ttm_buffer_object *backup_bo;
+	struct drm_i915_private *i915 =
+		container_of(bo->bdev, typeof(*i915), bdev);
+	struct intel_memory_region *sys_region;
+	struct drm_i915_gem_object *backup;
+	struct ttm_operation_ctx ctx = {};
+	int err = 0;
+
+	if (bo->resource->mem_type == I915_PL_SYSTEM || obj->ttm.backup)
+		return 0;
+
+	if (pm_apply->allow_gpu && i915_gem_object_evictable(obj))
+		return ttm_bo_validate(bo, i915_ttm_sys_placement(), &ctx);
+
+	if (!pm_apply->backup_pinned)
+		return 0;
+
+	sys_region = i915->mm.regions[INTEL_REGION_SMEM];
+	backup = i915_gem_object_create_region(sys_region,
+					       obj->base.size,
+					       0, 0);
Could this use i915_gem_object_create_shmem() instead?
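Presumably something like the below, assuming the backup object is fine
living in shmem (my reading of the suggestion, untested):

	backup = i915_gem_object_create_shmem(i915, obj->base.size);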
+	if (IS_ERR(backup))
+		return PTR_ERR(backup);
+
+	err = i915_gem_object_lock(backup, apply->ww);
+	if (err)
+		goto out_no_lock;
+
+	backup_bo = i915_gem_to_ttm(backup);
+	err = ttm_tt_populate(backup_bo->bdev, backup_bo->ttm, &ctx);
+	if (err)
+		goto out_no_populate;
+
+	err = i915_gem_obj_copy_ttm(backup, obj, pm_apply->allow_gpu, false);
+	GEM_WARN_ON(err);
+
+	obj->ttm.backup = backup;
+	return 0;
+
+out_no_populate:
+	i915_gem_ww_unlock_single(backup);
+out_no_lock:
+	i915_gem_object_put(backup);
+
+	return err;
+}
+
+static int i915_ttm_recover(struct i915_gem_apply_to_region *apply,
+			    struct drm_i915_gem_object *obj)
+{
+	i915_ttm_backup_free(obj);
+	return 0;
+}
+
+/**
+ * i915_ttm_recover_region - Free the backup of all objects of a region
+ * @mr: The memory region
+ *
+ * Checks all objects of a region if there is backup attached and if so
+ * frees that backup. Typically this is called to recover after a partially
+ * performed backup.
+ */
+void i915_ttm_recover_region(struct intel_memory_region *mr)
+{
+	static const struct i915_gem_apply_to_region_ops recover_ops = {
+		.process_obj = i915_ttm_recover,
+	};
+	struct i915_gem_apply_to_region apply = {.ops = &recover_ops};
+	int ret;
+
+	ret = i915_gem_process_region(mr, &apply);
+	GEM_WARN_ON(ret);
+}
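
For reference, a caller unwinding a partial backup might look roughly like
this (only a sketch; the wrapper name is made up and not part of the patch):

static int backup_lmem_region(struct intel_memory_region *mr, bool allow_gpu,
			      bool backup_pinned)
{
	int err;

	err = i915_ttm_backup_region(mr, allow_gpu, backup_pinned);
	if (err)
		/* Free any backup objects attached before the failure. */
		i915_ttm_recover_region(mr);

	return err;
}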
+
+/**
+ * i915_ttm_backup_region - Back up all objects of a region to smem.
+ * @mr: The memory region
+ * @allow_gpu: Whether to allow the gpu blitter for this backup.
+ * @backup_pinned: Backup also pinned objects.
+ *
+ * Loops over all objects of a region and either evicts them if they are
+ * evictable or backs them up using a backup object if they are pinned.
+ *
+ * Return: Zero on success. Negative error code on error.
+ */
+int i915_ttm_backup_region(struct intel_memory_region *mr, bool allow_gpu,
+			   bool backup_pinned)
+{
+	static const struct i915_gem_apply_to_region_ops backup_ops = {
+		.process_obj = i915_ttm_backup,
+	};
+	struct i915_gem_ttm_pm_apply pm_apply = {
+		.base = {.ops = &backup_ops},
+		.allow_gpu = allow_gpu,
+		.backup_pinned = backup_pinned,
+	};
+
+	return i915_gem_process_region(mr, &pm_apply.base);
+}
+
+static int i915_ttm_restore(struct i915_gem_apply_to_region *apply,
+			    struct drm_i915_gem_object *obj)
+{
+	struct i915_gem_ttm_pm_apply *pm_apply =
+		container_of(apply, typeof(*pm_apply), base);
+	struct drm_i915_gem_object *backup = obj->ttm.backup;
+	struct ttm_buffer_object *backup_bo = i915_gem_to_ttm(backup);
+	struct ttm_operation_ctx ctx = {};
+	int err;
+
+	if (!obj->ttm.backup)
Could just check the local here: if (!backup)
+		return 0;
+
+	if (!pm_apply->allow_gpu && (obj->flags & I915_BO_ALLOC_USER))
+		return 0;
+
+	err = i915_gem_object_lock(backup, apply->ww);
+	if (err)
+		return err;
+
+	/* Content may have been swapped. */
+	err = ttm_tt_populate(backup_bo->bdev, backup_bo->ttm, &ctx);
+	if (!err) {
+		err = i915_gem_obj_copy_ttm(obj, backup, pm_apply->allow_gpu,
+					    false);
+		GEM_WARN_ON(err);
+
+		obj->ttm.backup = NULL;
+		err = 0;
+	}
+
+	i915_gem_ww_unlock_single(backup);
+	i915_gem_object_put(backup);
I assume we need to set obj->ttm.backup = NULL somewhere on the failure path
here, or else not drop the reference? As written, if ttm_tt_populate() fails
we put the backup object but leave obj->ttm.backup pointing at it, which
looks like a potential use-after-free later.
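One possible fix (only a sketch, untested): keep the reference, and with it
obj->ttm.backup, whenever the copy didn't happen, so a later retry or
i915_ttm_backup_free() still sees a valid object:

	i915_gem_ww_unlock_single(backup);
	if (!err)
		i915_gem_object_put(backup);

	return err;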
+
+	return err;
+}
+