Re: [Intel-gfx] [PATCH 4/4] [v4] drm/i915: Convert execbuf code to use vmas

2013-08-14 Thread Ben Widawsky
On Wed, Aug 14, 2013 at 11:43:58PM +0100, Chris Wilson wrote:
> These are my numbers for a beefy haswell box (note the really
> interesting numbers will be on Baytrail):
> 
> unpatched:
> 
> relocation: buffers=   1: old=  21945 + 34.4*reloc, lut=  21814 + 34.0*reloc 
> (ns)
> relocation: buffers=   2: old=  15947 + 36.4*reloc, lut=  16169 + 35.4*reloc 
> (ns)
> relocation: buffers=   4: old=  12711 + 37.6*reloc, lut=  13039 + 36.7*reloc 
> (ns)
> relocation: buffers=   8: old=   6154 + 40.9*reloc, lut=   7201 + 38.9*reloc 
> (ns)
> relocation: buffers=  16: old=   4846 + 41.6*reloc, lut=   5337 + 40.6*reloc 
> (ns)
> relocation: buffers=  32: old=   7097 + 41.9*reloc, lut=   6943 + 41.0*reloc 
> (ns)
> relocation: buffers=  64: old=  13318 + 41.9*reloc, lut=  12748 + 41.2*reloc 
> (ns)
> relocation: buffers= 128: old=  27282 + 43.0*reloc, lut=  25778 + 41.7*reloc 
> (ns)
> relocation: buffers= 256: old=  54535 + 45.2*reloc, lut=  51912 + 43.7*reloc 
> (ns)
> relocation: buffers= 512: old= 137447 + 53.2*reloc, lut= 129333 + 45.5*reloc 
> (ns)
> relocation: buffers=1024: old= 307347 + 66.5*reloc, lut= 291487 + 48.1*reloc 
> (ns)
> relocation: buffers=2048: old= 606300 + 92.1*reloc, lut= 574774 + 51.6*reloc 
> (ns)
> skip-relocs: buffers=   1: old=   1583 + 15.6*reloc, lut=   1516 + 14.5*reloc 
> (ns)
> skip-relocs: buffers=   2: old=   1621 + 15.6*reloc, lut=   1603 + 14.5*reloc 
> (ns)
> skip-relocs: buffers=   4: old=   1791 + 15.6*reloc, lut=   1777 + 14.5*reloc 
> (ns)
> skip-relocs: buffers=   8: old=   2009 + 15.6*reloc, lut=   2024 + 14.6*reloc 
> (ns)
> skip-relocs: buffers=  16: old=   2637 + 15.7*reloc, lut=   2564 + 14.6*reloc 
> (ns)
> skip-relocs: buffers=  32: old=   3835 + 15.8*reloc, lut=   3785 + 14.7*reloc 
> (ns)
> skip-relocs: buffers=  64: old=   6996 + 15.8*reloc, lut=   6681 + 14.7*reloc 
> (ns)
> skip-relocs: buffers= 128: old=  14333 + 16.4*reloc, lut=  13560 + 15.2*reloc 
> (ns)
> skip-relocs: buffers= 256: old=  28092 + 17.7*reloc, lut=  26759 + 16.2*reloc 
> (ns)
> skip-relocs: buffers= 512: old=  70885 + 25.2*reloc, lut=  66713 + 17.9*reloc 
> (ns)
> skip-relocs: buffers=1024: old= 158520 + 35.2*reloc, lut= 150828 + 20.1*reloc 
> (ns)
> skip-relocs: buffers=2048: old= 314208 + 54.3*reloc, lut= 298343 + 22.1*reloc 
> (ns)
> no-relocs: buffers=   1: old=   1533 + 5.2*reloc, lut=   1498 + 4.9*reloc (ns)
> no-relocs: buffers=   2: old=   1518 + 5.2*reloc, lut=   1505 + 4.9*reloc (ns)
> no-relocs: buffers=   4: old=   1647 + 5.2*reloc, lut=   1593 + 4.9*reloc (ns)
> no-relocs: buffers=   8: old=   1882 + 5.3*reloc, lut=   1874 + 5.0*reloc (ns)
> no-relocs: buffers=  16: old=   2399 + 5.3*reloc, lut=   2341 + 5.0*reloc (ns)
> no-relocs: buffers=  32: old=   3638 + 5.3*reloc, lut=   3554 + 5.0*reloc (ns)
> no-relocs: buffers=  64: old=   6622 + 5.3*reloc, lut=   6308 + 5.1*reloc (ns)
> no-relocs: buffers= 128: old=  13584 + 5.3*reloc, lut=  12872 + 5.1*reloc (ns)
> no-relocs: buffers= 256: old=  26519 + 5.8*reloc, lut=  25234 + 5.5*reloc (ns)
> no-relocs: buffers= 512: old=  67128 + 5.4*reloc, lut=  63054 + 5.2*reloc (ns)
> no-relocs: buffers=1024: old= 146705 + 5.2*reloc, lut= 139020 + 5.1*reloc (ns)
> no-relocs: buffers=2048: old= 290319 + 5.4*reloc, lut= 274705 + 5.4*reloc (ns)
> 
> vma(execbuffer):
> 
> relocation: buffers=   1: old=  21922 + 34.6*reloc, lut=  21510 + 34.0*reloc 
> (ns)
> relocation: buffers=   2: old=  16851 + 37.4*reloc, lut=  17123 + 35.4*reloc 
> (ns)
> relocation: buffers=   4: old=  13234 + 37.8*reloc, lut=  13436 + 36.9*reloc 
> (ns)
> relocation: buffers=   8: old=   6549 + 40.8*reloc, lut=   6512 + 39.8*reloc 
> (ns)
> relocation: buffers=  16: old=   5012 + 41.8*reloc, lut=   4883 + 41.0*reloc 
> (ns)
> relocation: buffers=  32: old=   8591 + 42.2*reloc, lut=   8377 + 41.1*reloc 
> (ns)
> relocation: buffers=  64: old=  16051 + 42.8*reloc, lut=  15658 + 41.7*reloc 
> (ns)
> relocation: buffers= 128: old=  33397 + 44.5*reloc, lut=  32705 + 43.3*reloc 
> (ns)
> relocation: buffers= 256: old=  68012 + 46.8*reloc, lut=  66904 + 45.5*reloc 
> (ns)
> relocation: buffers= 512: old= 160162 + 56.4*reloc, lut= 155586 + 49.1*reloc 
> (ns)
> relocation: buffers=1024: old= 348728 + 71.8*reloc, lut= 338113 + 55.1*reloc 
> (ns)
> relocation: buffers=2048: old= 699331 + 98.7*reloc, lut= 675969 + 62.2*reloc 
> (ns)
> skip-relocs: buffers=   1: old=   1642 + 16.5*reloc, lut=   1588 + 15.6*reloc 
> (ns)
> skip-relocs: buffers=   2: old=   1676 + 16.4*reloc, lut=   1663 + 15.6*reloc 
> (ns)
> skip-relocs: buffers=   4: old=   1926 + 16.4*reloc, lut=   1891 + 15.6*reloc 
> (ns)
> skip-relocs: buffers=   8: old=   2218 + 16.6*reloc, lut=   2212 + 15.7*reloc 
> (ns)
> skip-relocs: buffers=  16: old=   2933 + 16.6*reloc, lut=   2880 + 15.7*reloc 
> (ns)
> skip-relocs: buffers=  32: old=   4594 + 16.6*reloc, lut=   4523 + 15.8*reloc 
> (ns)
> skip-relocs: buffers=  64: old=   8414 + 16.8*reloc, lut=   8210 + 15.9*reloc 
> (ns)
> skip-relocs: buffers= 128: old=  17429 + 17.9*reloc

Re: [Intel-gfx] [PATCH 4/4] [v4] drm/i915: Convert execbuf code to use vmas

2013-08-14 Thread Chris Wilson
On Tue, Aug 13, 2013 at 06:11:59PM -0700, Ben Widawsky wrote:
> On Tue, Aug 13, 2013 at 06:09:09PM -0700, Ben Widawsky wrote:
> > From: Ben Widawsky 
> > 
> > In order to transition more of our code over to using a VMA instead of
> > an  pair - we must have the vma accessible at execbuf time. Up
> > until now, we've only had a VMA when actually binding an object.
> > 
> > The previous patch helped handle the distinction on bound vs. unbound.
> > This patch will help us catch leaks, and other issues before we actually
> > shuffle a bunch of stuff around.
> > 
> > This attempts to convert all the execbuf code to speak in vmas. Since
> > the execbuf code is very self contained it was a nice isolated
> > conversion.
> > 
> > The meat of the code is about turning eb_objects into eb_vma, and then
> > wiring up the rest of the code to use vmas instead of obj, vm pairs.
> > 
> > Unfortunately, to do this, we must move the exec_list link from the obj
> > structure. This list is reused in the eviction code, so we must also
> > modify the eviction code to make this work.
> > 
> > WARNING: This patch makes an already hotly profiled path slower. The cost is
> > unavoidable. In reply to this mail, I will attach the extra data.
> > 
> 
> [snip]
> 
> Here is the output from gem_exec_lut_handle both before and after this
> patch. The results honestly don't make sense to me, but I'll set Chris
> parse it before scratching my head harder.
> 
> Before patch
> 
> relocation: buffers=   1: old=   8060 + 165.3*reloc, lut=   7816 + 
> 164.8*reloc (ns)
> relocation: buffers=   2: old=   6748 + 166.6*reloc, lut=   6952 + 
> 165.4*reloc (ns)
> relocation: buffers=   4: old=   8140 + 165.9*reloc, lut=   8216 + 
> 165.4*reloc (ns)
> relocation: buffers=   8: old=  10732 + 166.0*reloc, lut=  10615 + 
> 165.2*reloc (ns)
> relocation: buffers=  16: old=  15099 + 167.8*reloc, lut=  15337 + 
> 165.3*reloc (ns)
> relocation: buffers=  32: old=  26140 + 166.0*reloc, lut=  25488 + 
> 165.5*reloc (ns)
> relocation: buffers=  64: old=  46300 + 170.5*reloc, lut=  44279 + 
> 166.7*reloc (ns)
> relocation: buffers= 128: old=  84056 + 176.9*reloc, lut=  85379 + 
> 166.3*reloc (ns)
> relocation: buffers= 256: old= 174398 + 167.9*reloc, lut= 167744 + 
> 167.0*reloc (ns)
> relocation: buffers= 512: old= 349688 + 175.7*reloc, lut= 348590 + 
> 170.8*reloc (ns)
> relocation: buffers=1024: old= 726265 + 191.2*reloc, lut= 719774 + 
> 180.2*reloc (ns)
> relocation: buffers=2048: old=1456866 + 224.3*reloc, lut=1442087 + 
> 173.0*reloc (ns)
> skip-relocs: buffers=   1: old=   4445 + 16.0*reloc, lut=   4433 + 15.6*reloc 
> (ns)
> skip-relocs: buffers=   2: old=   4585 + 16.0*reloc, lut=   4571 + 15.6*reloc 
> (ns)
> skip-relocs: buffers=   4: old=   5667 + 16.0*reloc, lut=   5340 + 15.6*reloc 
> (ns)
> skip-relocs: buffers=   8: old=   6051 + 16.1*reloc, lut=   6026 + 15.6*reloc 
> (ns)
> skip-relocs: buffers=  16: old=   7953 + 16.1*reloc, lut=   7914 + 15.6*reloc 
> (ns)
> skip-relocs: buffers=  32: old=  11972 + 16.2*reloc, lut=  11875 + 15.7*reloc 
> (ns)
> skip-relocs: buffers=  64: old=  1 + 16.5*reloc, lut=  19832 + 15.7*reloc 
> (ns)
> skip-relocs: buffers= 128: old=  37796 + 16.9*reloc, lut=  36539 + 15.9*reloc 
> (ns)
> skip-relocs: buffers= 256: old=  71604 + 18.1*reloc, lut=  71313 + 16.5*reloc 
> (ns)
> skip-relocs: buffers= 512: old= 152682 + 24.3*reloc, lut= 141379 + 27.9*reloc 
> (ns)
> skip-relocs: buffers=1024: old= 314116 + 41.7*reloc, lut= 303019 + 20.1*reloc 
> (ns)
> skip-relocs: buffers=2048: old= 619784 + 54.1*reloc, lut= 603931 + 20.0*reloc 
> (ns)
> no-relocs: buffers=   1: old=   4194 + 5.1*reloc, lut=   4206 + 4.8*reloc (ns)
> no-relocs: buffers=   2: old=   4404 + 5.1*reloc, lut=   4381 + 4.8*reloc (ns)
> no-relocs: buffers=   4: old=   4926 + 5.1*reloc, lut=   4921 + 4.8*reloc (ns)
> no-relocs: buffers=   8: old=   5901 + 5.1*reloc, lut=   5822 + 4.9*reloc (ns)
> no-relocs: buffers=  16: old=   7840 + 5.1*reloc, lut=   7737 + 4.9*reloc (ns)
> no-relocs: buffers=  32: old=  11842 + 5.1*reloc, lut=  11681 + 4.9*reloc (ns)
> no-relocs: buffers=  64: old=  19741 + 5.1*reloc, lut=  19542 + 4.8*reloc (ns)
> no-relocs: buffers= 128: old=  36479 + 5.2*reloc, lut=  35958 + 4.9*reloc (ns)
> no-relocs: buffers= 256: old=  70171 + 5.4*reloc, lut=  69390 + 5.2*reloc (ns)
> no-relocs: buffers= 512: old= 147213 + 3.5*reloc, lut= 137953 + 13.0*reloc 
> (ns)
> no-relocs: buffers=1024: old= 300165 + 4.8*reloc, lut= 293852 + 4.9*reloc (ns)
> no-relocs: buffers=2048: old= 597992 + 8.3*reloc, lut= 590185 + 2.1*reloc (ns)
> 
> 
> After patch
> ===
> relocation: buffers=   1: old=   8075 + 81.4*reloc, lut=   7592 + 80.6*reloc 
> (ns)
> relocation: buffers=   2: old=   5744 + 82.3*reloc, lut=   5837 + 81.1*reloc 
> (ns)
> relocation: buffers=   4: old=   4875 + 82.7*reloc, lut=   4871 + 81.6*reloc 
> (ns)
> relocation: buffers=   8: old=   5729 + 82.7*reloc, lut=   5698 + 81.5*reloc 
> (ns)
> relocation: buffers=  16: old=   7952 

Re: [Intel-gfx] [PATCH 4/4] [v4] drm/i915: Convert execbuf code to use vmas

2013-08-13 Thread Ben Widawsky
On Tue, Aug 13, 2013 at 06:09:09PM -0700, Ben Widawsky wrote:
> From: Ben Widawsky 
> 
> In order to transition more of our code over to using a VMA instead of
> an  pair - we must have the vma accessible at execbuf time. Up
> until now, we've only had a VMA when actually binding an object.
> 
> The previous patch helped handle the distinction on bound vs. unbound.
> This patch will help us catch leaks, and other issues before we actually
> shuffle a bunch of stuff around.
> 
> This attempts to convert all the execbuf code to speak in vmas. Since
> the execbuf code is very self contained it was a nice isolated
> conversion.
> 
> The meat of the code is about turning eb_objects into eb_vma, and then
> wiring up the rest of the code to use vmas instead of obj, vm pairs.
> 
> Unfortunately, to do this, we must move the exec_list link from the obj
> structure. This list is reused in the eviction code, so we must also
> modify the eviction code to make this work.
> 
> WARNING: This patch makes an already hotly profiled path slower. The cost is
> unavoidable. In reply to this mail, I will attach the extra data.
> 

[snip]

Here is the output from gem_exec_lut_handle both before and after this
patch. The results honestly don't make sense to me, but I'll set Chris
parse it before scratching my head harder.

Before patch

relocation: buffers=   1: old=   8060 + 165.3*reloc, lut=   7816 + 164.8*reloc 
(ns)
relocation: buffers=   2: old=   6748 + 166.6*reloc, lut=   6952 + 165.4*reloc 
(ns)
relocation: buffers=   4: old=   8140 + 165.9*reloc, lut=   8216 + 165.4*reloc 
(ns)
relocation: buffers=   8: old=  10732 + 166.0*reloc, lut=  10615 + 165.2*reloc 
(ns)
relocation: buffers=  16: old=  15099 + 167.8*reloc, lut=  15337 + 165.3*reloc 
(ns)
relocation: buffers=  32: old=  26140 + 166.0*reloc, lut=  25488 + 165.5*reloc 
(ns)
relocation: buffers=  64: old=  46300 + 170.5*reloc, lut=  44279 + 166.7*reloc 
(ns)
relocation: buffers= 128: old=  84056 + 176.9*reloc, lut=  85379 + 166.3*reloc 
(ns)
relocation: buffers= 256: old= 174398 + 167.9*reloc, lut= 167744 + 167.0*reloc 
(ns)
relocation: buffers= 512: old= 349688 + 175.7*reloc, lut= 348590 + 170.8*reloc 
(ns)
relocation: buffers=1024: old= 726265 + 191.2*reloc, lut= 719774 + 180.2*reloc 
(ns)
relocation: buffers=2048: old=1456866 + 224.3*reloc, lut=1442087 + 173.0*reloc 
(ns)
skip-relocs: buffers=   1: old=   4445 + 16.0*reloc, lut=   4433 + 15.6*reloc 
(ns)
skip-relocs: buffers=   2: old=   4585 + 16.0*reloc, lut=   4571 + 15.6*reloc 
(ns)
skip-relocs: buffers=   4: old=   5667 + 16.0*reloc, lut=   5340 + 15.6*reloc 
(ns)
skip-relocs: buffers=   8: old=   6051 + 16.1*reloc, lut=   6026 + 15.6*reloc 
(ns)
skip-relocs: buffers=  16: old=   7953 + 16.1*reloc, lut=   7914 + 15.6*reloc 
(ns)
skip-relocs: buffers=  32: old=  11972 + 16.2*reloc, lut=  11875 + 15.7*reloc 
(ns)
skip-relocs: buffers=  64: old=  1 + 16.5*reloc, lut=  19832 + 15.7*reloc 
(ns)
skip-relocs: buffers= 128: old=  37796 + 16.9*reloc, lut=  36539 + 15.9*reloc 
(ns)
skip-relocs: buffers= 256: old=  71604 + 18.1*reloc, lut=  71313 + 16.5*reloc 
(ns)
skip-relocs: buffers= 512: old= 152682 + 24.3*reloc, lut= 141379 + 27.9*reloc 
(ns)
skip-relocs: buffers=1024: old= 314116 + 41.7*reloc, lut= 303019 + 20.1*reloc 
(ns)
skip-relocs: buffers=2048: old= 619784 + 54.1*reloc, lut= 603931 + 20.0*reloc 
(ns)
no-relocs: buffers=   1: old=   4194 + 5.1*reloc, lut=   4206 + 4.8*reloc (ns)
no-relocs: buffers=   2: old=   4404 + 5.1*reloc, lut=   4381 + 4.8*reloc (ns)
no-relocs: buffers=   4: old=   4926 + 5.1*reloc, lut=   4921 + 4.8*reloc (ns)
no-relocs: buffers=   8: old=   5901 + 5.1*reloc, lut=   5822 + 4.9*reloc (ns)
no-relocs: buffers=  16: old=   7840 + 5.1*reloc, lut=   7737 + 4.9*reloc (ns)
no-relocs: buffers=  32: old=  11842 + 5.1*reloc, lut=  11681 + 4.9*reloc (ns)
no-relocs: buffers=  64: old=  19741 + 5.1*reloc, lut=  19542 + 4.8*reloc (ns)
no-relocs: buffers= 128: old=  36479 + 5.2*reloc, lut=  35958 + 4.9*reloc (ns)
no-relocs: buffers= 256: old=  70171 + 5.4*reloc, lut=  69390 + 5.2*reloc (ns)
no-relocs: buffers= 512: old= 147213 + 3.5*reloc, lut= 137953 + 13.0*reloc (ns)
no-relocs: buffers=1024: old= 300165 + 4.8*reloc, lut= 293852 + 4.9*reloc (ns)
no-relocs: buffers=2048: old= 597992 + 8.3*reloc, lut= 590185 + 2.1*reloc (ns)


After patch
===
relocation: buffers=   1: old=   8075 + 81.4*reloc, lut=   7592 + 80.6*reloc 
(ns)
relocation: buffers=   2: old=   5744 + 82.3*reloc, lut=   5837 + 81.1*reloc 
(ns)
relocation: buffers=   4: old=   4875 + 82.7*reloc, lut=   4871 + 81.6*reloc 
(ns)
relocation: buffers=   8: old=   5729 + 82.7*reloc, lut=   5698 + 81.5*reloc 
(ns)
relocation: buffers=  16: old=   7952 + 83.0*reloc, lut=   7809 + 81.9*reloc 
(ns)
relocation: buffers=  32: old=  11884 + 82.9*reloc, lut=  11702 + 81.6*reloc 
(ns)
relocation: buffers=  64: old=  20388 + 83.4*reloc, lut=  19995 + 82.2*reloc 
(ns)
relocation: buffers= 128: old=  38057 + 85.0*reloc, lut=  37675 + 83

[Intel-gfx] [PATCH 4/4] [v4] drm/i915: Convert execbuf code to use vmas

2013-08-13 Thread Ben Widawsky
From: Ben Widawsky 

In order to transition more of our code over to using a VMA instead of
an  pair - we must have the vma accessible at execbuf time. Up
until now, we've only had a VMA when actually binding an object.

The previous patch helped handle the distinction on bound vs. unbound.
This patch will help us catch leaks, and other issues before we actually
shuffle a bunch of stuff around.

This attempts to convert all the execbuf code to speak in vmas. Since
the execbuf code is very self contained it was a nice isolated
conversion.

The meat of the code is about turning eb_objects into eb_vma, and then
wiring up the rest of the code to use vmas instead of obj, vm pairs.

Unfortunately, to do this, we must move the exec_list link from the obj
structure. This list is reused in the eviction code, so we must also
modify the eviction code to make this work.

WARNING: This patch makes an already hotly profiled path slower. The cost is
unavoidable. In reply to this mail, I will attach the extra data.

v2: Release table lock early, and two a 2 phase vma lookup to avoid
having to use a GFP_ATOMIC. (Chris)

v3: s/obj_exec_list/obj_exec_link/
Updates to address
commit 6d2b888569d366beb4be72cacfde41adee2c25e1
Author: Chris Wilson 
Date:   Wed Aug 7 18:30:54 2013 +0100

drm/i915: List objects allocated from stolen memory in debugfs

v4: Use obj = vma->obj for neatness in some places (Chris)
need_reloc_mappable() should return false if ppgtt (Chris)

Signed-off-by: Ben Widawsky 
---
 drivers/gpu/drm/i915/i915_debugfs.c|  12 +-
 drivers/gpu/drm/i915/i915_drv.h|  25 ++-
 drivers/gpu/drm/i915/i915_gem.c|  28 ++-
 drivers/gpu/drm/i915/i915_gem_evict.c  |  31 ++-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 319 -
 5 files changed, 232 insertions(+), 183 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index eb87865..4785d8c 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -195,9 +195,9 @@ static int obj_rank_by_stolen(void *priv,
  struct list_head *A, struct list_head *B)
 {
struct drm_i915_gem_object *a =
-   container_of(A, struct drm_i915_gem_object, exec_list);
+   container_of(A, struct drm_i915_gem_object, obj_exec_link);
struct drm_i915_gem_object *b =
-   container_of(B, struct drm_i915_gem_object, exec_list);
+   container_of(B, struct drm_i915_gem_object, obj_exec_link);
 
return a->stolen->start - b->stolen->start;
 }
@@ -221,7 +221,7 @@ static int i915_gem_stolen_list_info(struct seq_file *m, 
void *data)
if (obj->stolen == NULL)
continue;
 
-   list_add(&obj->exec_list, &stolen);
+   list_add(&obj->obj_exec_link, &stolen);
 
total_obj_size += obj->base.size;
total_gtt_size += i915_gem_obj_ggtt_size(obj);
@@ -231,7 +231,7 @@ static int i915_gem_stolen_list_info(struct seq_file *m, 
void *data)
if (obj->stolen == NULL)
continue;
 
-   list_add(&obj->exec_list, &stolen);
+   list_add(&obj->obj_exec_link, &stolen);
 
total_obj_size += obj->base.size;
count++;
@@ -239,11 +239,11 @@ static int i915_gem_stolen_list_info(struct seq_file *m, 
void *data)
list_sort(NULL, &stolen, obj_rank_by_stolen);
seq_puts(m, "Stolen:\n");
while (!list_empty(&stolen)) {
-   obj = list_first_entry(&stolen, typeof(*obj), exec_list);
+   obj = list_first_entry(&stolen, typeof(*obj), obj_exec_link);
seq_puts(m, "   ");
describe_obj(m, obj);
seq_putc(m, '\n');
-   list_del_init(&obj->exec_list);
+   list_del_init(&obj->obj_exec_link);
}
mutex_unlock(&dev->struct_mutex);
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 3fc4324..2147e4d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -563,6 +563,17 @@ struct i915_vma {
struct list_head mm_list;
 
struct list_head vma_link; /* Link in the object's VMA list */
+
+   /** This vma's place in the batchbuffer or on the eviction list */
+   struct list_head exec_list;
+
+   /**
+* Used for performing relocations during execbuffer insertion.
+*/
+   struct hlist_node exec_node;
+   unsigned long exec_handle;
+   struct drm_i915_gem_exec_object2 *exec_entry;
+
 };
 
 struct i915_ctx_hang_stats {
@@ -1312,8 +1323,8 @@ struct drm_i915_gem_object {
struct list_head global_list;
 
struct list_head ring_list;
-   /** This object's place in the batchbuffer or on the eviction list */
-   struct list_head exec_list;
+   /** Used in execbuf to temporarily hold a