Re: [Mesa-dev] [PATCH 2/3] i965/gen7: Set MOCS L3 cacheability for IVB/BYT

2013-09-07 Thread Vedran Rodic
On Tue, Sep 3, 2013 at 9:19 PM, Ville Syrjälä
ville.syrj...@linux.intel.com wrote:
 On Thu, Aug 15, 2013 at 10:39:31PM +0200, Vedran Rodic wrote:
  We do have the set_caching ioctl. It's enough to flip the PTEs to UC and
  let MOCS manage things. I actually did a few experiments on my IVB. I
  made all Mesa's buffers UC via PTEs by patching libdrm to change the
  cache mode of each bo after allocation. Then I fiddled with the MOCS
  LLC bits in various ways. It definitely has an effect, sometimes making
  things slower, sometimes faster. xonotic again seemed to benefit. IIRC
  leaving everything LLC uncached was actually the fastest (w/ high quality
  at least) so we may be thrashing the LLC a bit there. But eg. reaction
  quake regressed quite a lot if most things were left as UC.

 Can you share the libdrm patch?

 Sorry, forgot to reply.

 Here's the patch if you're still interested.

Thanks,

Just as a data point, I tried my OpenGL test application (Dota 2 on
Wine with my patches to enable fast depth clear), and performance
doesn't change when I use libdrm with your patch applied. I also
disabled all MOCS_L3 stuff for my IVB, still no changes.


Vedran
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/3] i965/gen7: Set MOCS L3 cacheability for IVB/BYT

2013-09-03 Thread Ville Syrjälä
On Thu, Aug 15, 2013 at 10:39:31PM +0200, Vedran Rodic wrote:
  We do have the set_caching ioctl. It's enough to flip the PTEs to UC and
  let MOCS manage things. I actually did a few experiments on my IVB. I
  made all Mesa's buffers UC via PTEs by patching libdrm to change the
  cache mode of each bo after allocation. Then I fiddled with the MOCS
  LLC bits in various ways. It definitely has an effect, sometimes making
  things slower, sometimes faster. xonotic again seemed to benefit. IIRC
  leaving everything LLC uncached was actually the fastest (w/ high quality
  at least) so we may be thrashing the LLC a bit there. But eg. reaction
  quake regressed quite a lot if most things were left as UC.
 
 Can you share the libdrm patch?

Sorry, forgot to reply.

Here's the patch if you're still interested.

From 47f51b19137603dccaa4fcb2a703d56335c292fe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= ville.syrj...@linux.intel.com
Date: Wed, 14 Aug 2013 15:12:29 +0300
Subject: [PATCH] make bos uncached in PTEs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Ville Syrjälä ville.syrj...@linux.intel.com
---
 intel/intel_bufmgr_gem.c | 60 ++--
 1 file changed, 53 insertions(+), 7 deletions(-)

diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
index f98f7a7..32ff260 100644
--- a/intel/intel_bufmgr_gem.c
+++ b/intel/intel_bufmgr_gem.c
@@ -243,6 +243,10 @@ drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * 
tiling_mode,
uint32_t * swizzle_mode);
 
 static int
+drm_intel_gem_bo_set_caching_internal(drm_intel_bo *bo,
+ uint32_t cache_mode);
+
+static int
 drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo,
 uint32_t tiling_mode,
 uint32_t stride);
@@ -695,6 +699,7 @@ retry:
drm_intel_gem_bo_free(&bo_gem->bo);
goto retry;
}
+
}
}
pthread_mutex_unlock(&bufmgr_gem->lock);
@@ -761,9 +766,16 @@ drm_intel_gem_bo_alloc_for_render(drm_intel_bufmgr *bufmgr,
  unsigned long size,
  unsigned int alignment)
 {
-   return drm_intel_gem_bo_alloc_internal(bufmgr, name, size,
-  BO_ALLOC_FOR_RENDER,
-  I915_TILING_NONE, 0);
+   drm_intel_bo *bo;
+
+   bo = drm_intel_gem_bo_alloc_internal(bufmgr, name, size,
+BO_ALLOC_FOR_RENDER,
+I915_TILING_NONE, 0);
+
+   if (bo)
+   drm_intel_gem_bo_set_caching_internal(bo, I915_CACHEING_NONE);
+
+   return bo;
 }
 
 static drm_intel_bo *
@@ -772,8 +784,15 @@ drm_intel_gem_bo_alloc(drm_intel_bufmgr *bufmgr,
   unsigned long size,
   unsigned int alignment)
 {
-   return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 0,
-  I915_TILING_NONE, 0);
+   drm_intel_bo *bo;
+
+   bo = drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 0,
+I915_TILING_NONE, 0);
+
+   if (bo)
+   drm_intel_gem_bo_set_caching_internal(bo, I915_CACHEING_CACHED);
+
+   return bo;
 }
 
 static drm_intel_bo *
@@ -784,6 +803,7 @@ drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, 
const char *name,
drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
unsigned long size, stride;
uint32_t tiling;
+   drm_intel_bo *bo;
 
do {
unsigned long aligned_y, height_alignment;
@@ -824,8 +844,13 @@ drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, 
const char *name,
if (tiling == I915_TILING_NONE)
stride = 0;
 
-   return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, flags,
-  tiling, stride);
+   bo = drm_intel_gem_bo_alloc_internal(bufmgr, name, size, flags,
+tiling, stride);
+
+   if (bo)
+   drm_intel_gem_bo_set_caching_internal(bo, I915_CACHEING_NONE);
+
+   return bo;
 }
 
 /**
@@ -2363,6 +2388,27 @@ drm_intel_gem_bo_unpin(drm_intel_bo *bo)
 }
 
 static int
+drm_intel_gem_bo_set_caching_internal(drm_intel_bo *bo,
+ uint32_t cache_mode)
+{
+   drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
+   drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
+   struct drm_i915_gem_cacheing set_caching;
+   int ret;
+
+   memset(&set_caching, 0, sizeof(set_caching));
+
+   set_caching.handle = bo_gem->gem_handle;
+   

Re: [Mesa-dev] [PATCH 2/3] i965/gen7: Set MOCS L3 cacheability for IVB/BYT

2013-08-15 Thread Chad Versace

On 08/14/2013 12:50 AM, Ville Syrjälä wrote:

On Wed, Aug 14, 2013 at 10:45:23AM +0300, Ville Syrjälä wrote:

On Tue, Aug 13, 2013 at 05:46:55PM -0700, Chad Versace wrote:

On 08/13/2013 03:31 PM, Vedran Rodic wrote:

On Mon, Aug 12, 2013 at 3:07 PM,  ville.syrj...@linux.intel.com wrote:

From: Ville Syrjälä ville.syrj...@linux.intel.com




For L3 cacheability, IVB won't consult the PTE for anything that has a
relevant MOCS field. So even if you make everything L3 cacheable through
the PTEs, MOCS will always override it. How do I know, you ask? Well,
BSpec says so for one, and more importantly I verified this by running
some tests [...]



I suspected this. Thanks for sharing your experimental evidence.



For LLC cacheability the story will be different because there IVB MOCS
can only say LLC cacheable or consult the PTE. So to make stuff
uncached in LLC on IVB, we'd need to issue the set_caching ioctl to
change the PTE to uncached, and after that we could use just the MOCS
to select the LLC caching policy. Since the set_caching ioctl only needs
to be issued once per object (or you could use the , there


Sorry hit send by accident. Was going to say we could use the new
create2 ioctl Chris has proposed that allows you to set the cache mode
when creating the object. So there won't be a performance hit from
extra ioctls getting issued all the time.



I would like such a cache-control ioctl, as long as the ioctl can also
be used to change the object's cacheing policy in addition to
setting it at object creation. This would be
needed when an object's usage oscillates between texture surface
and render target.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/3] i965/gen7: Set MOCS L3 cacheability for IVB/BYT

2013-08-15 Thread Ville Syrjälä
On Thu, Aug 15, 2013 at 08:08:12AM -0700, Chad Versace wrote:
 On 08/14/2013 12:50 AM, Ville Syrjälä wrote:
  On Wed, Aug 14, 2013 at 10:45:23AM +0300, Ville Syrjälä wrote:
  On Tue, Aug 13, 2013 at 05:46:55PM -0700, Chad Versace wrote:
  On 08/13/2013 03:31 PM, Vedran Rodic wrote:
  On Mon, Aug 12, 2013 at 3:07 PM,  ville.syrj...@linux.intel.com wrote:
  From: Ville Syrjälä ville.syrj...@linux.intel.com
 
 
  For L3 cacheability, IVB won't consult the PTE for anything that has a
  relevant MOCS field. So even if you make everything L3 cacheable through
  the PTEs, MOCS will always override it. How do I know, you ask? Well,
  BSpec says so for one, and more importantly I verified this by running
  some tests [...]
 
 
 I suspected this. Thanks for sharing your experimental evidence.
 
 
  For LLC cacheability the story will be different because there IVB MOCS
  can only say LLC cacheable or consult the PTE. So to make stuff
  uncached in LLC on IVB, we'd need to issue the set_caching ioctl to
  change the PTE to uncached, and after that we could use just the MOCS
  to select the LLC caching policy. Since the set_caching ioctl only needs
  to be issued once per object (or you could use the , there
 
  Sorry hit send by accident. Was going to say we could use the new
  create2 ioctl Chris has proposed that allows you to set the cache mode
  when creating the object. So there won't be a performance hit from
  extra ioctls getting issued all the time.
 
 
 I would like such a cache-control ioctl, as long as the ioctl can also
 be used to change the object's cacheing policy in addition to
 setting it at object creation. This would be
 needed when an object's usage oscillates between texture surface
 and render target.

We do have the set_caching ioctl. It's enough to flip the PTEs to UC and
let MOCS manage things. I actually did a few experiments on my IVB. I
made all Mesa's buffers UC via PTEs by patching libdrm to change the
cache mode of each bo after allocation. Then I fiddled with the MOCS
LLC bits in various ways. It definitely has an effect, sometimes making
things slower, sometimes faster. xonotic again seemed to benefit. IIRC
leaving everything LLC uncached was actually the fastest (w/ high quality
at least) so we may be thrashing the LLC a bit there. But e.g. reaction
quake regressed quite a lot if most things were left as UC.

I should probably run through a few MOCS combinations and collect a bit
more data. But it's looking like some sensible heuristic has to be
involved since different benchmarks show conflicting results. Maybe
your LLC overcommit prevention approach would be the one. Are you
planning to continue with that work?

-- 
Ville Syrjälä
Intel OTC
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/3] i965/gen7: Set MOCS L3 cacheability for IVB/BYT

2013-08-15 Thread Chad Versace

On 08/15/2013 09:11 AM, Ville Syrjälä wrote:

On Thu, Aug 15, 2013 at 08:08:12AM -0700, Chad Versace wrote:



I would like such a cache-control ioctl, as long as the ioctl can also
be used to change the object's cacheing policy in addition to
setting it at object creation. This would be
needed when an object's usage oscillates between texture surface
and render target.


We do have the set_caching ioctl. It's enough to flip the PTEs to UC and
let MOCS manage things. I actually did a few experiments on my IVB. I
made all Mesa's buffers UC via PTEs by patching libdrm to change the
cache mode of each bo after allocation. Then I fiddled with the MOCS
LLC bits in various ways. It definitely has an effect, sometimes making
things slower, sometimes faster. xonotic again seemed to benefit. IIRC
leaving everything LLC uncached was actually the fastest (w/ high quality
at least) so we may be thrashing the LLC a bit there. But eg. reaction
quake regressed quite a lot if most things were left as UC.

I should probably run through a few MOCS combinations and collect a bit
more data. But it's looking like some sensible heuristic has to be
involved since different benchmarks show conflicting results. Maybe
your LLC overcommit prevention approach would be the one. Are you
planning to continue with that work?


I do plan to continue that work. I plan to return to it the week of
Aug 26, because I need to first make more progress on Broadwell.

My simple heuristic that prevents overcommit of the
LLC, in its current form, gives varying results too. Some benchmarks benefit,
some are harmed.
In each experiment, I set the LLC commit threshold to 0.80,
1.00, or 1.25. (That is, for a given draw call, Mesa stops putting objects
in the LLC when the draw call has filled that ratio of the LLC).

Hopefully, to get consistent benefit across all apps, all we need is to choose 
a significantly
higher or lower threshold than I've previously chosen. What I fear, though, is 
that
since the GPU shares the LLC with the CPU (GPU-LLC=CPU-L3), to find a heuristic 
that's
near-globally beneficial, we may need to consider the CPU load to intelligently 
choose the LLC commit threshold.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/3] i965/gen7: Set MOCS L3 cacheability for IVB/BYT

2013-08-15 Thread Vedran Rodic
 We do have the set_caching ioctl. It's enough to flip the PTEs to UC and
 let MOCS manage things. I actually did a few experiments on my IVB. I
 made all Mesa's buffers UC via PTEs by patching libdrm to change the
 cache mode of each bo after allocation. Then I fiddled with the MOCS
 LLC bits in various ways. It definitely has an effect, sometimes making
 things slower, sometimes faster. xonotic again seemed to benefit. IIRC
 leaving everything LLC uncached was actually the fastest (w/ high quality
 at least) so we may be thrashing the LLC a bit there. But eg. reaction
 quake regressed quite a lot if most things were left as UC.

Can you share the libdrm patch?
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/3] i965/gen7: Set MOCS L3 cacheability for IVB/BYT

2013-08-14 Thread Ville Syrjälä
On Tue, Aug 13, 2013 at 05:46:55PM -0700, Chad Versace wrote:
 On 08/13/2013 03:31 PM, Vedran Rodic wrote:
  On Mon, Aug 12, 2013 at 3:07 PM,  ville.syrj...@linux.intel.com wrote:
  From: Ville Syrjälä ville.syrj...@linux.intel.com
 
  IVB/BYT also has the same L3 cacheability control in MOCS as HSW,
  so let's make use of it.
 
  According to the discussion we had on #intel-gfx a few weeks ago, on
  IVB all Mesa memory is already marked as cached in DRM allocated PTEs.
  So this should not have any effect. Or I'm misunderstanding something.
 
  As I understand, marking everything uncacheable and then marking just
  certain things cacheable could make a difference (since AFAIK, you
  can't mark select regions as uncacheable after you mark PTEs as
  cacheable on IVB).
 
  Can somebody more knowledgeable comment?
 
 On Ivybridge, the PTEs mark only contexts as LLC+L3 cacheable. Everything
 else is marked as cacheable in LLC, but not L3. So, Ville's patches will
 give a perf boost to Mesa running on any kernel that continues that cacheing
 policy.

There's a bit more to that story.

For L3 cacheability, IVB won't consult the PTE for anything that has a
relevant MOCS field. So even if you make everything L3 cacheable through
the PTEs, MOCS will always override it. How do I know, you ask? Well,
BSpec says so for one, and more importantly I verified this by running
some tests on a patched kernel that makes all currently LLC cacheable
PTEs LLC+L3 cacheable. The patched kernel had similar performance
numbers as the unpatched kernel, and the MOCS patches had the same
effect on both kernels.

According to BSpec there are certain things that don't have a MOCS field,
so in theory the L3 PTE setting should have some effect, but for the
(mostly) gaming benchmarks I ran there didn't seem to be a significant
difference.

For LLC cacheability the story will be different because there IVB MOCS
can only say LLC cacheable or consult the PTE. So to make stuff
uncached in LLC on IVB, we'd need to issue the set_caching ioctl to
change the PTE to uncached, and after that we could use just the MOCS
to select the LLC caching policy. Since the set_caching ioctl only needs
to be issued once per object (or you could use the , there 

-- 
Ville Syrjälä
Intel OTC
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/3] i965/gen7: Set MOCS L3 cacheability for IVB/BYT

2013-08-14 Thread Ville Syrjälä
On Wed, Aug 14, 2013 at 10:45:23AM +0300, Ville Syrjälä wrote:
 On Tue, Aug 13, 2013 at 05:46:55PM -0700, Chad Versace wrote:
  On 08/13/2013 03:31 PM, Vedran Rodic wrote:
   On Mon, Aug 12, 2013 at 3:07 PM,  ville.syrj...@linux.intel.com wrote:
   From: Ville Syrjälä ville.syrj...@linux.intel.com
  
   IVB/BYT also has the same L3 cacheability control in MOCS as HSW,
   so let's make use of it.
  
   According to the discussion we had on #intel-gfx a few weeks ago, on
   IVB all Mesa memory is already marked as cached in DRM allocated PTEs.
   So this should not have any effect. Or I'm misunderstanding something.
  
   As I understand, marking everything uncacheable and then marking just
   certain things cacheable could make a difference (since AFAIK, you
   can't mark select regions as uncacheable after you mark PTEs as
   cacheable on IVB).
  
   Can somebody more knowledgeable comment?
  
  On Ivybridge, the PTEs mark only contexts as LLC+L3 cacheable. Everything
  else is marked as cacheable in LLC, but not L3. So, Ville's patches will
  give a perf boost to Mesa running on any kernel that continues that cacheing
  policy.
 
 There's a bit more to that story.
 
 For L3 cacheability, IVB won't consult the PTE for anything that has a
 relevant MOCS field. So even if you make everything L3 cacheable through
 the PTEs, MOCS will always override it. How do I know, you ask? Well,
 BSpec says so for one, and more importantly I verified this by running
 some tests on a patched kernel that makes all currently LLC cacheable
 PTEs LLC+L3 cacheable. The patched kernel had similar performance
 numbers as the unpatched kernel, and the MOCS patches had the same
 effect on both kernels.
 
 According to BSpec there are certain things that don't have a MOCS field,
 so in theory the L3 PTE setting should have some effect, but for the
 (mostly) gaming benchmarks I ran there didn't seem to be a significant
 difference.
 
 For LLC cacheability the story will be different because there IVB MOCS
 can only say LLC cacheable or consult the PTE. So to make stuff
 uncached in LLC on IVB, we'd need to issue the set_caching ioctl to
 change the PTE to uncached, and after that we could use just the MOCS
 to select the LLC caching policy. Since the set_caching ioctl only needs
 to be issued once per object (or you could use the , there 

Sorry hit send by accident. Was going to say we could use the new
create2 ioctl Chris has proposed that allows you to set the cache mode
when creating the object. So there won't be a performance hit from
extra ioctls getting issued all the time.

-- 
Ville Syrjälä
Intel OTC
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/3] i965/gen7: Set MOCS L3 cacheability for IVB/BYT

2013-08-13 Thread Vedran Rodic
On Mon, Aug 12, 2013 at 3:07 PM,  ville.syrj...@linux.intel.com wrote:
 From: Ville Syrjälä ville.syrj...@linux.intel.com

 IVB/BYT also has the same L3 cacheability control in MOCS as HSW,
 so let's make use of it.

According to the discussion we had on #intel-gfx a few weeks ago, on
IVB all Mesa memory is already marked as cached in DRM allocated PTEs.
So this should not have any effect. Or I'm misunderstanding something.

As I understand, marking everything uncacheable and then marking just
certain things cacheable could make a difference (since AFAIK, you
can't mark select regions as uncacheable after you mark PTEs as
cacheable on IVB).

Can somebody more knowledgeable comment?
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/3] i965/gen7: Set MOCS L3 cacheability for IVB/BYT

2013-08-13 Thread Chad Versace

On 08/13/2013 03:31 PM, Vedran Rodic wrote:

On Mon, Aug 12, 2013 at 3:07 PM,  ville.syrj...@linux.intel.com wrote:

From: Ville Syrjälä ville.syrj...@linux.intel.com

IVB/BYT also has the same L3 cacheability control in MOCS as HSW,
so let's make use of it.


According to the discussion we had on #intel-gfx a few weeks ago, on
IVB all Mesa memory is already marked as cached in DRM allocated PTEs.
So this should not have any effect. Or I'm misunderstanding something.

As I understand, marking everything uncacheable and then marking just
certain things cacheable could make a difference (since AFAIK, you
can't mark select regions as uncacheable after you mark PTEs as
cacheable on IVB).

Can somebody more knowledgeable comment?


On Ivybridge, the PTEs mark only contexts as LLC+L3 cacheable. Everything
else is marked as cacheable in LLC, but not L3. So, Ville's patches will
give a perf boost to Mesa running on any kernel that continues that cacheing
policy.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/3] i965/gen7: Set MOCS L3 cacheability for IVB/BYT

2013-08-13 Thread Chad Versace

On 08/12/2013 06:07 AM, ville.syrj...@linux.intel.com wrote:

From: Ville Syrjälä ville.syrj...@linux.intel.com

IVB/BYT also has the same L3 cacheability control in MOCS as HSW,
so let's make use of it.

pts/xonotic and pts/reaction @ 1920x1080 gain ~4% on my IVB GT2. Most
other things show less gains/no regressions, except furmark which
loses some 10 points.

I didn't have a BYT at hand for testing.

Signed-off-by: Ville Syrjälä ville.syrj...@linux.intel.com
---
  src/mesa/drivers/dri/i965/brw_draw_upload.c   | 2 +-
  src/mesa/drivers/dri/i965/brw_misc_state.c| 2 +-
  src/mesa/drivers/dri/i965/gen6_blorp.cpp  | 4 ++--
  src/mesa/drivers/dri/i965/gen7_blorp.cpp  | 6 +++---
  src/mesa/drivers/dri/i965/gen7_misc_state.c   | 2 +-
  src/mesa/drivers/dri/i965/gen7_vs_state.c | 2 +-
  src/mesa/drivers/dri/i965/gen7_wm_state.c | 2 +-
  src/mesa/drivers/dri/i965/gen7_wm_surface_state.c | 4 ++--
  8 files changed, 12 insertions(+), 12 deletions(-)


Conceptually, the patch looks good. The (intel->gen == 7)
checks should be removed from the changes in the gen7 files.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/3] i965/gen7: Set MOCS L3 cacheability for IVB/BYT

2013-08-12 Thread ville . syrjala
From: Ville Syrjälä ville.syrj...@linux.intel.com

IVB/BYT also has the same L3 cacheability control in MOCS as HSW,
so let's make use of it.

pts/xonotic and pts/reaction @ 1920x1080 gain ~4% on my IVB GT2. Most
other things show less gains/no regressions, except furmark which
loses some 10 points.

I didn't have a BYT at hand for testing.

Signed-off-by: Ville Syrjälä ville.syrj...@linux.intel.com
---
 src/mesa/drivers/dri/i965/brw_draw_upload.c   | 2 +-
 src/mesa/drivers/dri/i965/brw_misc_state.c| 2 +-
 src/mesa/drivers/dri/i965/gen6_blorp.cpp  | 4 ++--
 src/mesa/drivers/dri/i965/gen7_blorp.cpp  | 6 +++---
 src/mesa/drivers/dri/i965/gen7_misc_state.c   | 2 +-
 src/mesa/drivers/dri/i965/gen7_vs_state.c | 2 +-
 src/mesa/drivers/dri/i965/gen7_wm_state.c | 2 +-
 src/mesa/drivers/dri/i965/gen7_wm_surface_state.c | 4 ++--
 8 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c 
b/src/mesa/drivers/dri/i965/brw_draw_upload.c
index 897e733..fe840d7 100644
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -658,7 +658,7 @@ static void brw_emit_vertices(struct brw_context *brw)
 if (brw->gen >= 7)
dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE;
 
-if (brw->is_haswell)
+if (brw->gen == 7)
dw0 |= GEN7_MOCS_L3 << 16;
 
 OUT_BATCH(dw0 | (buffer->stride << BRW_VB0_PITCH_SHIFT));
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c 
b/src/mesa/drivers/dri/i965/brw_misc_state.c
index 5927b9b..3884f86 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -1038,7 +1038,7 @@ static void upload_state_base_address( struct brw_context 
*brw )
 */
 
if (brw->gen >= 6) {
-  uint8_t mocs = brw->is_haswell ? GEN7_MOCS_L3 : 0;
+  uint8_t mocs = brw->gen == 7 ? GEN7_MOCS_L3 : 0;
 
   if (brw->gen == 6)
 intel_emit_post_sync_nonzero_flush(brw);
diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.cpp 
b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
index af0f6fc..3c06a3f 100644
--- a/src/mesa/drivers/dri/i965/gen6_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
@@ -74,7 +74,7 @@ void
 gen6_blorp_emit_state_base_address(struct brw_context *brw,
const brw_blorp_params *params)
 {
-   uint8_t mocs = brw->is_haswell ? GEN7_MOCS_L3 : 0;
+   uint8_t mocs = brw->gen == 7 ? GEN7_MOCS_L3 : 0;
 
BEGIN_BATCH(10);
OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2));
@@ -165,7 +165,7 @@ gen6_blorp_emit_vertices(struct brw_context *brw,
   if (brw->gen >= 7)
  dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE;
 
-  if (brw->is_haswell)
+  if (brw->gen == 7)
  dw0 |= GEN7_MOCS_L3 << 16;
 
   BEGIN_BATCH(batch_length);
diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp 
b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
index 518d7f5..a9d6198 100644
--- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
@@ -143,7 +143,7 @@ gen7_blorp_emit_surface_state(struct brw_context *brw,
 */
struct intel_region *region = surface->mt->region;
uint32_t tile_x, tile_y;
-   uint8_t mocs = brw->is_haswell ? GEN7_MOCS_L3 : 0;
+   uint8_t mocs = brw->gen == 7 ? GEN7_MOCS_L3 : 0;
 
uint32_t tiling = surface->map_stencil_as_y_tiled
   ? I915_TILING_Y : region->tiling;
@@ -616,7 +616,7 @@ gen7_blorp_emit_constant_ps(struct brw_context *brw,
 const brw_blorp_params *params,
 uint32_t wm_push_const_offset)
 {
-   uint8_t mocs = brw->is_haswell ? GEN7_MOCS_L3 : 0;
+   uint8_t mocs = brw->gen == 7 ? GEN7_MOCS_L3 : 0;
 
/* Make sure the push constants fill an exact integer number of
 * registers.
@@ -658,7 +658,7 @@ static void
 gen7_blorp_emit_depth_stencil_config(struct brw_context *brw,
  const brw_blorp_params *params)
 {
-   uint8_t mocs = brw->is_haswell ? GEN7_MOCS_L3 : 0;
+   uint8_t mocs = brw->gen == 7 ? GEN7_MOCS_L3 : 0;
uint32_t surfwidth, surfheight;
uint32_t surftype;
unsigned int depth = MAX2(params->depth.mt->logical_depth0, 1);
diff --git a/src/mesa/drivers/dri/i965/gen7_misc_state.c 
b/src/mesa/drivers/dri/i965/gen7_misc_state.c
index 51067b3..10619c1 100644
--- a/src/mesa/drivers/dri/i965/gen7_misc_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_misc_state.c
@@ -41,7 +41,7 @@ gen7_emit_depth_stencil_hiz(struct brw_context *brw,
 uint32_t tile_x, uint32_t tile_y)
 {
struct gl_context *ctx = &brw->ctx;
-   uint8_t mocs = brw->is_haswell ? GEN7_MOCS_L3 : 0;
+   uint8_t mocs = brw->gen == 7 ? GEN7_MOCS_L3 : 0;
struct gl_framebuffer *fb = ctx->DrawBuffer;
uint32_t surftype;
unsigned int depth = 1;
diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c 
b/src/mesa/drivers/dri/i965/gen7_vs_state.c
index 0340da4..20f3f58 100644
---