Re: [Mesa-dev] [PATCH 4/6] i965/gen8: Correct HALIGN for AUX surfaces

2015-05-29 Thread Pohjolainen, Topi
On Thu, May 28, 2015 at 10:21:32AM -0700, Ben Widawsky wrote:
 This restriction was attempted in this commit:
 commit 47053464630888f819ef8cc44278f1a1220159b9
 Author: Anuj Phogat anuj.pho...@gmail.com
 Date:   Fri Feb 13 11:21:21 2015 -0800
 
i965/gen8: Use HALIGN_16 if MCS is enabled for non-MSRT
 
 However, the commit itself doesn't achieve the desired goal as determined by 
 the
 asserts which the next patch adds. mcs_mt is never a value because we're in 
 the
 process of allocating the mcs_mt miptree when we get to this function. I 
 didn't
 check, but perhaps this would work with blorp, however, meta clears allocate 
 the
 miptree structure (which AFAICT needs the alignment also) way before it
 allocates using meta clears where the renderbuffer is allocated way before the
 aux buffer.
 
 The restriction is referenced in a few places, but the most concise one [IMO]
 from the spec is for Gen9. Gen8 8 loosens the restriction in that it only
 requires this for non-msrt surface.
 
When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E, HALIGN 16 
 must
be used.
 
 With the code before the miptree layout flag rework (patches preceding this),
 accomplishing this workaround is very difficult.
 
 Cc: Anuj Phogat anuj.pho...@gmail.com
 Cc: Neil Roberts n...@linux.intel.com
 Signed-off-by: Ben Widawsky b...@bwidawsk.net
 ---
  src/mesa/drivers/dri/i965/brw_tex_layout.c| 16 ++--
  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 15 ---
  src/mesa/drivers/dri/i965/intel_mipmap_tree.h |  4 +++-
  3 files changed, 25 insertions(+), 10 deletions(-)
 
 diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c 
 b/src/mesa/drivers/dri/i965/brw_tex_layout.c
 index 72b02a2..b51c7c7 100644
 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c
 +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c
 @@ -41,8 +41,13 @@
  
  static unsigned int
  intel_horizontal_texture_alignment_unit(struct brw_context *brw,
 -struct intel_mipmap_tree *mt)
 +struct intel_mipmap_tree *mt,
 +uint32_t layout_flags)
  {
 +

Extra newline.

 +   if (layout_flags  MIPTREE_LAYOUT_FORCE_HALIGN16)
 +  return 16;
 +
 /**
  * From the Alignment Unit Size section of various specs, namely:
  * - Gen3 Spec: Memory Data Formats Volume, Section 1.20.1.4
 @@ -91,9 +96,6 @@ intel_horizontal_texture_alignment_unit(struct brw_context 
 *brw,
 if (brw-gen = 7  mt-format == MESA_FORMAT_Z_UNORM16)
return 8;
  
 -   if (brw-gen == 8  mt-mcs_mt  mt-num_samples = 1)
 -  return 16;
 -
 return 4;
  }
  
 @@ -459,7 +461,8 @@ brw_miptree_layout_texture_3d(struct brw_context *brw,
  }
  
  void
 -brw_miptree_layout(struct brw_context *brw, struct intel_mipmap_tree *mt)
 +brw_miptree_layout(struct brw_context *brw, struct intel_mipmap_tree *mt,
 +   uint32_t layout_flags)
  {
 bool multisampled = mt-num_samples  1;
 bool gen6_hiz_or_stencil = false;
 @@ -492,8 +495,9 @@ brw_miptree_layout(struct brw_context *brw, struct 
 intel_mipmap_tree *mt)
   mt-align_w = 128 / mt-cpp;
   mt-align_h = 32;
}
 +  assert((layout_flags  MIPTREE_LAYOUT_FORCE_HALIGN16) == 0);
 } else {
 -  mt-align_w = intel_horizontal_texture_alignment_unit(brw, mt);
 +  mt-align_w = intel_horizontal_texture_alignment_unit(brw, mt, 
 layout_flags);
mt-align_h =
   intel_vertical_texture_alignment_unit(brw, mt-format, 
 multisampled);
 }
 diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
 b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
 index 75ee19a..a1ac0cf 100644
 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
 +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
 @@ -475,7 +475,10 @@ intel_miptree_create_layout(struct brw_context *brw,
 if (layout_flags  MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD)
mt-array_layout = ALL_SLICES_AT_EACH_LOD;
  
 -   brw_miptree_layout(brw, mt);
 +   if (intel_miptree_is_fast_clear_capable(brw, mt))
 +  layout_flags |= MIPTREE_LAYOUT_FORCE_HALIGN16;
 +
 +   brw_miptree_layout(brw, mt, layout_flags);
  
 if (mt-disable_aux_buffers)
assert(mt-msaa_layout != INTEL_MSAA_LAYOUT_CMS);
 @@ -722,6 +725,7 @@ intel_miptree_create(struct brw_context *brw,
  
  
 if (mt-msaa_layout == INTEL_MSAA_LAYOUT_CMS) {
 +  assert(mt-num_samples  1);
if (!intel_miptree_alloc_mcs(brw, mt, num_samples)) {
   intel_miptree_release(mt);
   return NULL;
 @@ -734,8 +738,10 @@ intel_miptree_create(struct brw_context *brw,
  * clear actually occurs.
  */
 if (intel_is_non_msrt_mcs_tile_supported(brw, mt-tiling) 
 -   intel_miptree_is_fast_clear_capable(brw, mt))
 +   intel_miptree_is_fast_clear_capable(brw, mt)) {
mt-fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED;
 +  assert(brw-gen  8 || mt-align_w == 16);
 +   }
  
 

Re: [Mesa-dev] [PATCH 5/6] i965/gen9: Set HALIGN_16 for all aux buffers

2015-05-29 Thread Pohjolainen, Topi
On Thu, May 28, 2015 at 10:21:33AM -0700, Ben Widawsky wrote:
 Just like the previous patch, but for the GEN9 constraints.
 
 Signed-off-by: Ben Widawsky b...@bwidawsk.net
 ---
  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 5 +
  1 file changed, 5 insertions(+)
 
 diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
 b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
 index a1ac0cf..89030aa 100644
 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
 +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
 @@ -655,6 +655,11 @@ intel_miptree_create(struct brw_context *brw,
  
 assert((layout_flags  MIPTREE_LAYOUT_DISABLE_AUX) == 0);
 assert((layout_flags  MIPTREE_LAYOUT_FOR_BO) == 0);
 +
 +   if (brw-gen = 9  num_samples  1)
 +  layout_flags |= MIPTREE_LAYOUT_FORCE_HALIGN16;
 +
 +

Extra newline.

 mt = intel_miptree_create_layout(brw, target, format,
  first_level, last_level, width0,
  height0, depth0, num_samples,
 -- 
 2.4.2
 
 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/6] i965: Consolidate certain miptree params to flags

2015-05-29 Thread Pohjolainen, Topi
On Fri, May 29, 2015 at 09:32:53AM +0300, Pohjolainen, Topi wrote:
 On Thu, May 28, 2015 at 10:21:29AM -0700, Ben Widawsky wrote:
  I think pretty much everyone agrees that having more than a single bool as a
  function argument is bordering on a bad idea. What sucks about the current
  code is in several instances it's necessary to propagate these boolean
  selections down to lower layers of the code. This requires plumbing 
  (mechanical,
  but still churn) pretty much all of the miptree functions each time.  By
  introducing the flags paramater, it is possible to add miptree constraints 
  very
  easily.
 
 I like this a lot. I have a few simple comments below.
 
  
  The use of this, as is already the case, is sometimes we have some 
  information
  at the time we create the miptree that needs to be known all the way at the
  lowest levels of the create/allocation, disable_aux_buffers is currently one
  such example. There will be another example coming up in a few patches.
  
  Cc: Chad Versace chad.vers...@linux.intel.com
  Signed-off-by: Ben Widawsky b...@bwidawsk.net
  ---
   src/mesa/drivers/dri/i965/intel_fbo.c  |  5 +-
   src/mesa/drivers/dri/i965/intel_mipmap_tree.c  | 86 
  +-
   src/mesa/drivers/dri/i965/intel_mipmap_tree.h  |  9 ++-
   src/mesa/drivers/dri/i965/intel_pixel_draw.c   |  2 +-
   src/mesa/drivers/dri/i965/intel_tex.c  |  8 +--
   src/mesa/drivers/dri/i965/intel_tex.h  |  2 +-
   src/mesa/drivers/dri/i965/intel_tex_image.c| 14 ++---
   src/mesa/drivers/dri/i965/intel_tex_validate.c |  3 +-
   8 files changed, 63 insertions(+), 66 deletions(-)
  
  diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c 
  b/src/mesa/drivers/dri/i965/intel_fbo.c
  index aebed72..1b3a72f 100644
  --- a/src/mesa/drivers/dri/i965/intel_fbo.c
  +++ b/src/mesa/drivers/dri/i965/intel_fbo.c
  @@ -390,7 +390,7 @@ intel_image_target_renderbuffer_storage(struct 
  gl_context *ctx,
image-height,
1,
image-pitch,
  - true /*disable_aux_buffers*/);
  + MIPTREE_LAYOUT_DISABLE_AUX);
  if (!irb-mt)
 return;
   
  @@ -1027,10 +1027,9 @@ intel_renderbuffer_move_to_temp(struct brw_context 
  *brw,
intel_image-base.Base.Level,
intel_image-base.Base.Level,
width, height, depth,
  - true,
irb-mt-num_samples,
INTEL_MIPTREE_TILING_ANY,
  - false);
  + MIPTREE_LAYOUT_ACCELERATED_UPLOAD);
   
  if (intel_miptree_wants_hiz_buffer(brw, new_mt)) {
 intel_miptree_alloc_hiz(brw, new_mt);
  diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
  b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
  index 24a5c3d..b243f8a 100644
  --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
  +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
  @@ -244,10 +244,8 @@ intel_miptree_create_layout(struct brw_context *brw,
   GLuint width0,
   GLuint height0,
   GLuint depth0,
  -bool for_bo,
   GLuint num_samples,
  -bool force_all_slices_at_each_lod,
  -bool disable_aux_buffers)
  +uint32_t layout_flags)
   {
  struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1);
  if (!mt)
  @@ -286,7 +284,7 @@ intel_miptree_create_layout(struct brw_context *brw,
  mt-logical_height0 = height0;
  mt-logical_depth0 = depth0;
  mt-fast_clear_state = INTEL_FAST_CLEAR_STATE_NO_MCS;
  -   mt-disable_aux_buffers = disable_aux_buffers;
  +   mt-disable_aux_buffers = !!(layout_flags  MIPTREE_LAYOUT_DISABLE_AUX);
 
 You didn't mean to have double negation (!!), did you?

I actually meant that isn't layout_flags  MIPTREE_LAYOUT_DISABLE_AUX enough
on its own?

 
  exec_list_make_empty(mt-hiz_map);
   
  /* The cpp is bytes per (1, blockheight)-sized block for compressed
  @@ -422,12 +420,14 @@ intel_miptree_create_layout(struct brw_context *brw,
  mt-physical_height0 = height0;
  mt-physical_depth0 = depth0;
   
  -   if (!for_bo 
  +   if (!(layout_flags  MIPTREE_LAYOUT_FOR_BO) 
  _mesa_get_format_base_format(format) == GL_DEPTH_STENCIL 
  (brw-must_use_separate_stencil ||
  (brw-has_separate_stencil 
intel_miptree_wants_hiz_buffer(brw, mt {
  -  const bool force_all_slices_at_each_lod = brw-gen == 6;
  +  uint32_t stencil_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD;
  +  if (brw-gen == 6)
  + stencil_flags 

Re: [Mesa-dev] [PATCH 1/6] i965: Consolidate certain miptree params to flags

2015-05-29 Thread Pohjolainen, Topi
On Thu, May 28, 2015 at 10:21:29AM -0700, Ben Widawsky wrote:
 I think pretty much everyone agrees that having more than a single bool as a
 function argument is bordering on a bad idea. What sucks about the current
 code is in several instances it's necessary to propagate these boolean
 selections down to lower layers of the code. This requires plumbing 
 (mechanical,
 but still churn) pretty much all of the miptree functions each time.  By
 introducing the flags paramater, it is possible to add miptree constraints 
 very
 easily.

I like this a lot. I have a few simple comments below.

 
 The use of this, as is already the case, is sometimes we have some information
 at the time we create the miptree that needs to be known all the way at the
 lowest levels of the create/allocation, disable_aux_buffers is currently one
 such example. There will be another example coming up in a few patches.
 
 Cc: Chad Versace chad.vers...@linux.intel.com
 Signed-off-by: Ben Widawsky b...@bwidawsk.net
 ---
  src/mesa/drivers/dri/i965/intel_fbo.c  |  5 +-
  src/mesa/drivers/dri/i965/intel_mipmap_tree.c  | 86 
 +-
  src/mesa/drivers/dri/i965/intel_mipmap_tree.h  |  9 ++-
  src/mesa/drivers/dri/i965/intel_pixel_draw.c   |  2 +-
  src/mesa/drivers/dri/i965/intel_tex.c  |  8 +--
  src/mesa/drivers/dri/i965/intel_tex.h  |  2 +-
  src/mesa/drivers/dri/i965/intel_tex_image.c| 14 ++---
  src/mesa/drivers/dri/i965/intel_tex_validate.c |  3 +-
  8 files changed, 63 insertions(+), 66 deletions(-)
 
 diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c 
 b/src/mesa/drivers/dri/i965/intel_fbo.c
 index aebed72..1b3a72f 100644
 --- a/src/mesa/drivers/dri/i965/intel_fbo.c
 +++ b/src/mesa/drivers/dri/i965/intel_fbo.c
 @@ -390,7 +390,7 @@ intel_image_target_renderbuffer_storage(struct gl_context 
 *ctx,
   image-height,
   1,
   image-pitch,
 - true /*disable_aux_buffers*/);
 + MIPTREE_LAYOUT_DISABLE_AUX);
 if (!irb-mt)
return;
  
 @@ -1027,10 +1027,9 @@ intel_renderbuffer_move_to_temp(struct brw_context 
 *brw,
   intel_image-base.Base.Level,
   intel_image-base.Base.Level,
   width, height, depth,
 - true,
   irb-mt-num_samples,
   INTEL_MIPTREE_TILING_ANY,
 - false);
 + MIPTREE_LAYOUT_ACCELERATED_UPLOAD);
  
 if (intel_miptree_wants_hiz_buffer(brw, new_mt)) {
intel_miptree_alloc_hiz(brw, new_mt);
 diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
 b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
 index 24a5c3d..b243f8a 100644
 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
 +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
 @@ -244,10 +244,8 @@ intel_miptree_create_layout(struct brw_context *brw,
  GLuint width0,
  GLuint height0,
  GLuint depth0,
 -bool for_bo,
  GLuint num_samples,
 -bool force_all_slices_at_each_lod,
 -bool disable_aux_buffers)
 +uint32_t layout_flags)
  {
 struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1);
 if (!mt)
 @@ -286,7 +284,7 @@ intel_miptree_create_layout(struct brw_context *brw,
 mt-logical_height0 = height0;
 mt-logical_depth0 = depth0;
 mt-fast_clear_state = INTEL_FAST_CLEAR_STATE_NO_MCS;
 -   mt-disable_aux_buffers = disable_aux_buffers;
 +   mt-disable_aux_buffers = !!(layout_flags  MIPTREE_LAYOUT_DISABLE_AUX);

You didn't mean to have double negation (!!), did you?

 exec_list_make_empty(mt-hiz_map);
  
 /* The cpp is bytes per (1, blockheight)-sized block for compressed
 @@ -422,12 +420,14 @@ intel_miptree_create_layout(struct brw_context *brw,
 mt-physical_height0 = height0;
 mt-physical_depth0 = depth0;
  
 -   if (!for_bo 
 +   if (!(layout_flags  MIPTREE_LAYOUT_FOR_BO) 
 _mesa_get_format_base_format(format) == GL_DEPTH_STENCIL 
 (brw-must_use_separate_stencil ||
   (brw-has_separate_stencil 
   intel_miptree_wants_hiz_buffer(brw, mt {
 -  const bool force_all_slices_at_each_lod = brw-gen == 6;
 +  uint32_t stencil_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD;
 +  if (brw-gen == 6)
 + stencil_flags |= MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD;

Perhaps a separating line here, your choice.

mt-stencil_mt = intel_miptree_create(brw,
  mt-target,
  

[Mesa-dev] Re: Mesa (master): mesa: Allow overriding the version of ES2+ contexts

2015-05-29 Thread Michel Dänzer

Hi Ian,


On 29.05.2015 09:00, i...@kemper.freedesktop.org (Ian Romanick) wrote:
 Module: Mesa
 Branch: master
 Commit: 9b5e92f4ccc6ee1cb9caea947f6efaad2b391cf1
 URL:
 http://cgit.freedesktop.org/mesa/mesa/commit/?id=9b5e92f4ccc6ee1cb9caea947f6efaad2b391cf1
 
 Author: Ian Romanick ian.d.roman...@intel.com
 Date:   Wed Apr 29 16:12:40 2015 -0700
 
 mesa: Allow overriding the version of ES2+ contexts
 
 Signed-off-by: Ian Romanick ian.d.roman...@intel.com
 Reviewed-by: Tapani Pälli tapani.pa...@intel.com

This commit broke a bunch of GLES related piglit tests for me with
radeonsi, e.g. egl-create-context-default-major-version-gles:

Unexpected GLES version: 93847553.6 Mesa 10.7.0-devel (git-9b5e92f)
Expected GLES 1.0 or 1.1.
PIGLIT: {result: fail }

The numbers before Mesa in the version string seem random, and
sometimes the test happens to pass.

While bisecting this, I noticed that there are some possibly related
compiler warnings in src/mesa/main/version.c and
src/mesa/drivers/dri/common/dri_util.c.


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v5] i915/aa: fixing anti-aliasing bug for thinnest width lines

2015-05-29 Thread Marius Predut
On PNV platform, for 1 pixel line thickness or less,
the general anti-aliasing algorithm gives up, and a garbage line is generated.
Setting a Line Width of 0.0 specifies the rasterization
of the thinnest (one-pixel-wide), non-antialiased lines.
Lines rendered with zero Line Width are rasterized using
Grid Intersection Quantization rules as specified by
bspec G45: Volume 2: 3D/Media,
7.3.13.1 Zero-Width (Cosmetic) Line Rasterization section.

This patch follow the same rules as patches fixing the
https://bugs.freedesktop.org/show_bug.cgi?id=28832
bug.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90367

Signed-off-by: Marius Predut marius.pre...@intel.com
---
 src/mesa/drivers/dri/i915/i915_state.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/src/mesa/drivers/dri/i915/i915_state.c 
b/src/mesa/drivers/dri/i915/i915_state.c
index 5f10b84..6cd342c 100644
--- a/src/mesa/drivers/dri/i915/i915_state.c
+++ b/src/mesa/drivers/dri/i915/i915_state.c
@@ -599,6 +599,21 @@ i915LineWidth(struct gl_context * ctx, GLfloat widthf)

width = (int) (widthf * 2);
width = CLAMP(width, 1, 0xf);
+
+   if (ctx-Line.Width  1.5 || widthf  1.5) {
+/* For 1 pixel line thickness or less, the general
+ * anti-aliasing algorithm gives up, and a garbage line is
+ * generated.  Setting a Line Width of 0.0 specifies the
+ * rasterization of the thinnest (one-pixel-wide),
+ * non-antialiased lines.
+ *
+ * Lines rendered with zero Line Width are rasterized using
+ * Grid Intersection Quantization rules as specified by
+ * bspec G45: Volume 2: 3D/Media,
+ * 7.3.13.1 Zero-Width (Cosmetic) Line Rasterization section
+ */
+width = 0;
+   }
lis4 |= width  S4_LINE_WIDTH_SHIFT;
 
if (lis4 != i915-state.Ctx[I915_CTXREG_LIS4]) {
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gallivm: Use the LLVM's C disassembly interface.

2015-05-29 Thread Jose Fonseca

On 28/05/15 19:26, Roland Scheidegger wrote:

Looks ok, it's massively simpler and shouldn't break as often.

Reviewed-by: Roland Scheidegger srol...@vmware.com

Am 28.05.2015 um 17:57 schrieb Jose Fonseca:

It doesn't do everything we want.  In particular it doesn't allow to
detect jumps or return opcodes.  Currently we detect the x86's RET
opcode.

Even though it's worse for LLVM 3.3, it's an improvement for LLVM 3.7,
which was totally busted.
---
  scons/llvm.py  |   4 +-
  src/gallium/auxiliary/gallivm/lp_bld_debug.cpp | 260 -
  2 files changed, 40 insertions(+), 224 deletions(-)

diff --git a/scons/llvm.py b/scons/llvm.py
index 17278df..c59b8cb 100644
--- a/scons/llvm.py
+++ b/scons/llvm.py
@@ -120,6 +120,7 @@ def generate(env):
  ])
  elif llvm_version = distutils.version.LooseVersion('3.5'):
  env.Prepend(LIBS = [
+'LLVMMCDisassembler',
  'LLVMBitWriter', 'LLVMMCJIT', 'LLVMRuntimeDyld',
  'LLVMX86Disassembler', 'LLVMX86AsmParser', 'LLVMX86CodeGen',
  'LLVMSelectionDAG', 'LLVMAsmPrinter', 'LLVMX86Desc',
@@ -132,6 +133,7 @@ def generate(env):
  ])
  else:
  env.Prepend(LIBS = [
+'LLVMMCDisassembler',
  'LLVMBitWriter', 'LLVMX86Disassembler', 'LLVMX86AsmParser',
  'LLVMX86CodeGen', 'LLVMX86Desc', 'LLVMSelectionDAG',
  'LLVMAsmPrinter', 'LLVMMCParser', 'LLVMX86AsmPrinter',
@@ -189,7 +191,7 @@ def generate(env):
  if '-fno-rtti' in cxxflags:
  env.Append(CXXFLAGS = ['-fno-rtti'])

-components = ['engine', 'mcjit', 'bitwriter', 'x86asmprinter']
+components = ['engine', 'mcjit', 'bitwriter', 'x86asmprinter', 
'mcdisassembler']

  env.ParseConfig('llvm-config --libs ' + ' '.join(components))
  env.ParseConfig('llvm-config --ldflags')
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp 
b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
index 76c302f..64fb044 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
@@ -28,40 +28,12 @@
  #include stddef.h

  #include llvm-c/Core.h
-#include llvm/Target/TargetMachine.h
-#include llvm/Target/TargetInstrInfo.h
+#include llvm-c/Disassembler.h
  #include llvm/Support/raw_ostream.h
  #include llvm/Support/Format.h
-
-#if HAVE_LLVM = 0x0306
-#include llvm/Target/TargetSubtargetInfo.h
-#else
-#include llvm/Support/MemoryObject.h
-#endif
-
-#include llvm/Support/TargetRegistry.h
-#include llvm/MC/MCSubtargetInfo.h
-
  #include llvm/Support/Host.h
-
  #include llvm/IR/Module.h

-#include llvm/MC/MCDisassembler.h
-#include llvm/MC/MCAsmInfo.h
-#include llvm/MC/MCInst.h
-#include llvm/MC/MCInstPrinter.h
-#include llvm/MC/MCRegisterInfo.h
-
-#if HAVE_LLVM = 0x0305
-#define OwningPtr std::unique_ptr
-#else
-#include llvm/ADT/OwningPtr.h
-#endif
-
-#if HAVE_LLVM = 0x0305
-#include llvm/MC/MCContext.h
-#endif
-
  #include util/u_math.h
  #include util/u_debug.h

@@ -133,7 +105,7 @@ lp_get_module_id(LLVMModuleRef module)
  extern C void
  lp_debug_dump_value(LLVMValueRef value)
  {
-#if (defined(PIPE_OS_WINDOWS)  !defined(PIPE_CC_MSVC)) || 
defined(PIPE_OS_EMBDDED)
+#if (defined(PIPE_OS_WINDOWS)  !defined(PIPE_CC_MSVC)) || 
defined(PIPE_OS_EMBEDDED)
 raw_debug_ostream os;
 llvm::unwrap(value)-print(os);
 os.flush();
@@ -143,44 +115,16 @@ lp_debug_dump_value(LLVMValueRef value)
  }


-#if HAVE_LLVM  0x0306
-
-/*
- * MemoryObject wrapper around a buffer of memory, to be used by MC
- * disassembler.
- */
-class BufferMemoryObject:
-   public llvm::MemoryObject
+static const char *
+disassemblerSymbolLookupCB(void *DisInfo,
+   uint64_t ReferenceValue,
+   uint64_t *ReferenceType,
+   uint64_t ReferencePC,
+   const char **ReferenceName)
  {
-private:
-   const uint8_t *Bytes;
-   uint64_t Length;
-public:
-   BufferMemoryObject(const uint8_t *bytes, uint64_t length) :
-  Bytes(bytes), Length(length)
-   {
-   }
-
-   uint64_t getBase() const
-   {
-  return 0;
-   }
-
-   uint64_t getExtent() const
-   {
-  return Length;
-   }
-
-   int readByte(uint64_t addr, uint8_t *byte) const
-   {
-  if (addr  getExtent())
- return -1;
-  *byte = Bytes[addr];
-  return 0;
-   }
-};
-
-#endif /* HAVE_LLVM  0x0306 */
+   // TODO: Maybe this can be used to guess jumps
+   return NULL;
+}


  /*
@@ -193,8 +137,6 @@ public:
  static size_t
  disassemble(const void* func, llvm::raw_ostream  Out)
  {
-   using namespace llvm;
-
 const uint8_t *bytes = (const uint8_t *)func;

 /*
@@ -202,101 +144,23 @@ disassemble(const void* func, llvm::raw_ostream  Out)
  */
 const uint64_t extent = 96 * 1024;

-   uint64_t max_pc = 0;
-
 /*
  * Initialize all used 

Re: [Mesa-dev] Ideas on loop unrolling, loop examples, and my GSoC-blog

2015-05-29 Thread Eero Tamminen

Hi,

On 05/28/2015 10:19 PM, Thomas Helland wrote:

One more thing;
Is there a limit where the loop body gets so large that we
want to decide that gah, this sucks, no point in unrolling this?
I imagine as the loops get large there will be a case of
diminishing returns from the unrolling?


I think only backend can say something to that.   You e.g. give backend 
unrolled and non-unrolled versions and backend decides which one is 
better (after applying additional optimizations)...



- Eero

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] xdemos/corender: Remove.

2015-05-29 Thread Jose Fonseca
Rendering from multiple processes into the same X window is not
something that works in practice anymore.

Nowadays interprocess rendering is better done with GLX/EGL extensions
for that effect.

https://bugs.freedesktop.org/show_bug.cgi?id=30279
---
 src/xdemos/.gitignore |   1 -
 src/xdemos/CMakeLists.txt |   3 +-
 src/xdemos/Makefile.am|   6 -
 src/xdemos/corender.c | 400 --
 src/xdemos/ipc.c  | 264 --
 src/xdemos/ipc.h  |  16 --
 6 files changed, 1 insertion(+), 689 deletions(-)
 delete mode 100644 src/xdemos/corender.c
 delete mode 100644 src/xdemos/ipc.c
 delete mode 100644 src/xdemos/ipc.h

diff --git a/src/xdemos/.gitignore b/src/xdemos/.gitignore
index 41f2519..7ad9449 100644
--- a/src/xdemos/.gitignore
+++ b/src/xdemos/.gitignore
@@ -1,4 +1,3 @@
-corender
 glsync
 glthreads
 glxcontexts
diff --git a/src/xdemos/CMakeLists.txt b/src/xdemos/CMakeLists.txt
index 97329fe..d49a6e6 100644
--- a/src/xdemos/CMakeLists.txt
+++ b/src/xdemos/CMakeLists.txt
@@ -70,8 +70,7 @@ target_link_libraries (${subdir}_pbdemo pbutil)
 target_link_libraries (${subdir}_pbinfo pbutil)
 target_link_libraries (${subdir}_sharedtex_mt pthread)
 
-add_executable (corender corender.c ipc.c) 
 add_executable (xrotfontdemo xrotfontdemo.c xuserotfont.c)
 add_executable (glxinfo glxinfo.c glinfo_common.c)
 
-install (TARGETS glxinfo corender xrotfontdemo DESTINATION demos)
+install (TARGETS glxinfo xrotfontdemo DESTINATION demos)
diff --git a/src/xdemos/Makefile.am b/src/xdemos/Makefile.am
index cfd23b1..625b278 100644
--- a/src/xdemos/Makefile.am
+++ b/src/xdemos/Makefile.am
@@ -34,7 +34,6 @@ if HAVE_X11
 noinst_LTLIBRARIES = libpbutil.la
 
 bin_PROGRAMS = \
-   corender \
glsync \
glthreads \
glxdemo \
@@ -67,11 +66,6 @@ libpbutil_la_SOURCES = \
pbutil.c \
pbutil.h
 
-corender_SOURCES = \
-   corender.c \
-   ipc.c \
-   ipc.h
-
 xrotfontdemo_SOURCES = \
xrotfontdemo.c \
xuserotfont.c \
diff --git a/src/xdemos/corender.c b/src/xdemos/corender.c
deleted file mode 100644
index e706f4b..000
--- a/src/xdemos/corender.c
+++ /dev/null
@@ -1,400 +0,0 @@
-/**
- * Example of cooperative rendering into one window by two processes.
- * The first instance of the program creates the GLX window.
- * The second instance of the program gets the window ID from the first
- * and draws into it.
- * Socket IPC is used for synchronization.
- *
- * Usage:
- * 1. run 'corender '
- * 2. run 'corender 2'  (any arg will do)
- *
- * Brian Paul
- * 11 Oct 2007
- */
-
-
-#include GL/gl.h
-#include GL/glx.h
-#include assert.h
-#include math.h
-#include stdio.h
-#include stdlib.h
-#include X11/keysym.h
-#include unistd.h
-#include ipc.h
-
-
-#ifndef M_PI
-#define M_PI 3.14159265358979323846
-#endif
-
-static int MyID = 0;  /* 0 or 1 */
-static int WindowID = 0;
-static GLXContext Context = 0;
-static int Width = 700, Height = 350;
-static int Rot = 0;
-static int Sock = 0;
-
-static GLfloat Red[4] = {1.0, 0.2, 0.2, 1.0};
-static GLfloat Blue[4] = {0.2, 0.2, 1.0, 1.0};
-
-static int Sync = 1;  /** synchronized rendering? */
-
-
-static void
-setup_ipc(void)
-{
-   int k, port = 10001;
-
-   if (MyID == 0) {
-  /* I'm the first one, wait for connection from second */
-  k = CreatePort(port);
-  assert(k != -1);
-
-  printf(Waiting for connection from another 'corender'\n);
-  Sock = AcceptConnection(k);
-  assert(Sock != -1);
-
-  printf(Got connection, sending windowID\n);
-
-  /* send windowID */
-  SendData(Sock, WindowID, sizeof(WindowID));
-   }
-   else {
-  /* I'm the second one, connect to first */
-  char hostname[1000];
-
-  MyHostName(hostname, 1000);
-  Sock = Connect(hostname, port);
-  assert(Sock != -1);
-
-  /* get windowID */
-  ReceiveData(Sock, WindowID, sizeof(WindowID));
-  printf(Contacted first 'corender', getting WindowID\n);
-   }
-}
-
-
-
-/** from GLUT */
-static void
-doughnut(GLfloat r, GLfloat R, GLint nsides, GLint rings)
-{
-  int i, j;
-  GLfloat theta, phi, theta1;
-  GLfloat cosTheta, sinTheta;
-  GLfloat cosTheta1, sinTheta1;
-  GLfloat ringDelta, sideDelta;
-
-  ringDelta = 2.0 * M_PI / rings;
-  sideDelta = 2.0 * M_PI / nsides;
-
-  theta = 0.0;
-  cosTheta = 1.0;
-  sinTheta = 0.0;
-  for (i = rings - 1; i = 0; i--) {
-theta1 = theta + ringDelta;
-cosTheta1 = cos(theta1);
-sinTheta1 = sin(theta1);
-glBegin(GL_QUAD_STRIP);
-phi = 0.0;
-for (j = nsides; j = 0; j--) {
-  GLfloat cosPhi, sinPhi, dist;
-
-  phi += sideDelta;
-  cosPhi = cos(phi);
-  sinPhi = sin(phi);
-  dist = R + r * cosPhi;
-
-  glNormal3f(cosTheta1 * cosPhi, -sinTheta1 * cosPhi, sinPhi);
-  glVertex3f(cosTheta1 * dist, -sinTheta1 * dist, r * sinPhi);
-  glNormal3f(cosTheta * cosPhi, -sinTheta * cosPhi, sinPhi);
-  glVertex3f(cosTheta * dist, -sinTheta * dist,  

Re: [Mesa-dev] [PATCH v5] i915/aa: fixing anti-aliasing bug for thinnest width lines

2015-05-29 Thread Predut, Marius
Sorry , it is v1 not v5 ( for Pineview platform : 
http://ark.intel.com/products/codename/32201/Pineview)

 -Original Message-
 From: Predut, Marius
 Sent: Friday, May 29, 2015 2:51 PM
 To: mesa-dev@lists.freedesktop.org
 Cc: Predut, Marius
 Subject: [Mesa-dev][PATCH v5] i915/aa: fixing anti-aliasing bug for thinnest
 width lines
 
 On PNV platform, for 1 pixel line thickness or less,
 the general anti-aliasing algorithm gives up, and a garbage line is generated.
 Setting a Line Width of 0.0 specifies the rasterization
 of the thinnest (one-pixel-wide), non-antialiased lines.
 Lines rendered with zero Line Width are rasterized using
 Grid Intersection Quantization rules as specified by
 bspec G45: Volume 2: 3D/Media,
 7.3.13.1 Zero-Width (Cosmetic) Line Rasterization section.
 
 This patch follow the same rules as patches fixing the
 https://bugs.freedesktop.org/show_bug.cgi?id=28832
 bug.
 
 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90367
 
 Signed-off-by: Marius Predut marius.pre...@intel.com
 ---
  src/mesa/drivers/dri/i915/i915_state.c | 15 +++
  1 file changed, 15 insertions(+)
 
 diff --git a/src/mesa/drivers/dri/i915/i915_state.c
 b/src/mesa/drivers/dri/i915/i915_state.c
 index 5f10b84..6cd342c 100644
 --- a/src/mesa/drivers/dri/i915/i915_state.c
 +++ b/src/mesa/drivers/dri/i915/i915_state.c
 @@ -599,6 +599,21 @@ i915LineWidth(struct gl_context * ctx, GLfloat widthf)
 
 width = (int) (widthf * 2);
 width = CLAMP(width, 1, 0xf);
 +
 +   if (ctx-Line.Width  1.5 || widthf  1.5) {
 +/* For 1 pixel line thickness or less, the general
 + * anti-aliasing algorithm gives up, and a garbage line is
 + * generated.  Setting a Line Width of 0.0 specifies the
 + * rasterization of the thinnest (one-pixel-wide),
 + * non-antialiased lines.
 + *
 + * Lines rendered with zero Line Width are rasterized using
 + * Grid Intersection Quantization rules as specified by
 + * bspec G45: Volume 2: 3D/Media,
 + * 7.3.13.1 Zero-Width (Cosmetic) Line Rasterization section
 + */
 +width = 0;
 +   }
 lis4 |= width  S4_LINE_WIDTH_SHIFT;
 
 if (lis4 != i915-state.Ctx[I915_CTXREG_LIS4]) {
 --
 1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 30279] corender broken on llvmpipe and swrast

2015-05-29 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=30279

José Fonseca jfons...@vmware.com changed:

   What|Removed |Added

 Status|NEW |RESOLVED
 CC||bri...@vmware.com,
   ||jfons...@vmware.com,
   ||srol...@vmware.com
 Resolution|--- |WONTFIX

--- Comment #4 from José Fonseca jfons...@vmware.com ---
This is just a too weird use case to be worth our while.  Particularly for SW
renderers, which end up doing some sort of XPutImage for presents, hence can't
actually share the buffers between processes.

Even on native NVIDIA corender doesn't work properly -- the blue donut
flickers.

Probably the only setup corender might still work is with indirect GLX, which
few people if any care anymore.


I think we should actually drop the corender demo from Mesa demos -- it's not
soemthing we want people to replicate.

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/3] mesa: remove obsolete and broken sampler code

2015-05-29 Thread Timothy Arceri
The warning is now handled earlier in the ast to hir code,
and the name was only generated for arrays of arrays in which
case this just breaks the Uniform hash lookup.
---
 src/mesa/program/sampler.cpp | 38 +-
 1 file changed, 13 insertions(+), 25 deletions(-)

diff --git a/src/mesa/program/sampler.cpp b/src/mesa/program/sampler.cpp
index ea3024d..34567d2 100644
--- a/src/mesa/program/sampler.cpp
+++ b/src/mesa/program/sampler.cpp
@@ -38,14 +38,12 @@
 class get_sampler_name : public ir_hierarchical_visitor
 {
 public:
-   get_sampler_name(ir_dereference *last,
-   struct gl_shader_program *shader_program)
+   get_sampler_name(struct gl_shader_program *shader_program)
{
   this-mem_ctx = ralloc_context(NULL);
   this-shader_program = shader_program;
   this-name = NULL;
   this-offset = 0;
-  this-last = last;
}
 
~get_sampler_name()
@@ -68,29 +66,20 @@ public:
virtual ir_visitor_status visit_leave(ir_dereference_array *ir)
{
   ir_constant *index = ir-array_index-as_constant();
-  int i;
+
+  /* GLSL 1.10 and 1.20 allowed variable sampler array indices,
+   * while GLSL 1.30 requires that the array indices be
+   * constant integer expressions.  We don't expect any driver
+   * to actually work with a really variable array index, so
+   * all that would work would be an unrolled loop counter so
+   * the index must be constant at this point.
+   */
+  assert(index != NULL);
 
   if (index) {
-i = index-value.i[0];
-  } else {
-/* GLSL 1.10 and 1.20 allowed variable sampler array indices,
- * while GLSL 1.30 requires that the array indices be
- * constant integer expressions.  We don't expect any driver
- * to actually work with a really variable array index, so
- * all that would work would be an unrolled loop counter that ends
- * up being constant above.
- */
-ralloc_strcat(shader_program-InfoLog,
-  warning: Variable sampler array index unsupported.\n
-  This feature of the language was removed in GLSL 1.20 
-  and is unlikely to be supported for 1.10 in Mesa.\n);
-i = 0;
-  }
-  if (ir != last) {
-this-name = ralloc_asprintf(mem_ctx, %s[%d], name, i);
-  } else {
-offset = i;
+offset = index-value.i[0];
   }
+
   return visit_continue;
}
 
@@ -98,7 +87,6 @@ public:
const char *name;
void *mem_ctx;
int offset;
-   ir_dereference *last;
 };
 
 
@@ -107,7 +95,7 @@ _mesa_get_sampler_uniform_value(class ir_dereference 
*sampler,
struct gl_shader_program *shader_program,
const struct gl_program *prog)
 {
-   get_sampler_name getname(sampler, shader_program);
+   get_sampler_name getname(shader_program);
 
GLuint shader = _mesa_program_enum_to_shader_stage(prog-Target);
 
-- 
2.1.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] gallivm: Remove stub disassemblerSymbolLookupCB.

2015-05-29 Thread Jose Fonseca
It's incompletete -- it wasn't filling ReferenceType so it was causing
garbagge on the disassembly.  Furthermore it seems impossible to get the
jump information through this interface.

The solution for function size problem is to effectively book-keep the
machine code start and end address while JIT'ing.
---
 src/gallium/auxiliary/gallivm/lp_bld_debug.cpp | 14 +-
 1 file changed, 1 insertion(+), 13 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp 
b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
index 64fb044..9a85248 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
@@ -115,18 +115,6 @@ lp_debug_dump_value(LLVMValueRef value)
 }
 
 
-static const char *
-disassemblerSymbolLookupCB(void *DisInfo,
-   uint64_t ReferenceValue,
-   uint64_t *ReferenceType,
-   uint64_t ReferencePC,
-   const char **ReferenceName)
-{
-   // TODO: Maybe this can be used to guess jumps
-   return NULL;
-}
-
-
 /*
  * Disassemble a function, using the LLVM MC disassembler.
  *
@@ -149,7 +137,7 @@ disassemble(const void* func, llvm::raw_ostream  Out)
 */
 
std::string Triple = llvm::sys::getProcessTriple();
-   LLVMDisasmContextRef D = LLVMCreateDisasm(Triple.c_str(), NULL, 0, NULL, 
disassemblerSymbolLookupCB);
+   LLVMDisasmContextRef D = LLVMCreateDisasm(Triple.c_str(), NULL, 0, NULL, 
NULL);
char outline[1024];
 
if (!D) {
-- 
2.1.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] xdemos/corender: Remove.

2015-05-29 Thread Brian Paul


Reviewed-by: Brian Paul bri...@vmware.com

On 05/29/2015 05:10 AM, Jose Fonseca wrote:

Rendering from multiple processes into the same X window is not
something that works in practice anymore.

Nowadays interprocess rendering is better done with GLX/EGL extensions
for that effect.

https://bugs.freedesktop.org/show_bug.cgi?id=30279
---
  src/xdemos/.gitignore |   1 -
  src/xdemos/CMakeLists.txt |   3 +-
  src/xdemos/Makefile.am|   6 -
  src/xdemos/corender.c | 400 --
  src/xdemos/ipc.c  | 264 --
  src/xdemos/ipc.h  |  16 --
  6 files changed, 1 insertion(+), 689 deletions(-)
  delete mode 100644 src/xdemos/corender.c
  delete mode 100644 src/xdemos/ipc.c
  delete mode 100644 src/xdemos/ipc.h

diff --git a/src/xdemos/.gitignore b/src/xdemos/.gitignore
index 41f2519..7ad9449 100644
--- a/src/xdemos/.gitignore
+++ b/src/xdemos/.gitignore
@@ -1,4 +1,3 @@
-corender
  glsync
  glthreads
  glxcontexts
diff --git a/src/xdemos/CMakeLists.txt b/src/xdemos/CMakeLists.txt
index 97329fe..d49a6e6 100644
--- a/src/xdemos/CMakeLists.txt
+++ b/src/xdemos/CMakeLists.txt
@@ -70,8 +70,7 @@ target_link_libraries (${subdir}_pbdemo pbutil)
  target_link_libraries (${subdir}_pbinfo pbutil)
  target_link_libraries (${subdir}_sharedtex_mt pthread)

-add_executable (corender corender.c ipc.c)
  add_executable (xrotfontdemo xrotfontdemo.c xuserotfont.c)
  add_executable (glxinfo glxinfo.c glinfo_common.c)

-install (TARGETS glxinfo corender xrotfontdemo DESTINATION demos)
+install (TARGETS glxinfo xrotfontdemo DESTINATION demos)
diff --git a/src/xdemos/Makefile.am b/src/xdemos/Makefile.am
index cfd23b1..625b278 100644
--- a/src/xdemos/Makefile.am
+++ b/src/xdemos/Makefile.am
@@ -34,7 +34,6 @@ if HAVE_X11
  noinst_LTLIBRARIES = libpbutil.la

  bin_PROGRAMS = \
-   corender \
glsync \
glthreads \
glxdemo \
@@ -67,11 +66,6 @@ libpbutil_la_SOURCES = \
pbutil.c \
pbutil.h

-corender_SOURCES = \
-   corender.c \
-   ipc.c \
-   ipc.h
-
  xrotfontdemo_SOURCES = \
xrotfontdemo.c \
xuserotfont.c \
diff --git a/src/xdemos/corender.c b/src/xdemos/corender.c
deleted file mode 100644
index e706f4b..000
--- a/src/xdemos/corender.c
+++ /dev/null
@@ -1,400 +0,0 @@
-/**
- * Example of cooperative rendering into one window by two processes.
- * The first instance of the program creates the GLX window.
- * The second instance of the program gets the window ID from the first
- * and draws into it.
- * Socket IPC is used for synchronization.
- *
- * Usage:
- * 1. run 'corender '
- * 2. run 'corender 2'  (any arg will do)
- *
- * Brian Paul
- * 11 Oct 2007
- */
-
-
-#include GL/gl.h
-#include GL/glx.h
-#include assert.h
-#include math.h
-#include stdio.h
-#include stdlib.h
-#include X11/keysym.h
-#include unistd.h
-#include ipc.h
-
-
-#ifndef M_PI
-#define M_PI 3.14159265358979323846
-#endif
-
-static int MyID = 0;  /* 0 or 1 */
-static int WindowID = 0;
-static GLXContext Context = 0;
-static int Width = 700, Height = 350;
-static int Rot = 0;
-static int Sock = 0;
-
-static GLfloat Red[4] = {1.0, 0.2, 0.2, 1.0};
-static GLfloat Blue[4] = {0.2, 0.2, 1.0, 1.0};
-
-static int Sync = 1;  /** synchronized rendering? */
-
-
-static void
-setup_ipc(void)
-{
-   int k, port = 10001;
-
-   if (MyID == 0) {
-  /* I'm the first one, wait for connection from second */
-  k = CreatePort(port);
-  assert(k != -1);
-
-  printf(Waiting for connection from another 'corender'\n);
-  Sock = AcceptConnection(k);
-  assert(Sock != -1);
-
-  printf(Got connection, sending windowID\n);
-
-  /* send windowID */
-  SendData(Sock, WindowID, sizeof(WindowID));
-   }
-   else {
-  /* I'm the second one, connect to first */
-  char hostname[1000];
-
-  MyHostName(hostname, 1000);
-  Sock = Connect(hostname, port);
-  assert(Sock != -1);
-
-  /* get windowID */
-  ReceiveData(Sock, WindowID, sizeof(WindowID));
-  printf(Contacted first 'corender', getting WindowID\n);
-   }
-}
-
-
-
-/** from GLUT */
-static void
-doughnut(GLfloat r, GLfloat R, GLint nsides, GLint rings)
-{
-  int i, j;
-  GLfloat theta, phi, theta1;
-  GLfloat cosTheta, sinTheta;
-  GLfloat cosTheta1, sinTheta1;
-  GLfloat ringDelta, sideDelta;
-
-  ringDelta = 2.0 * M_PI / rings;
-  sideDelta = 2.0 * M_PI / nsides;
-
-  theta = 0.0;
-  cosTheta = 1.0;
-  sinTheta = 0.0;
-  for (i = rings - 1; i = 0; i--) {
-theta1 = theta + ringDelta;
-cosTheta1 = cos(theta1);
-sinTheta1 = sin(theta1);
-glBegin(GL_QUAD_STRIP);
-phi = 0.0;
-for (j = nsides; j = 0; j--) {
-  GLfloat cosPhi, sinPhi, dist;
-
-  phi += sideDelta;
-  cosPhi = cos(phi);
-  sinPhi = sin(phi);
-  dist = R + r * cosPhi;
-
-  glNormal3f(cosTheta1 * cosPhi, -sinTheta1 * cosPhi, sinPhi);
-  glVertex3f(cosTheta1 * dist, -sinTheta1 * dist, r * sinPhi);
-  

[Mesa-dev] [PATCH 3/3] mesa: remove unused function declaration

2015-05-29 Thread Timothy Arceri
---
 src/mesa/main/uniforms.h | 4 
 1 file changed, 4 deletions(-)

diff --git a/src/mesa/main/uniforms.h b/src/mesa/main/uniforms.h
index 55fa235..bd7b05e 100644
--- a/src/mesa/main/uniforms.h
+++ b/src/mesa/main/uniforms.h
@@ -343,10 +343,6 @@ void GLAPIENTRY
 _mesa_ProgramUniformMatrix4x3dv(GLuint program, GLint location, GLsizei count,
 GLboolean transpose, const GLdouble *value);
 
-long
-_mesa_parse_program_resource_name(const GLchar *name,
-  const GLchar **out_base_name_end);
-
 unsigned
 _mesa_get_uniform_location(struct gl_shader_program *shProg,
   const GLchar *name, unsigned *offset);
-- 
2.1.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/3] nir: remove recreated broken sampler logic

2015-05-29 Thread Timothy Arceri
This logic was recreated based on the old glsl ir code.

The name was only generated for arrays of arrays in which
case this just breaks the Uniform hash lookup.
---
 src/glsl/nir/nir_lower_samplers.cpp | 11 ++-
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/src/glsl/nir/nir_lower_samplers.cpp 
b/src/glsl/nir/nir_lower_samplers.cpp
index 7a0b0a0..b054afa 100644
--- a/src/glsl/nir/nir_lower_samplers.cpp
+++ b/src/glsl/nir/nir_lower_samplers.cpp
@@ -71,15 +71,8 @@ lower_sampler(nir_tex_instr *instr, const struct 
gl_shader_program *shader_progr
  nir_deref_array *deref_array = nir_deref_as_array(deref-child);
 
  assert(deref_array-deref_array_type != 
nir_deref_array_type_wildcard);
-
- if (deref_array-deref.child) {
-ralloc_asprintf_append(name, [%u],
-   deref_array-deref_array_type == nir_deref_array_type_direct ?
-  deref_array-base_offset : 0);
- } else {
-assert(deref-child-type-base_type == GLSL_TYPE_SAMPLER);
-instr-sampler_index = deref_array-base_offset;
- }
+ assert(deref-child-type-base_type == GLSL_TYPE_SAMPLER);
+ instr-sampler_index = deref_array-base_offset;
 
  /* XXX: We're assuming here that the indirect is the last array
   * thing we have.  This should be ok for now as we don't support
-- 
2.1.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/5] mesa: remove unused geometry shader variables

2015-05-29 Thread Brian Paul

For the series,
Reviewed-by: Brian Paul bri...@vmware.com


On 05/28/2015 05:57 PM, Marek Olšák wrote:

From: Marek Olšák marek.ol...@amd.com

These states are for GS assembly shaders only. We don't support those.
---
  src/mesa/main/context.c| 1 -
  src/mesa/main/mtypes.h | 7 ---
  src/mesa/main/shared.c | 1 -
  src/mesa/program/program.c | 9 -
  4 files changed, 18 deletions(-)

diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c
index e4faf3d..483e9b1 100644
--- a/src/mesa/main/context.c
+++ b/src/mesa/main/context.c
@@ -1333,7 +1333,6 @@ _mesa_free_context_data( struct gl_context *ctx )
 _mesa_reference_vertprog(ctx, ctx-VertexProgram._Current, NULL);
 _mesa_reference_vertprog(ctx, ctx-VertexProgram._TnlProgram, NULL);

-   _mesa_reference_geomprog(ctx, ctx-GeometryProgram.Current, NULL);
 _mesa_reference_geomprog(ctx, ctx-GeometryProgram._Current, NULL);

 _mesa_reference_fragprog(ctx, ctx-FragmentProgram.Current, NULL);
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 8342517..96ef060 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -2275,16 +2275,10 @@ struct gl_vertex_program_state
   */
  struct gl_geometry_program_state
  {
-   GLboolean Enabled;   /** GL_ARB_GEOMETRY_SHADER4 */
-   GLboolean _Enabled;  /** Enabled and valid program? */
-   struct gl_geometry_program *Current;  /** user-bound geometry program */
-
 /** Currently enabled and valid program (including internal programs
  * and compiled shader programs).
  */
 struct gl_geometry_program *_Current;
-
-   GLfloat Parameters[MAX_PROGRAM_ENV_PARAMS][4]; /** Env params */
  };

  /**
@@ -3004,7 +2998,6 @@ struct gl_shared_state
 struct _mesa_HashTable *Programs; /** All vertex/fragment programs */
 struct gl_vertex_program *DefaultVertexProgram;
 struct gl_fragment_program *DefaultFragmentProgram;
-   struct gl_geometry_program *DefaultGeometryProgram;
 /*@}*/

 /* GL_ATI_fragment_shader */
diff --git a/src/mesa/main/shared.c b/src/mesa/main/shared.c
index 0b76cc0..d5ac9f1 100644
--- a/src/mesa/main/shared.c
+++ b/src/mesa/main/shared.c
@@ -313,7 +313,6 @@ free_shared_state(struct gl_context *ctx, struct 
gl_shared_state *shared)
 _mesa_DeleteHashTable(shared-Programs);

 _mesa_reference_vertprog(ctx, shared-DefaultVertexProgram, NULL);
-   _mesa_reference_geomprog(ctx, shared-DefaultGeometryProgram, NULL);
 _mesa_reference_fragprog(ctx, shared-DefaultFragmentProgram, NULL);

 _mesa_HashDeleteAll(shared-ATIShaders, delete_fragshader_cb, ctx);
diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c
index fb61f4d..f0a47ac 100644
--- a/src/mesa/program/program.c
+++ b/src/mesa/program/program.c
@@ -97,11 +97,6 @@ _mesa_init_program(struct gl_context *ctx)
 assert(ctx-FragmentProgram.Current);
 ctx-FragmentProgram.Cache = _mesa_new_program_cache();

-   ctx-GeometryProgram.Enabled = GL_FALSE;
-   /* right now by default we don't have a geometry program */
-   _mesa_reference_geomprog(ctx, ctx-GeometryProgram.Current,
-NULL);
-
 _mesa_reference_compprog(ctx, ctx-ComputeProgram.Current, NULL);

 /* XXX probably move this stuff */
@@ -122,7 +117,6 @@ _mesa_free_program_data(struct gl_context *ctx)
 _mesa_delete_program_cache(ctx, ctx-VertexProgram.Cache);
 _mesa_reference_fragprog(ctx, ctx-FragmentProgram.Current, NULL);
 _mesa_delete_shader_cache(ctx, ctx-FragmentProgram.Cache);
-   _mesa_reference_geomprog(ctx, ctx-GeometryProgram.Current, NULL);
 _mesa_reference_compprog(ctx, ctx-ComputeProgram.Current, NULL);

 /* XXX probably move this stuff */
@@ -153,9 +147,6 @@ _mesa_update_default_objects_program(struct gl_context *ctx)
  ctx-Shared-DefaultFragmentProgram);
 assert(ctx-FragmentProgram.Current);

-   _mesa_reference_geomprog(ctx, ctx-GeometryProgram.Current,
-  ctx-Shared-DefaultGeometryProgram);
-
 /* XXX probably move this stuff */
 if (ctx-ATIFragmentShader.Current) {
ctx-ATIFragmentShader.Current-RefCount--;



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] i965: Don't add base_binding_table_index if it's zero

2015-05-29 Thread Neil Roberts
When calculating the binding table index for non-constant sampler
array indexing it needs to add the base binding table index which is a
constant within the generated code. Often this base is zero so we can
avoid a redundant instruction in that case.

It looks like nothing in shader-db is doing non-constant sampler array
indexing so this patch doesn't make any difference but it might be
worth having anyway.
---
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp   | 3 ++-
 src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index ea46b1a..40a3db3 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -789,7 +789,8 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg 
dst, struct brw_reg src
 
   /* addr = ((sampler * 0x101) + base_binding_table_index)  0xfff */
   brw_MUL(p, addr, sampler_reg, brw_imm_ud(0x101));
-  brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index));
+  if (base_binding_table_index)
+ brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index));
   brw_AND(p, addr, addr, brw_imm_ud(0xfff));
 
   brw_pop_insn_state(p);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index 1d3f5ed..cf1aa83 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -409,7 +409,8 @@ vec4_generator::generate_tex(vec4_instruction *inst,
 
   /* addr = ((sampler * 0x101) + base_binding_table_index)  0xfff */
   brw_MUL(p, addr, sampler_reg, brw_imm_ud(0x101));
-  brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index));
+  if (base_binding_table_index)
+ brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index));
   brw_AND(p, addr, addr, brw_imm_ud(0xfff));
 
   brw_pop_insn_state(p);
-- 
1.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] PIPE_SHADER_* vs TGSI_PROCESSOR_*

2015-05-29 Thread Brian Paul

On 05/28/2015 05:36 PM, Marek Olšák wrote:

Hi,

Would people be okay with removing the TGSI_PROCESSOR_ enums and
replacing all usages with PIPE_SHADER_*?


It's kind of nice that the TGSI shader stuff is self-contained with no 
real dependency on gallium.  This would change that in a tiny way.


Is this causing pain?  Would adjusting the TSGI/PIPE enum values so they 
match (VS/FS are mixed up) be of any help?


-Brian


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] i965: Don't use a temporary when generating an indirect sample

2015-05-29 Thread Neil Roberts
Previously when generating the send instruction for a sample
instruction with an indirect sampler it would use the destination
register as a temporary store. This breaks when used in combination
with the opt_sampler_eot optimisation because that forces the
destination to be null. This patch fixes that by avoiding the temp
register altogether.

The reason the temporary register was needed was because it was trying
to ensure the binding table index doesn't overflow a byte by and'ing
it with 0xff. The result is then or'd with samper_index8. This patch
instead just and's the whole thing by 0xfff. This will ensure that a
bogus sampler index won't overflow into the rest of the message
descriptor but unlike the previous code it won't ensure that the
binding table index doesn't overflow into the sampler index. It
doesn't seem like that should matter very much though because if the
shader is generating a bogus sampler index then it's going to just get
garbage out either way.

Instead of doing sampler_index8|(sampler_index+base_table_index) the
new code avoids one operation by doing
sampler_index*0x101+base_table_index which should be equivalent.
However if we wanted to avoid the multiply for some reason we could do
this by adding an extra or instruction still without needing the
temporary register.

This fixes a number of Piglit tests on Skylake that were using
indirect samplers such as:

 spec@arb_gpu_shader5@execution@sampler_array_indexing@fs-simple
---
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp   | 17 -
 src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 16 
 2 files changed, 8 insertions(+), 25 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 0be0f86..ea46b1a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -779,27 +779,18 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg 
dst, struct brw_reg src
   brw_mark_surface_used(prog_data, sampler + base_binding_table_index);
} else {
   /* Non-const sampler index */
-  /* Note: this clobbers `dst` as a temporary before emitting the send */
 
   struct brw_reg addr = vec1(retype(brw_address_reg(0), 
BRW_REGISTER_TYPE_UD));
-  struct brw_reg temp = vec1(retype(dst, BRW_REGISTER_TYPE_UD));
-
   struct brw_reg sampler_reg = vec1(retype(sampler_index, 
BRW_REGISTER_TYPE_UD));
 
   brw_push_insn_state(p);
   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
   brw_set_default_access_mode(p, BRW_ALIGN_1);
 
-  /* Some care required: `sampler` and `temp` may alias:
-   *addr = sampler  0xff
-   *temp = (sampler  8)  0xf00
-   *addr = addr | temp
-   */
-  brw_ADD(p, addr, sampler_reg, brw_imm_ud(base_binding_table_index));
-  brw_SHL(p, temp, sampler_reg, brw_imm_ud(8u));
-  brw_AND(p, temp, temp, brw_imm_ud(0x0f00));
-  brw_AND(p, addr, addr, brw_imm_ud(0x0ff));
-  brw_OR(p, addr, addr, temp);
+  /* addr = ((sampler * 0x101) + base_binding_table_index)  0xfff */
+  brw_MUL(p, addr, sampler_reg, brw_imm_ud(0x101));
+  brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index));
+  brw_AND(p, addr, addr, brw_imm_ud(0xfff));
 
   brw_pop_insn_state(p);
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index 20d096c..1d3f5ed 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -398,10 +398,8 @@ vec4_generator::generate_tex(vec4_instruction *inst,
   brw_mark_surface_used(prog_data-base, sampler + 
base_binding_table_index);
} else {
   /* Non-constant sampler index. */
-  /* Note: this clobbers `dst` as a temporary before emitting the send */
 
   struct brw_reg addr = vec1(retype(brw_address_reg(0), 
BRW_REGISTER_TYPE_UD));
-  struct brw_reg temp = vec1(retype(dst, BRW_REGISTER_TYPE_UD));
 
   struct brw_reg sampler_reg = vec1(retype(sampler_index, 
BRW_REGISTER_TYPE_UD));
 
@@ -409,16 +407,10 @@ vec4_generator::generate_tex(vec4_instruction *inst,
   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
   brw_set_default_access_mode(p, BRW_ALIGN_1);
 
-  /* Some care required: `sampler` and `temp` may alias:
-   *addr = sampler  0xff
-   *temp = (sampler  8)  0xf00
-   *addr = addr | temp
-   */
-  brw_ADD(p, addr, sampler_reg, brw_imm_ud(base_binding_table_index));
-  brw_SHL(p, temp, sampler_reg, brw_imm_ud(8u));
-  brw_AND(p, temp, temp, brw_imm_ud(0x0f00));
-  brw_AND(p, addr, addr, brw_imm_ud(0x0ff));
-  brw_OR(p, addr, addr, temp);
+  /* addr = ((sampler * 0x101) + base_binding_table_index)  0xfff */
+  brw_MUL(p, addr, sampler_reg, brw_imm_ud(0x101));
+  brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index));
+  brw_AND(p, addr, 

Re: [Mesa-dev] [PATCH 1/3] mesa: remove obsolete and broken sampler code

2015-05-29 Thread Timothy Arceri
Please ignore the first two patches turns out the name generation is
used for structs (although there seems to be no piglit tests for this)
like:

struct S {
   sampler2D tex[2];
};

uniform S i[3];


Where name would become: i[2].tex

On Fri, 2015-05-29 at 23:16 +1000, Timothy Arceri wrote:
 The warning is now handled earlier in the ast to hir code,
 and the name was only generated for arrays of arrays in which
 case this just breaks the Uniform hash lookup.
 ---
  src/mesa/program/sampler.cpp | 38 +-
  1 file changed, 13 insertions(+), 25 deletions(-)
 
 diff --git a/src/mesa/program/sampler.cpp b/src/mesa/program/sampler.cpp
 index ea3024d..34567d2 100644
 --- a/src/mesa/program/sampler.cpp
 +++ b/src/mesa/program/sampler.cpp
 @@ -38,14 +38,12 @@
  class get_sampler_name : public ir_hierarchical_visitor
  {
  public:
 -   get_sampler_name(ir_dereference *last,
 - struct gl_shader_program *shader_program)
 +   get_sampler_name(struct gl_shader_program *shader_program)
 {
this-mem_ctx = ralloc_context(NULL);
this-shader_program = shader_program;
this-name = NULL;
this-offset = 0;
 -  this-last = last;
 }
  
 ~get_sampler_name()
 @@ -68,29 +66,20 @@ public:
 virtual ir_visitor_status visit_leave(ir_dereference_array *ir)
 {
ir_constant *index = ir-array_index-as_constant();
 -  int i;
 +
 +  /* GLSL 1.10 and 1.20 allowed variable sampler array indices,
 +   * while GLSL 1.30 requires that the array indices be
 +   * constant integer expressions.  We don't expect any driver
 +   * to actually work with a really variable array index, so
 +   * all that would work would be an unrolled loop counter so
 +   * the index must be constant at this point.
 +   */
 +  assert(index != NULL);
  
if (index) {
 -  i = index-value.i[0];
 -  } else {
 -  /* GLSL 1.10 and 1.20 allowed variable sampler array indices,
 -   * while GLSL 1.30 requires that the array indices be
 -   * constant integer expressions.  We don't expect any driver
 -   * to actually work with a really variable array index, so
 -   * all that would work would be an unrolled loop counter that ends
 -   * up being constant above.
 -   */
 -  ralloc_strcat(shader_program-InfoLog,
 -warning: Variable sampler array index unsupported.\n
 -This feature of the language was removed in GLSL 1.20 
 -and is unlikely to be supported for 1.10 in Mesa.\n);
 -  i = 0;
 -  }
 -  if (ir != last) {
 -  this-name = ralloc_asprintf(mem_ctx, %s[%d], name, i);
 -  } else {
 -  offset = i;
 +  offset = index-value.i[0];
}
 +
return visit_continue;
 }
  
 @@ -98,7 +87,6 @@ public:
 const char *name;
 void *mem_ctx;
 int offset;
 -   ir_dereference *last;
  };
  
 
 @@ -107,7 +95,7 @@ _mesa_get_sampler_uniform_value(class ir_dereference 
 *sampler,
   struct gl_shader_program *shader_program,
   const struct gl_program *prog)
  {
 -   get_sampler_name getname(sampler, shader_program);
 +   get_sampler_name getname(shader_program);
  
 GLuint shader = _mesa_program_enum_to_shader_stage(prog-Target);
  


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Ideas on loop unrolling, loop examples, and my GSoC-blog

2015-05-29 Thread Connor Abbott
On Fri, May 29, 2015 at 6:23 AM, Eero Tamminen
eero.t.tammi...@intel.com wrote:
 Hi,

 On 05/28/2015 10:19 PM, Thomas Helland wrote:

 One more thing;
 Is there a limit where the loop body gets so large that we
 want to decide that gah, this sucks, no point in unrolling this?
 I imagine as the loops get large there will be a case of
 diminishing returns from the unrolling?


 I think only backend can say something to that.   You e.g. give backend
 unrolled and non-unrolled versions and backend decides which one is better
 (after applying additional optimizations)...

I don't really think it's going to be too good of an idea to do that,
mainly because it means you'd be duplicating a lot of work for the
normal vs. unrolled versions, and there might be some really large
loops where generating the unrolled version is going to kill your CPU
-- doing any amount of work that's proportional to the number of times
the loop runs, without any limit, seems like a recipe for disaster.

In GLSL IR, we've been fairly lax about figuring out when unrolling is
helpful and unhelpful -- we just have a simple node count plus a
threshold (as well as a few other heuristics). In NIR, we could
similarly have an instruction count plus a threshold and port over the
heuristics to whatever extent possible. We do have some logic for
figuring out if an array access is constant after unrolling, and it
seems like we'd want to keep that around. The next level of
sophistication, I guess, is to give the backend a callback to give an
estimation of the execution cost of certain operations. For example,
unless a negate/absolute value instruction is used by something that
can't handle the modifier, then on i965 the cost of those instructions
would be 0. I think that would get us most of the way there to
something accurate, without needing to do an undue amount of work (in
terms of CPU time and man-effort).

Connor



 - Eero


 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 11/15] egl: add eglGetSyncAttrib

2015-05-29 Thread Chad Versace
On Thu 28 May 2015, Marek Olšák wrote:
 Hi Chad,
 
 A new patch is attached.

Thanks for the changes. This patch is
Reviewed-by: Chad Versace chad.vers...@intel.com
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] dri_util: make version var unsigned to silence warnings

2015-05-29 Thread Brian Paul
_mesa_override_gl_version_contextless() takes an unsigned version
parameter.
---
 src/mesa/drivers/dri/common/dri_util.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/common/dri_util.c 
b/src/mesa/drivers/dri/common/dri_util.c
index 2d847ef..66f9eaa 100644
--- a/src/mesa/drivers/dri/common/dri_util.c
+++ b/src/mesa/drivers/dri/common/dri_util.c
@@ -164,7 +164,7 @@ driCreateNewScreen2(int scrn, int fd,
 
 struct gl_constants consts = { 0 };
 gl_api api;
-int version;
+unsigned int version;
 
 api = API_OPENGLES2;
 if (_mesa_override_gl_version_contextless(consts, api, version))
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] i965: Don't add base_binding_table_index if it's zero

2015-05-29 Thread Matt Turner
Reviewed-by: Matt Turner matts...@gmail.com
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 12/15] egl: add eglCreateImage

2015-05-29 Thread Chad Versace
On Thu 28 May 2015, Eirik Byrkjeflot Anonsen wrote:
 Marek Olšák mar...@gmail.com writes:
 
  I don't understand. Using size_t should prevent the integer overflow.
  Is there anything else wrong other than no fail path for malloc? I
  also don't understand how calloc can help here.
 
  Marek
 
 size * sizeof(int_attribs[0]) may overflow and thus wrap to a small
 number. Using calloc, you'd have calloc(size, sizeof(int_attribs[0])),
 moving the overflow inside calloc(). So if calloc() does its job
 properly, it will protect against it.

Right.

It's very unlikely that an attacker could coerce the size calculation to
overflow, but better safe than sorry.

calloc() [and ralloc() too] will refuse to allocate memory if the size
calculation overflows. ralloc() checks for overflow with some simple
arithmetic. I expect that calloc() checks for overflow using a faster
method: multiply first, then inspect the overflow flag in a status
register. Recent GCC provides builtin functions for that [1].

[1] 
https://gcc.gnu.org/onlinedocs/gcc/Integer-Overflow-Builtins.html#Integer-Overflow-Builtins
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] mesa: remove unused function declaration

2015-05-29 Thread Matt Turner
On Fri, May 29, 2015 at 6:16 AM, Timothy Arceri t_arc...@yahoo.com.au wrote:
 ---
  src/mesa/main/uniforms.h | 4 
  1 file changed, 4 deletions(-)

 diff --git a/src/mesa/main/uniforms.h b/src/mesa/main/uniforms.h
 index 55fa235..bd7b05e 100644
 --- a/src/mesa/main/uniforms.h
 +++ b/src/mesa/main/uniforms.h
 @@ -343,10 +343,6 @@ void GLAPIENTRY
  _mesa_ProgramUniformMatrix4x3dv(GLuint program, GLint location, GLsizei 
 count,
  GLboolean transpose, const GLdouble *value);

 -long
 -_mesa_parse_program_resource_name(const GLchar *name,
 -  const GLchar **out_base_name_end);
 -

Whoops, looks like it was mistakenly added in commit b92900d2 (which
actually did add parse_program_resource_name).

Reviewed-by: Matt Turner matts...@gmail.com
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] dri_util: make version var unsigned to silence warnings

2015-05-29 Thread Matt Turner
On Fri, May 29, 2015 at 10:38 AM, Brian Paul bri...@vmware.com wrote:
 _mesa_override_gl_version_contextless() takes an unsigned version
 parameter.
 ---
  src/mesa/drivers/dri/common/dri_util.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

 diff --git a/src/mesa/drivers/dri/common/dri_util.c 
 b/src/mesa/drivers/dri/common/dri_util.c
 index 2d847ef..66f9eaa 100644
 --- a/src/mesa/drivers/dri/common/dri_util.c
 +++ b/src/mesa/drivers/dri/common/dri_util.c
 @@ -164,7 +164,7 @@ driCreateNewScreen2(int scrn, int fd,

  struct gl_constants consts = { 0 };
  gl_api api;
 -int version;
 +unsigned int version;

Maybe drop the 'int' from the declaration.

Reviewed-by: Matt Turner matts...@gmail.com
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] i965: Don't use a temporary when generating an indirect sample

2015-05-29 Thread Matt Turner
On Fri, May 29, 2015 at 6:53 AM, Neil Roberts n...@linux.intel.com wrote:
 Previously when generating the send instruction for a sample
 instruction with an indirect sampler it would use the destination
 register as a temporary store. This breaks when used in combination
 with the opt_sampler_eot optimisation because that forces the
 destination to be null. This patch fixes that by avoiding the temp
 register altogether.

 The reason the temporary register was needed was because it was trying
 to ensure the binding table index doesn't overflow a byte by and'ing
 it with 0xff. The result is then or'd with samper_index8. This patch
 instead just and's the whole thing by 0xfff. This will ensure that a
 bogus sampler index won't overflow into the rest of the message
 descriptor but unlike the previous code it won't ensure that the
 binding table index doesn't overflow into the sampler index. It
 doesn't seem like that should matter very much though because if the
 shader is generating a bogus sampler index then it's going to just get
 garbage out either way.

 Instead of doing sampler_index8|(sampler_index+base_table_index) the
 new code avoids one operation by doing
 sampler_index*0x101+base_table_index which should be equivalent.
 However if we wanted to avoid the multiply for some reason we could do
 this by adding an extra or instruction still without needing the
 temporary register.

 This fixes a number of Piglit tests on Skylake that were using
 indirect samplers such as:

  spec@arb_gpu_shader5@execution@sampler_array_indexing@fs-simple
 ---
  src/mesa/drivers/dri/i965/brw_fs_generator.cpp   | 17 -
  src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 16 
  2 files changed, 8 insertions(+), 25 deletions(-)

 diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp 
 b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
 index 0be0f86..ea46b1a 100644
 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
 +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
 @@ -779,27 +779,18 @@ fs_generator::generate_tex(fs_inst *inst, struct 
 brw_reg dst, struct brw_reg src
brw_mark_surface_used(prog_data, sampler + base_binding_table_index);
 } else {
/* Non-const sampler index */
 -  /* Note: this clobbers `dst` as a temporary before emitting the send */

struct brw_reg addr = vec1(retype(brw_address_reg(0), 
 BRW_REGISTER_TYPE_UD));
 -  struct brw_reg temp = vec1(retype(dst, BRW_REGISTER_TYPE_UD));
 -
struct brw_reg sampler_reg = vec1(retype(sampler_index, 
 BRW_REGISTER_TYPE_UD));

brw_push_insn_state(p);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
brw_set_default_access_mode(p, BRW_ALIGN_1);

 -  /* Some care required: `sampler` and `temp` may alias:
 -   *addr = sampler  0xff
 -   *temp = (sampler  8)  0xf00
 -   *addr = addr | temp
 -   */
 -  brw_ADD(p, addr, sampler_reg, brw_imm_ud(base_binding_table_index));
 -  brw_SHL(p, temp, sampler_reg, brw_imm_ud(8u));
 -  brw_AND(p, temp, temp, brw_imm_ud(0x0f00));
 -  brw_AND(p, addr, addr, brw_imm_ud(0x0ff));
 -  brw_OR(p, addr, addr, temp);
 +  /* addr = ((sampler * 0x101) + base_binding_table_index)  0xfff */
 +  brw_MUL(p, addr, sampler_reg, brw_imm_ud(0x101));
 +  brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index));
 +  brw_AND(p, addr, addr, brw_imm_ud(0xfff));

brw_pop_insn_state(p);

 diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp 
 b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
 index 20d096c..1d3f5ed 100644
 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
 +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
 @@ -398,10 +398,8 @@ vec4_generator::generate_tex(vec4_instruction *inst,
brw_mark_surface_used(prog_data-base, sampler + 
 base_binding_table_index);
 } else {
/* Non-constant sampler index. */
 -  /* Note: this clobbers `dst` as a temporary before emitting the send */

struct brw_reg addr = vec1(retype(brw_address_reg(0), 
 BRW_REGISTER_TYPE_UD));
 -  struct brw_reg temp = vec1(retype(dst, BRW_REGISTER_TYPE_UD));


I'd delete the blank line here as well to match the fs code.

Really nice solution. I'd been trying to figure out the best way to
get an additional temporary in here, but this is clearly better.

Reviewed-by: Matt Turner matts...@gmail.com
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 13/15] egl: add new platform functions

2015-05-29 Thread Chad Versace
On Thu 28 May 2015, Marek Olšák wrote:
 A new patch is attached. Please review.
 

Looks good to me.
Reviewed-by: Chad Versace chad.vers...@intel.com

Later, if and when some platform extension defines a pointer-sized
attribute, then we will need to invert the function order. That is,
eglGetPlatformDisplayEXT will then need to be a wrapper around
eglGetPlatformDisplay. But this patch's approach is fine for now.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 15/15] egl: expose EGL 1.5 if all requirements are met

2015-05-29 Thread Chad Versace
The last two patches, 14-15, are
Reviewed-by: Chad Versace chad.vers...@intel.com

I think I've reviewed all the patches in this series. Please let me know
if I missed any.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/3] i965: Create a shader_dispatch_mode enum to replace VS/GS fields.

2015-05-29 Thread Ben Widawsky
On Fri, May 29, 2015 at 12:26:39PM -0700, Kenneth Graunke wrote:
 We used to store the GS dispatch mode in brw_gs_prog_data while
 separately storing the VS dispatch mode in brw_vue_prog_data::simd8.
 
 This patch introduces an enum to represent all possible dispatch modes,
 and stores it in brw_vue_prog_data::dispatch_mode, unifying the two.
 
 Based on a suggestion by Matt Turner.
 
 Signed-off-by: Kenneth Graunke kenn...@whitecape.org
 ---
  src/mesa/drivers/dri/i965/brw_context.h   | 16 +++-
  src/mesa/drivers/dri/i965/brw_defines.h   |  5 ++---
  src/mesa/drivers/dri/i965/brw_vec4.cpp|  5 -
  src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp |  8 
  src/mesa/drivers/dri/i965/brw_vs_surface_state.c  |  4 ++--
  src/mesa/drivers/dri/i965/gen7_gs_state.c |  2 +-
  src/mesa/drivers/dri/i965/gen8_gs_state.c |  3 ++-
  src/mesa/drivers/dri/i965/gen8_vs_state.c |  3 ++-
  8 files changed, 24 insertions(+), 22 deletions(-)
 
 diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
 b/src/mesa/drivers/dri/i965/brw_context.h
 index abc11f6..01c4283 100644
 --- a/src/mesa/drivers/dri/i965/brw_context.h
 +++ b/src/mesa/drivers/dri/i965/brw_context.h
 @@ -605,6 +605,12 @@ struct brw_ff_gs_prog_data {
 unsigned svbi_postincrement_value;
  };
  
 +enum shader_dispatch_mode {
 +   DISPATCH_MODE_4X1_SINGLE = 0,
 +   DISPATCH_MODE_4X2_DUAL_INSTANCE = 1,
 +   DISPATCH_MODE_4X2_DUAL_OBJECT = 2,
 +   DISPATCH_MODE_SIMD8 = 3,
 +};
  
  /* Note: brw_vue_prog_data_compare() must be updated when adding fields to
   * this struct!
 @@ -622,7 +628,7 @@ struct brw_vue_prog_data {
  */
 GLuint urb_entry_size;
  
 -   bool simd8;
 +   enum shader_dispatch_mode dispatch_mode;
  };
  
  
 @@ -720,14 +726,6 @@ struct brw_gs_prog_data
 int invocations;
  
 /**
 -* Dispatch mode, can be any of:
 -* GEN7_GS_DISPATCH_MODE_DUAL_OBJECT
 -* GEN7_GS_DISPATCH_MODE_DUAL_INSTANCE
 -* GEN7_GS_DISPATCH_MODE_SINGLE
 -*/
 -   int dispatch_mode;
 -
 -   /**
  * Gen6 transform feedback enabled flag.
  */
 bool gen6_xfb_enabled;
 diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
 b/src/mesa/drivers/dri/i965/brw_defines.h
 index dedc381..f6da305 100644
 --- a/src/mesa/drivers/dri/i965/brw_defines.h
 +++ b/src/mesa/drivers/dri/i965/brw_defines.h
 @@ -1773,9 +1773,8 @@ enum brw_message_target {
  # define GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID   1
  # define GEN7_GS_CONTROL_DATA_HEADER_SIZE_SHIFT  20
  # define GEN7_GS_INSTANCE_CONTROL_SHIFT  15
 -# define GEN7_GS_DISPATCH_MODE_SINGLE(0  11)
 -# define GEN7_GS_DISPATCH_MODE_DUAL_INSTANCE (1  11)
 -# define GEN7_GS_DISPATCH_MODE_DUAL_OBJECT   (2  11)
 +# define GEN7_GS_DISPATCH_MODE_SHIFT11
 +# define GEN7_GS_DISPATCH_MODE_MASK INTEL_MASK(12, 11)
  # define GEN6_GS_STATISTICS_ENABLE   (1  10)
  # define GEN6_GS_SO_STATISTICS_ENABLE(1  9)
  # define GEN6_GS_RENDERING_ENABLE(1  8)
 diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
 b/src/mesa/drivers/dri/i965/brw_vec4.cpp
 index a324798..5a9c3f5 100644
 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
 +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
 @@ -1894,6 +1894,8 @@ brw_vs_emit(struct brw_context *brw,
  brw_create_nir(brw, NULL, c-vp-program.Base, 
 MESA_SHADER_VERTEX);
}
  
 +  prog_data-base.dispatch_mode = DISPATCH_MODE_SIMD8;
 +
fs_visitor v(brw, mem_ctx, MESA_SHADER_VERTEX, c-key,
 prog_data-base.base, prog, c-vp-program.Base, 8);
if (!v.run_vs()) {
 @@ -1926,11 +1928,12 @@ brw_vs_emit(struct brw_context *brw,
g.generate_code(v.cfg, 8);
assembly = g.get_assembly(final_assembly_size);
  
 -  prog_data-base.simd8 = true;
c-base.last_scratch = v.last_scratch;

for whatever it's worth, it would have made more sense to put the dispatch_mode
change here.

 }
  
 if (!assembly) {
 +  prog_data-base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT;
 +
vec4_vs_visitor v(brw, c, prog_data, prog, mem_ctx);
if (!v.run()) {
   if (prog) {
 diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp 
 b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
 index 363e30e..eacb2f5 100644
 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
 +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
 @@ -106,7 +106,7 @@ vec4_gs_visitor::setup_payload()
  * to be interleaved, so one register contains two attribute slots.
  */
 int attributes_per_reg =
 -  c-prog_data.dispatch_mode == GEN7_GS_DISPATCH_MODE_DUAL_OBJECT ? 1 : 
 2;
 +  c-prog_data.base.dispatch_mode == DISPATCH_MODE_4X2_DUAL_OBJECT ? 1 : 
 2;
  
 /* If a geometry shader tries to read from an input that wasn't written by
  * the vertex shader, that 

Re: [Mesa-dev] [PATCH 6/6] i965/gen8+: Add aux buffer alignment assertions

2015-05-29 Thread Chad Versace
On Thu 28 May 2015, Ben Widawsky wrote:
 This helped find the incorrect HALIGN values from the previous patches.
 
 Signed-off-by: Ben Widawsky b...@bwidawsk.net
 ---
  src/mesa/drivers/dri/i965/gen8_surface_state.c | 4 
  1 file changed, 4 insertions(+)
 
 diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c 
 b/src/mesa/drivers/dri/i965/gen8_surface_state.c
 index 672fc70..c8965db 100644
 --- a/src/mesa/drivers/dri/i965/gen8_surface_state.c
 +++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c
 @@ -178,6 +178,8 @@ gen8_emit_texture_surface_state(struct brw_context *brw,
 if (mt-mcs_mt) {
aux_mt = mt-mcs_mt;
aux_mode = GEN8_SURFACE_AUX_MODE_MCS;
 +  assert(brw-gen  9 || mt-align_w == 16);
 +  assert(brw-gen  8 || mt-num_samples  0 || mt-align_w == 16);

When I saw the gen8 assertion, I was unsure if you were asserting that
the miptree satisfied a purely software expectation or that the miptree
also satifisfied a hardware requirement. I'd like to see a PRM quote
here to make it clear:

   The Broadwell PRM, RENDER_SURFACE_STATE.SurfaceHorizontalAlignment,
   says When MCS is enabled for non-MSRT, HALIGN_16 must be used.

 uint32_t *surf = allocate_surface_state(brw, surf_offset, surf_index);
 @@ -391,6 +393,8 @@ gen8_update_renderbuffer_surface(struct brw_context *brw,
 if (mt-mcs_mt) {
aux_mt = mt-mcs_mt;
aux_mode = GEN8_SURFACE_AUX_MODE_MCS;
 +  assert(brw-gen  9 || mt-align_w == 16);
 +  assert(brw-gen  8 || mt-num_samples  0 || mt-align_w == 16);
 }

Same here.

With some PRM quotes sprinkled in, this patch is
Reviewed-by: Chad Versace chad.vers...@intel.com

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/6] i965/gen8: Correct HALIGN for AUX surfaces

2015-05-29 Thread Chad Versace
On Thu 28 May 2015, Ben Widawsky wrote:
 This restriction was attempted in this commit:
 commit 47053464630888f819ef8cc44278f1a1220159b9
 Author: Anuj Phogat anuj.pho...@gmail.com
 Date:   Fri Feb 13 11:21:21 2015 -0800
 
i965/gen8: Use HALIGN_16 if MCS is enabled for non-MSRT
 
 However, the commit itself doesn't achieve the desired goal as determined by 
 the
 asserts which the next patch adds. mcs_mt is never a value because we're in 
 the
 process of allocating the mcs_mt miptree when we get to this function. I 
 didn't
 check, but perhaps this would work with blorp, however, meta clears allocate 
 the
 miptree structure (which AFAICT needs the alignment also) way before it
 allocates using meta clears where the renderbuffer is allocated way before the
 aux buffer.
 
 The restriction is referenced in a few places, but the most concise one [IMO]
 from the spec is for Gen9. Gen8 8 loosens the restriction in that it only
 requires this for non-msrt surface.
 
When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E, HALIGN 16 
 must
be used.
 
 With the code before the miptree layout flag rework (patches preceding this),
 accomplishing this workaround is very difficult.
 
 Cc: Anuj Phogat anuj.pho...@gmail.com
 Cc: Neil Roberts n...@linux.intel.com
 Signed-off-by: Ben Widawsky b...@bwidawsk.net
 ---
  src/mesa/drivers/dri/i965/brw_tex_layout.c| 16 ++--
  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 15 ---
  src/mesa/drivers/dri/i965/intel_mipmap_tree.h |  4 +++-
  3 files changed, 25 insertions(+), 10 deletions(-)
 
 diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c 
 b/src/mesa/drivers/dri/i965/brw_tex_layout.c
 index 72b02a2..b51c7c7 100644
 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c
 +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c
 @@ -41,8 +41,13 @@
  
  static unsigned int
  intel_horizontal_texture_alignment_unit(struct brw_context *brw,
 -struct intel_mipmap_tree *mt)
 +struct intel_mipmap_tree *mt,
 +uint32_t layout_flags)
  {
 +
 +   if (layout_flags  MIPTREE_LAYOUT_FORCE_HALIGN16)
 +  return 16;
 +

This snippet makes sense. If the force flag is set, then don't bother
inspecting any other state.

 /**
  * From the Alignment Unit Size section of various specs, namely:
  * - Gen3 Spec: Memory Data Formats Volume, Section 1.20.1.4
 @@ -91,9 +96,6 @@ intel_horizontal_texture_alignment_unit(struct brw_context 
 *brw,
 if (brw-gen = 7  mt-format == MESA_FORMAT_Z_UNORM16)
return 8;
  
 -   if (brw-gen == 8  mt-mcs_mt  mt-num_samples = 1)
 -  return 16;
 -
 return 4;
  }
  
 @@ -459,7 +461,8 @@ brw_miptree_layout_texture_3d(struct brw_context *brw,
  }
  
  void
 -brw_miptree_layout(struct brw_context *brw, struct intel_mipmap_tree *mt)
 +brw_miptree_layout(struct brw_context *brw, struct intel_mipmap_tree *mt,
 +   uint32_t layout_flags)
  {
 bool multisampled = mt-num_samples  1;
 bool gen6_hiz_or_stencil = false;
 @@ -492,8 +495,9 @@ brw_miptree_layout(struct brw_context *brw, struct 
 intel_mipmap_tree *mt)
   mt-align_w = 128 / mt-cpp;
   mt-align_h = 32;
}
 +  assert((layout_flags  MIPTREE_LAYOUT_FORCE_HALIGN16) == 0);

This assertion makes sense. 

 } else {
 -  mt-align_w = intel_horizontal_texture_alignment_unit(brw, mt);
 +  mt-align_w = intel_horizontal_texture_alignment_unit(brw, mt, 
 layout_flags);
mt-align_h =
   intel_vertical_texture_alignment_unit(brw, mt-format, 
 multisampled);
 }
 diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
 b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
 index 75ee19a..a1ac0cf 100644
 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
 +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
 @@ -475,7 +475,10 @@ intel_miptree_create_layout(struct brw_context *brw,
 if (layout_flags  MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD)
mt-array_layout = ALL_SLICES_AT_EACH_LOD;
  
 -   brw_miptree_layout(brw, mt);
 +   if (intel_miptree_is_fast_clear_capable(brw, mt))
 +  layout_flags |= MIPTREE_LAYOUT_FORCE_HALIGN16;
 +
 +   brw_miptree_layout(brw, mt, layout_flags);

This does not make sense to me. I though HALIGN16 didn't exist before
Skylake, but here you're setting it on Ivybridge through Broadwell too.
Am I misunderstanding something?
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/3] i965: Drop Vector Mask Enable bit from 3DSTATE_GS on Gen8+.

2015-05-29 Thread Ben Widawsky
On Fri, May 29, 2015 at 12:26:38PM -0700, Kenneth Graunke wrote:
 The documentation makes it pretty clear that we shouldn't use this:
 
Under normal conditions SW shall specify DMask, as the GS stage
 will provide a Dispatch Mask appropriate to SIMD4x2 or SIMD8 thread
 execution (as a function of dispatch mode).  E.g., for SIMD4x2
 execution, the GS stage will generate a Dispatch Mask that is equal
 to what the EU would use as the Vector Mask.  For SIMD8 execution
 there is no known usage model for use of Vector Mask (as there is
 for PS shaders).
 
 I also managed to find descriptions of DMask and VMask, in the State
 Register (sr0.2/3) field descriptions:
 
Dispatch Mask (DMask).  This 32-bit field specifies which channels
 are active at Dispatch time.
 
Vector Mask (VMask).  This 32-bit field contains, for each 4-bit
 group, the OR of the corresponding 4-bit group in the dispatch
 mask.
 
 SIMD4x2 shaders process one or two vec4 values, with each 4-bit group
 corresponding to xyzw channel enables (either all on, or all off).
 Thus, DMask = VMask in SIMD4x2 mode.  But in SIMD8 mode, 4-bit groups
 are meaningless, so it just messes up your values.

Just making sure you're reading this the same way I am, DMask == VMask for
SIMD4x2. VMask shouldn't be used for SIMD8.

While here, you've probably seen this before, but I just noticed for SIMD8 GS on
BDW:
Not valid for objects with more than 6 vertices per object.

Probably we don't care, but SPF might be effected by this patch, broken, or
fixed by this - I can't quite tell which.

This seems fine to me though.
Reviewed-by: Ben Widawsky b...@bwidawsk.net

 
 Signed-off-by: Kenneth Graunke kenn...@whitecape.org
 ---
  src/mesa/drivers/dri/i965/gen8_gs_state.c | 3 +--
  1 file changed, 1 insertion(+), 2 deletions(-)
 
 I'm working on SIMD8 geometry shader support.  These are a few simple patches
 that are pretty straightforward and could be landed on their own.
 
 diff --git a/src/mesa/drivers/dri/i965/gen8_gs_state.c 
 b/src/mesa/drivers/dri/i965/gen8_gs_state.c
 index 6a0e215..0763e91 100644
 --- a/src/mesa/drivers/dri/i965/gen8_gs_state.c
 +++ b/src/mesa/drivers/dri/i965/gen8_gs_state.c
 @@ -48,8 +48,7 @@ gen8_upload_gs_state(struct brw_context *brw)
OUT_BATCH(_3DSTATE_GS  16 | (10 - 2));
OUT_BATCH(stage_state-prog_offset);
OUT_BATCH(0);
 -  OUT_BATCH(GEN6_GS_VECTOR_MASK_ENABLE |
 -brw-geometry_program-VerticesIn |
 +  OUT_BATCH(brw-geometry_program-VerticesIn |
  ((ALIGN(stage_state-sampler_count, 4)/4) 
   GEN6_GS_SAMPLER_COUNT_SHIFT) |
  ((prog_data-base.binding_table.size_bytes / 4) 



-- 
Ben Widawsky, Intel Open Source Technology Center
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] enable pbuffer for drm platform

2015-05-29 Thread Liu, Ying2
When we tried to enable EGL on our project, we found that there was no pbuffer 
support for drm platform. I got some help from Alan and Kristian. Thank you so 
much! I studied platform_drm.c and compared it to other platforms' code. 
Finally I was able to create this patch to enable pbuffer for drm platform. Our 
QA team has run full cycle testing and didn't find any regression.  I wonder if 
anybody could review this patch to see if we could submit it to mesa.

Thanks

Ying

--- mesa-10.5.5.orig/src/egl/drivers/dri2/egl_dri2.c  2015-05-11 
12:10:59.0 -0700
+++ mesa-10.5.5/src/egl/drivers/dri2/egl_dri2.c   2015-05-26 04:06:16.353346946 
-0700
@@ -869,6 +869,10 @@ dri2_create_context(_EGLDriver *drv, _EG
*/
   if (conf-SurfaceType  EGL_WINDOW_BIT)
  dri2_ctx-base.WindowRenderBuffer = EGL_BACK_BUFFER;
+
+  if (conf-SurfaceType  EGL_PBUFFER_BIT)
+  dri2_ctx-base.WindowRenderBuffer = EGL_BACK_BUFFER;
+
}
else
   dri_config = NULL;
diff -rupN mesa-10.5.5.orig/src/egl/drivers/dri2/platform_drm.c 
mesa-10.5.5/src/egl/drivers/dri2/platform_drm.c
--- mesa-10.5.5.orig/src/egl/drivers/dri2/platform_drm.c2015-05-11 
12:10:59.0 -0700
+++ mesa-10.5.5/src/egl/drivers/dri2/platform_drm.c 2015-05-26 
04:04:25.293346505 -0700
@@ -122,6 +122,8 @@ dri2_drm_create_surface(_EGLDriver *drv,
   dri2_surf-base.Height = surf-base.height;
   surf-dri_private = dri2_surf;
   break;
+   case EGL_PBUFFER_BIT:
+  break;
default:
   goto cleanup_surf;
}
@@ -130,7 +132,7 @@ dri2_drm_create_surface(_EGLDriver *drv,
   dri2_surf-dri_drawable =
  (*dri2_dpy-dri2-createNewDrawable) (dri2_dpy-dri_screen,
dri2_conf-dri_double_config,
-   dri2_surf-gbm_surf);
+   dri2_surf);
} else {
   assert(dri2_dpy-swrast != NULL);
@@ -153,6 +155,15 @@ dri2_drm_create_surface(_EGLDriver *drv,
return NULL;
}
+static inline _EGLSurface *
+dri2_drm_create_pbuffer_surface(_EGLDriver *drv, _EGLDisplay *disp,
+ _EGLConfig *conf,
+ const EGLint *attrib_list)
+{
+   return dri2_drm_create_surface(drv, disp, EGL_PBUFFER_BIT, conf,
+  NULL, attrib_list);
+}
+
static _EGLSurface *
dri2_drm_create_window_surface(_EGLDriver *drv, _EGLDisplay *disp,
_EGLConfig *conf, void *native_window,
@@ -575,7 +586,7 @@ static struct dri2_egl_display_vtbl dri2
.authenticate = dri2_drm_authenticate,
.create_window_surface = dri2_drm_create_window_surface,
.create_pixmap_surface = dri2_drm_create_pixmap_surface,
-   .create_pbuffer_surface = dri2_fallback_create_pbuffer_surface,
+   .create_pbuffer_surface = dri2_drm_create_pbuffer_surface,
.destroy_surface = dri2_drm_destroy_surface,
.create_image = dri2_drm_create_image_khr,
.swap_interval = dri2_fallback_swap_interval,
@@ -596,6 +607,7 @@ dri2_initialize_drm(_EGLDriver *drv, _EG
struct gbm_device *gbm;
int fd = -1;
int i;
+   EGLint surface_type;
loader_set_logger(_eglLog);
@@ -691,8 +703,9 @@ dri2_initialize_drm(_EGLDriver *drv, _EG
   attr_list[1] = format;
   attr_list[2] = EGL_NONE;
+  surface_type =  EGL_WINDOW_BIT | EGL_PBUFFER_BIT;
   dri2_add_config(disp, dri2_dpy-driver_configs[i],
-  i + 1, EGL_WINDOW_BIT, attr_list, NULL);
+  i + 1, surface_type, attr_list, NULL);
}
disp-Extensions.KHR_image_pixmap = EGL_TRUE;

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] enable pbuffer for drm platform

2015-05-29 Thread Matt Turner
On Fri, May 29, 2015 at 3:17 PM, Liu, Ying2 ying2@intel.com wrote:
 When we tried to enable EGL on our project, we found that there was no
 pbuffer support for drm platform. I got some help from Alan and Kristian.
 Thank you so much! I studied platform_drm.c and compared it to other
 platforms’ code. Finally I was able to create this patch to enable pbuffer
 for drm platform. Our QA team has run full cycle testing and didn’t find any
 regression.  I wonder if anybody could review this patch to see if we could
 submit it to mesa.



 Thanks



 Ying



Thanks for the patch. I don't know much about EGL or pbuffers, but I
can offer some advice on patch submission.

Firstly, the patch cannot be applied directly, since it's been line
wrapped by your email client. The best way to send patches is with
`git send-email` which will do all the proper formatting to ensure it
can be easily applied.

Also, patches should have an appropriate title and commit summary. The
title is of the form prefix: Imperative sentence, like egl: Add
pbuffer support for drm platform. Perhaps a small description of why
this is useful would be nice in the commit summary as well.

 --- mesa-10.5.5.orig/src/egl/drivers/dri2/egl_dri2.c  2015-05-11
 12:10:59.0 -0700

 +++ mesa-10.5.5/src/egl/drivers/dri2/egl_dri2.c   2015-05-26
 04:06:16.353346946 -0700

 @@ -869,6 +869,10 @@ dri2_create_context(_EGLDriver *drv, _EG

 */

if (conf-SurfaceType  EGL_WINDOW_BIT)

   dri2_ctx-base.WindowRenderBuffer = EGL_BACK_BUFFER;

 +

 +  if (conf-SurfaceType  EGL_PBUFFER_BIT)

 +  dri2_ctx-base.WindowRenderBuffer = EGL_BACK_BUFFER;

The indentation on this line is different from the one in the
EGL_WINDOW_BIT case above. Keep it consistent with the surrounding
code.


 +

 }

 else

dri_config = NULL;

 diff -rupN mesa-10.5.5.orig/src/egl/drivers/dri2/platform_drm.c
 mesa-10.5.5/src/egl/drivers/dri2/platform_drm.c

 --- mesa-10.5.5.orig/src/egl/drivers/dri2/platform_drm.c2015-05-11
 12:10:59.0 -0700

 +++ mesa-10.5.5/src/egl/drivers/dri2/platform_drm.c 2015-05-26
 04:04:25.293346505 -0700

 @@ -122,6 +122,8 @@ dri2_drm_create_surface(_EGLDriver *drv,

dri2_surf-base.Height = surf-base.height;

surf-dri_private = dri2_surf;

break;

 +   case EGL_PBUFFER_BIT:

 +  break;

 default:

goto cleanup_surf;

 }

 @@ -130,7 +132,7 @@ dri2_drm_create_surface(_EGLDriver *drv,

dri2_surf-dri_drawable =

   (*dri2_dpy-dri2-createNewDrawable) (dri2_dpy-dri_screen,


 dri2_conf-dri_double_config,

 -   dri2_surf-gbm_surf);

 +   dri2_surf);

I'm not familiar with the code, but this change seems suspect... not
only because immediately below this is another call to
createNewDrawable that does not contain the same change.


 } else {

assert(dri2_dpy-swrast != NULL);

 @@ -153,6 +155,15 @@ dri2_drm_create_surface(_EGLDriver *drv,

 return NULL;

 }

 +static inline _EGLSurface *

Don't use inline unless you know better than the compiler. In this
case it's particularly unhelpful, since the function is used in a
vtable... so it cannot be inline.


 +dri2_drm_create_pbuffer_surface(_EGLDriver *drv, _EGLDisplay *disp,

 + _EGLConfig *conf,

 + const EGLint *attrib_list)

The style for line-wrapped argument lists is to align them with the opening (

See dri2_drm_create_window_surface for example.


 +{

 +   return dri2_drm_create_surface(drv, disp, EGL_PBUFFER_BIT, conf,

 +  NULL, attrib_list);

The same comment applies here as well.


 +}

 +

 static _EGLSurface *

 dri2_drm_create_window_surface(_EGLDriver *drv, _EGLDisplay *disp,

 _EGLConfig *conf, void *native_window,

 @@ -575,7 +586,7 @@ static struct dri2_egl_display_vtbl dri2

 .authenticate = dri2_drm_authenticate,

 .create_window_surface = dri2_drm_create_window_surface,

 .create_pixmap_surface = dri2_drm_create_pixmap_surface,

 -   .create_pbuffer_surface = dri2_fallback_create_pbuffer_surface,

 +   .create_pbuffer_surface = dri2_drm_create_pbuffer_surface,

 .destroy_surface = dri2_drm_destroy_surface,

 .create_image = dri2_drm_create_image_khr,

 .swap_interval = dri2_fallback_swap_interval,

 @@ -596,6 +607,7 @@ dri2_initialize_drm(_EGLDriver *drv, _EG

 struct gbm_device *gbm;

 int fd = -1;

 int i;

 +   EGLint surface_type;

 loader_set_logger(_eglLog);

 @@ -691,8 +703,9 @@ dri2_initialize_drm(_EGLDriver *drv, _EG

attr_list[1] = format;

attr_list[2] = EGL_NONE;

 +  surface_type =  EGL_WINDOW_BIT | EGL_PBUFFER_BIT;

dri2_add_config(disp, dri2_dpy-driver_configs[i],

 -  i + 1, EGL_WINDOW_BIT, attr_list, NULL);

 + 

Re: [Mesa-dev] [PATCH 1/6] i965: Consolidate certain miptree params to flags

2015-05-29 Thread Chad Versace
On Fri 29 May 2015, Kenneth Graunke wrote:
 On Friday, May 29, 2015 12:33:10 PM Chad Versace wrote:
  On Fri 29 May 2015, Matt Turner wrote:
   On Thu, May 28, 2015 at 10:21 AM, Ben Widawsky
@@ -286,7 +284,7 @@ intel_miptree_create_layout(struct brw_context *brw,
mt-logical_height0 = height0;
mt-logical_depth0 = depth0;
mt-fast_clear_state = INTEL_FAST_CLEAR_STATE_NO_MCS;
-   mt-disable_aux_buffers = disable_aux_buffers;
+   mt-disable_aux_buffers = !!(layout_flags  
MIPTREE_LAYOUT_DISABLE_AUX);
   
   FWIW, I much prefer (x  y) != 0 to !!(x  y).
  
  Matt, in the C code you've encountered in the wild, do you feel that
  `(x  y) != 0` is more prevalent than `!!(x  y)`? I'm curious, because
  we should probably choose the idiom which is more recognizable.
  
  For the record, I slightly prefer !! because I've encountered it often
  in idiomatic Python, but it really doesn't matter to me. I suspect that
  != 0 may be the more common idiom in C.
 
 I prefer != 0 as well.

I'm convinced that != 0 is the right choice here, despite my personal
preference.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] i965: Use proper pitch for scalar GS pull constants and UBOs.

2015-05-29 Thread Ben Widawsky
On Fri, May 29, 2015 at 12:26:40PM -0700, Kenneth Graunke wrote:
 See the corresponding code in brw_vs_surface_state.c.
 
 Signed-off-by: Kenneth Graunke kenn...@whitecape.org
 ---
  src/mesa/drivers/dri/i965/brw_gs_surface_state.c | 10 +++---
  1 file changed, 7 insertions(+), 3 deletions(-)
 
 diff --git a/src/mesa/drivers/dri/i965/brw_gs_surface_state.c 
 b/src/mesa/drivers/dri/i965/brw_gs_surface_state.c
 index a323e4d..bfc4516 100644
 --- a/src/mesa/drivers/dri/i965/brw_gs_surface_state.c
 +++ b/src/mesa/drivers/dri/i965/brw_gs_surface_state.c
 @@ -47,11 +47,12 @@ brw_upload_gs_pull_constants(struct brw_context *brw)
return;
  
 /* BRW_NEW_GS_PROG_DATA */
 -   const struct brw_stage_prog_data *prog_data = 
 brw-gs.prog_data-base.base;
 +   struct brw_vue_prog_data *prog_data = brw-gs.prog_data-base;
 +   bool dword_pitch = prog_data-dispatch_mode == DISPATCH_MODE_SIMD8;
  
 /* _NEW_PROGRAM_CONSTANTS */
 brw_upload_pull_constants(brw, BRW_NEW_GS_CONSTBUF, gp-program.Base,
 - stage_state, prog_data, false);
 + stage_state, prog_data-base, dword_pitch);
  }
  
  const struct brw_tracked_state brw_gs_pull_constants = {
 @@ -77,8 +78,11 @@ brw_upload_gs_ubo_surfaces(struct brw_context *brw)
return;
  
 /* BRW_NEW_GS_PROG_DATA */
 +   struct brw_vue_prog_data *prog_data = brw-gs.prog_data-base;
 +   bool dword_pitch = prog_data-dispatch_mode == DISPATCH_MODE_SIMD8;
 +
 brw_upload_ubo_surfaces(brw, prog-_LinkedShaders[MESA_SHADER_GEOMETRY],
 -brw-gs.base, brw-gs.prog_data-base.base, false);
 +brw-gs.base, prog_data-base, dword_pitch);
  }
  
  const struct brw_tracked_state brw_gs_ubo_surfaces = {

See comment in the previous patch... but as you like:
Reviewed-by: Ben Widawsky b...@bwidawsk.net

-- 
Ben Widawsky, Intel Open Source Technology Center
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/6] i965/gen8: Correct HALIGN for AUX surfaces

2015-05-29 Thread Chad Versace
On Fri 29 May 2015, Chad Versace wrote:
 On Thu 28 May 2015, Ben Widawsky wrote:
  This restriction was attempted in this commit:
  commit 47053464630888f819ef8cc44278f1a1220159b9
  Author: Anuj Phogat anuj.pho...@gmail.com
  Date:   Fri Feb 13 11:21:21 2015 -0800
  
 i965/gen8: Use HALIGN_16 if MCS is enabled for non-MSRT
  
  However, the commit itself doesn't achieve the desired goal as determined 
  by the
  asserts which the next patch adds. mcs_mt is never a value because we're in 
  the
  process of allocating the mcs_mt miptree when we get to this function. I 
  didn't
  check, but perhaps this would work with blorp, however, meta clears 
  allocate the
  miptree structure (which AFAICT needs the alignment also) way before it
  allocates using meta clears where the renderbuffer is allocated way before 
  the
  aux buffer.
  
  The restriction is referenced in a few places, but the most concise one 
  [IMO]
  from the spec is for Gen9. Gen8 8 loosens the restriction in that it only
  requires this for non-msrt surface.
  
 When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E, HALIGN 16 
  must
 be used.
  
  With the code before the miptree layout flag rework (patches preceding 
  this),
  accomplishing this workaround is very difficult.
  
  Cc: Anuj Phogat anuj.pho...@gmail.com
  Cc: Neil Roberts n...@linux.intel.com
  Signed-off-by: Ben Widawsky b...@bwidawsk.net
  ---
   src/mesa/drivers/dri/i965/brw_tex_layout.c| 16 ++--
   src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 15 ---
   src/mesa/drivers/dri/i965/intel_mipmap_tree.h |  4 +++-
   3 files changed, 25 insertions(+), 10 deletions(-)
  
  diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c 
  b/src/mesa/drivers/dri/i965/brw_tex_layout.c
  index 72b02a2..b51c7c7 100644
  --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c
  +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c
  @@ -41,8 +41,13 @@
   
   static unsigned int
   intel_horizontal_texture_alignment_unit(struct brw_context *brw,
  -struct intel_mipmap_tree *mt)
  +struct intel_mipmap_tree *mt,
  +uint32_t layout_flags)
   {
  +
  +   if (layout_flags  MIPTREE_LAYOUT_FORCE_HALIGN16)
  +  return 16;
  +
 
 This snippet makes sense. If the force flag is set, then don't bother
 inspecting any other state.
 
  /**
   * From the Alignment Unit Size section of various specs, namely:
   * - Gen3 Spec: Memory Data Formats Volume, Section 1.20.1.4
  @@ -91,9 +96,6 @@ intel_horizontal_texture_alignment_unit(struct 
  brw_context *brw,
  if (brw-gen = 7  mt-format == MESA_FORMAT_Z_UNORM16)
 return 8;
   
  -   if (brw-gen == 8  mt-mcs_mt  mt-num_samples = 1)
  -  return 16;
  -
  return 4;
   }
   
  @@ -459,7 +461,8 @@ brw_miptree_layout_texture_3d(struct brw_context *brw,
   }
   
   void
  -brw_miptree_layout(struct brw_context *brw, struct intel_mipmap_tree *mt)
  +brw_miptree_layout(struct brw_context *brw, struct intel_mipmap_tree *mt,
  +   uint32_t layout_flags)
   {
  bool multisampled = mt-num_samples  1;
  bool gen6_hiz_or_stencil = false;
  @@ -492,8 +495,9 @@ brw_miptree_layout(struct brw_context *brw, struct 
  intel_mipmap_tree *mt)
mt-align_w = 128 / mt-cpp;
mt-align_h = 32;
 }
  +  assert((layout_flags  MIPTREE_LAYOUT_FORCE_HALIGN16) == 0);
 
 This assertion makes sense. 
 
  } else {
  -  mt-align_w = intel_horizontal_texture_alignment_unit(brw, mt);
  +  mt-align_w = intel_horizontal_texture_alignment_unit(brw, mt, 
  layout_flags);
 mt-align_h =
intel_vertical_texture_alignment_unit(brw, mt-format, 
  multisampled);
  }
  diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
  b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
  index 75ee19a..a1ac0cf 100644
  --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
  +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
  @@ -475,7 +475,10 @@ intel_miptree_create_layout(struct brw_context *brw,
  if (layout_flags  MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD)
 mt-array_layout = ALL_SLICES_AT_EACH_LOD;
   
  -   brw_miptree_layout(brw, mt);
  +   if (intel_miptree_is_fast_clear_capable(brw, mt))
  +  layout_flags |= MIPTREE_LAYOUT_FORCE_HALIGN16;
  +
  +   brw_miptree_layout(brw, mt, layout_flags);
 
 This does not make sense to me. I though HALIGN16 didn't exist before
 Skylake, but here you're setting it on Ivybridge through Broadwell too.
 Am I misunderstanding something?

I did some homewwork. HALIGN16 was introduced in Broadwell and does not
exist in Haswell. So this code still looks wrong to me.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 5/6] i965/gen9: Set HALIGN_16 for all aux buffers

2015-05-29 Thread Chad Versace
On Thu 28 May 2015, Ben Widawsky wrote:
 Just like the previous patch, but for the GEN9 constraints.
 
 Signed-off-by: Ben Widawsky b...@bwidawsk.net
 ---
  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 5 +
  1 file changed, 5 insertions(+)
 
 diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
 b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
 index a1ac0cf..89030aa 100644
 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
 +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
 @@ -655,6 +655,11 @@ intel_miptree_create(struct brw_context *brw,
  
 assert((layout_flags  MIPTREE_LAYOUT_DISABLE_AUX) == 0);
 assert((layout_flags  MIPTREE_LAYOUT_FOR_BO) == 0);
 +
 +   if (brw-gen = 9  num_samples  1)
 +  layout_flags |= MIPTREE_LAYOUT_FORCE_HALIGN16;
 +

The commit subject says this change applies to all aux buffers, and
does not mention non-aux buffers. But the patch applies HALIGN16 to
a large set of non-aux buffers: all multisample buffers. Should the
patch's code be fixed to agree with the commit subject, or the converse?
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/6] i965: Consolidate certain miptree params to flags

2015-05-29 Thread Kenneth Graunke
On Friday, May 29, 2015 12:33:10 PM Chad Versace wrote:
 On Fri 29 May 2015, Matt Turner wrote:
  On Thu, May 28, 2015 at 10:21 AM, Ben Widawsky
   @@ -286,7 +284,7 @@ intel_miptree_create_layout(struct brw_context *brw,
   mt-logical_height0 = height0;
   mt-logical_depth0 = depth0;
   mt-fast_clear_state = INTEL_FAST_CLEAR_STATE_NO_MCS;
   -   mt-disable_aux_buffers = disable_aux_buffers;
   +   mt-disable_aux_buffers = !!(layout_flags  
   MIPTREE_LAYOUT_DISABLE_AUX);
  
  FWIW, I much prefer (x  y) != 0 to !!(x  y).
 
 Matt, in the C code you've encountered in the wild, do you feel that
 `(x  y) != 0` is more prevalent than `!!(x  y)`? I'm curious, because
 we should probably choose the idiom which is more recognizable.
 
 For the record, I slightly prefer !! because I've encountered it often
 in idiomatic Python, but it really doesn't matter to me. I suspect that
 != 0 may be the more common idiom in C.

I prefer != 0 as well.


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/6] i965: Consolidate certain miptree params to flags

2015-05-29 Thread Chad Versace
On Fri 29 May 2015, Pohjolainen, Topi wrote:
 On Fri, May 29, 2015 at 09:32:53AM +0300, Pohjolainen, Topi wrote:
  On Thu, May 28, 2015 at 10:21:29AM -0700, Ben Widawsky wrote:
   I think pretty much everyone agrees that having more than a single bool 
   as a
   function argument is bordering on a bad idea. What sucks about the current
   code is in several instances it's necessary to propagate these boolean
   selections down to lower layers of the code. This requires plumbing 
   (mechanical,
   but still churn) pretty much all of the miptree functions each time.  By
   introducing the flags paramater, it is possible to add miptree 
   constraints very
   easily.
  
  I like this a lot. I have a few simple comments below.

Me too. This is a good cleanup.


   diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c 
   b/src/mesa/drivers/dri/i965/intel_fbo.c
   index aebed72..1b3a72f 100644
   --- a/src/mesa/drivers/dri/i965/intel_fbo.c
   +++ b/src/mesa/drivers/dri/i965/intel_fbo.c
   @@ -390,7 +390,7 @@ intel_image_target_renderbuffer_storage(struct 
   gl_context *ctx,
 image-height,
 1,
 image-pitch,
   - true /*disable_aux_buffers*/);
   + MIPTREE_LAYOUT_DISABLE_AUX);
   if (!irb-mt)
  return;

   @@ -1027,10 +1027,9 @@ intel_renderbuffer_move_to_temp(struct brw_context 
   *brw,
 intel_image-base.Base.Level,
 intel_image-base.Base.Level,
 width, height, depth,
   - true,
 irb-mt-num_samples,
 INTEL_MIPTREE_TILING_ANY,
   - false);
   + MIPTREE_LAYOUT_ACCELERATED_UPLOAD);

   if (intel_miptree_wants_hiz_buffer(brw, new_mt)) {
  intel_miptree_alloc_hiz(brw, new_mt);
   diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
   b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
   index 24a5c3d..b243f8a 100644
   --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
   +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
   @@ -244,10 +244,8 @@ intel_miptree_create_layout(struct brw_context *brw,
GLuint width0,
GLuint height0,
GLuint depth0,
   -bool for_bo,
GLuint num_samples,
   -bool force_all_slices_at_each_lod,
   -bool disable_aux_buffers)
   +uint32_t layout_flags)
{
   struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1);
   if (!mt)
   @@ -286,7 +284,7 @@ intel_miptree_create_layout(struct brw_context *brw,
   mt-logical_height0 = height0;
   mt-logical_depth0 = depth0;
   mt-fast_clear_state = INTEL_FAST_CLEAR_STATE_NO_MCS;
   -   mt-disable_aux_buffers = disable_aux_buffers;
   +   mt-disable_aux_buffers = !!(layout_flags  
   MIPTREE_LAYOUT_DISABLE_AUX);
  
  You didn't mean to have double negation (!!), did you?
 
 I actually meant that isn't layout_flags  MIPTREE_LAYOUT_DISABLE_AUX enough
 on its own?

I think `layout_flags  MIPTREE_LAYOUT_DISABLE_AUX` is sufficient solely
because mt-disable_aux_buffers has type bool. I prefer to keep Ben's
original `!!`, though`, because it feels more future-proof against
future code changes.

   @@ -422,12 +420,14 @@ intel_miptree_create_layout(struct brw_context *brw,
   mt-physical_height0 = height0;
   mt-physical_depth0 = depth0;

   -   if (!for_bo 
   +   if (!(layout_flags  MIPTREE_LAYOUT_FOR_BO) 
   _mesa_get_format_base_format(format) == GL_DEPTH_STENCIL 
   (brw-must_use_separate_stencil ||
 (brw-has_separate_stencil 
 intel_miptree_wants_hiz_buffer(brw, mt {
   -  const bool force_all_slices_at_each_lod = brw-gen == 6;
   +  uint32_t stencil_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD;
   +  if (brw-gen == 6)
   + stencil_flags |= MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD;
  
  Perhaps a separating line here, your choice.

+1

   @@ -527,6 +527,10 @@ bool
intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw,
 struct intel_mipmap_tree *mt);

   +#define MIPTREE_LAYOUT_ACCELERATED_UPLOAD   (10)
  
  I was wondering if we could call the the flags something else than layout.
  The manner of upload doesn't technically have much to do with layout. Maybe
  just flags, shrug.

I have nitpick too :)

I'd like to see these defined in an anonymous enum block so the token
names show up in gdb.

enum {
MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD = 1  1,
...,
};

I agree with 

Re: [Mesa-dev] [PATCH 1/2] i965: Don't use a temporary when generating an indirect sample

2015-05-29 Thread Chris Forbes
This thing has been trouble since I wrote it. Nice to see it go.

Both patches are:

Reviewed-by: Chris Forbes chr...@ijw.co.nz
On May 30, 2015 6:28 AM, Matt Turner matts...@gmail.com wrote:

 On Fri, May 29, 2015 at 6:53 AM, Neil Roberts n...@linux.intel.com
 wrote:
  Previously when generating the send instruction for a sample
  instruction with an indirect sampler it would use the destination
  register as a temporary store. This breaks when used in combination
  with the opt_sampler_eot optimisation because that forces the
  destination to be null. This patch fixes that by avoiding the temp
  register altogether.
 
  The reason the temporary register was needed was because it was trying
  to ensure the binding table index doesn't overflow a byte by and'ing
  it with 0xff. The result is then or'd with samper_index8. This patch
  instead just and's the whole thing by 0xfff. This will ensure that a
  bogus sampler index won't overflow into the rest of the message
  descriptor but unlike the previous code it won't ensure that the
  binding table index doesn't overflow into the sampler index. It
  doesn't seem like that should matter very much though because if the
  shader is generating a bogus sampler index then it's going to just get
  garbage out either way.
 
  Instead of doing sampler_index8|(sampler_index+base_table_index) the
  new code avoids one operation by doing
  sampler_index*0x101+base_table_index which should be equivalent.
  However if we wanted to avoid the multiply for some reason we could do
  this by adding an extra or instruction still without needing the
  temporary register.
 
  This fixes a number of Piglit tests on Skylake that were using
  indirect samplers such as:
 
   spec@arb_gpu_shader5@execution@sampler_array_indexing@fs-simple
  ---
   src/mesa/drivers/dri/i965/brw_fs_generator.cpp   | 17 -
   src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 16 
   2 files changed, 8 insertions(+), 25 deletions(-)
 
  diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
 b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
  index 0be0f86..ea46b1a 100644
  --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
  +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
  @@ -779,27 +779,18 @@ fs_generator::generate_tex(fs_inst *inst, struct
 brw_reg dst, struct brw_reg src
 brw_mark_surface_used(prog_data, sampler +
 base_binding_table_index);
  } else {
 /* Non-const sampler index */
  -  /* Note: this clobbers `dst` as a temporary before emitting the
 send */
 
 struct brw_reg addr = vec1(retype(brw_address_reg(0),
 BRW_REGISTER_TYPE_UD));
  -  struct brw_reg temp = vec1(retype(dst, BRW_REGISTER_TYPE_UD));
  -
 struct brw_reg sampler_reg = vec1(retype(sampler_index,
 BRW_REGISTER_TYPE_UD));
 
 brw_push_insn_state(p);
 brw_set_default_mask_control(p, BRW_MASK_DISABLE);
 brw_set_default_access_mode(p, BRW_ALIGN_1);
 
  -  /* Some care required: `sampler` and `temp` may alias:
  -   *addr = sampler  0xff
  -   *temp = (sampler  8)  0xf00
  -   *addr = addr | temp
  -   */
  -  brw_ADD(p, addr, sampler_reg,
 brw_imm_ud(base_binding_table_index));
  -  brw_SHL(p, temp, sampler_reg, brw_imm_ud(8u));
  -  brw_AND(p, temp, temp, brw_imm_ud(0x0f00));
  -  brw_AND(p, addr, addr, brw_imm_ud(0x0ff));
  -  brw_OR(p, addr, addr, temp);
  +  /* addr = ((sampler * 0x101) + base_binding_table_index)  0xfff
 */
  +  brw_MUL(p, addr, sampler_reg, brw_imm_ud(0x101));
  +  brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index));
  +  brw_AND(p, addr, addr, brw_imm_ud(0xfff));
 
 brw_pop_insn_state(p);
 
  diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
 b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
  index 20d096c..1d3f5ed 100644
  --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
  +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
  @@ -398,10 +398,8 @@ vec4_generator::generate_tex(vec4_instruction *inst,
 brw_mark_surface_used(prog_data-base, sampler +
 base_binding_table_index);
  } else {
 /* Non-constant sampler index. */
  -  /* Note: this clobbers `dst` as a temporary before emitting the
 send */
 
 struct brw_reg addr = vec1(retype(brw_address_reg(0),
 BRW_REGISTER_TYPE_UD));
  -  struct brw_reg temp = vec1(retype(dst, BRW_REGISTER_TYPE_UD));
 

 I'd delete the blank line here as well to match the fs code.

 Really nice solution. I'd been trying to figure out the best way to
 get an additional temporary in here, but this is clearly better.

 Reviewed-by: Matt Turner matts...@gmail.com
 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org

[Mesa-dev] [PATCH 3/3] i965: Use proper pitch for scalar GS pull constants and UBOs.

2015-05-29 Thread Kenneth Graunke
See the corresponding code in brw_vs_surface_state.c.

Signed-off-by: Kenneth Graunke kenn...@whitecape.org
---
 src/mesa/drivers/dri/i965/brw_gs_surface_state.c | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_gs_surface_state.c 
b/src/mesa/drivers/dri/i965/brw_gs_surface_state.c
index a323e4d..bfc4516 100644
--- a/src/mesa/drivers/dri/i965/brw_gs_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_gs_surface_state.c
@@ -47,11 +47,12 @@ brw_upload_gs_pull_constants(struct brw_context *brw)
   return;
 
/* BRW_NEW_GS_PROG_DATA */
-   const struct brw_stage_prog_data *prog_data = brw-gs.prog_data-base.base;
+   struct brw_vue_prog_data *prog_data = brw-gs.prog_data-base;
+   bool dword_pitch = prog_data-dispatch_mode == DISPATCH_MODE_SIMD8;
 
/* _NEW_PROGRAM_CONSTANTS */
brw_upload_pull_constants(brw, BRW_NEW_GS_CONSTBUF, gp-program.Base,
- stage_state, prog_data, false);
+ stage_state, prog_data-base, dword_pitch);
 }
 
 const struct brw_tracked_state brw_gs_pull_constants = {
@@ -77,8 +78,11 @@ brw_upload_gs_ubo_surfaces(struct brw_context *brw)
   return;
 
/* BRW_NEW_GS_PROG_DATA */
+   struct brw_vue_prog_data *prog_data = brw-gs.prog_data-base;
+   bool dword_pitch = prog_data-dispatch_mode == DISPATCH_MODE_SIMD8;
+
brw_upload_ubo_surfaces(brw, prog-_LinkedShaders[MESA_SHADER_GEOMETRY],
-  brw-gs.base, brw-gs.prog_data-base.base, false);
+  brw-gs.base, prog_data-base, dword_pitch);
 }
 
 const struct brw_tracked_state brw_gs_ubo_surfaces = {
-- 
2.4.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/3] i965: Create a shader_dispatch_mode enum to replace VS/GS fields.

2015-05-29 Thread Kenneth Graunke
We used to store the GS dispatch mode in brw_gs_prog_data while
separately storing the VS dispatch mode in brw_vue_prog_data::simd8.

This patch introduces an enum to represent all possible dispatch modes,
and stores it in brw_vue_prog_data::dispatch_mode, unifying the two.

Based on a suggestion by Matt Turner.

Signed-off-by: Kenneth Graunke kenn...@whitecape.org
---
 src/mesa/drivers/dri/i965/brw_context.h   | 16 +++-
 src/mesa/drivers/dri/i965/brw_defines.h   |  5 ++---
 src/mesa/drivers/dri/i965/brw_vec4.cpp|  5 -
 src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp |  8 
 src/mesa/drivers/dri/i965/brw_vs_surface_state.c  |  4 ++--
 src/mesa/drivers/dri/i965/gen7_gs_state.c |  2 +-
 src/mesa/drivers/dri/i965/gen8_gs_state.c |  3 ++-
 src/mesa/drivers/dri/i965/gen8_vs_state.c |  3 ++-
 8 files changed, 24 insertions(+), 22 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index abc11f6..01c4283 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -605,6 +605,12 @@ struct brw_ff_gs_prog_data {
unsigned svbi_postincrement_value;
 };
 
+enum shader_dispatch_mode {
+   DISPATCH_MODE_4X1_SINGLE = 0,
+   DISPATCH_MODE_4X2_DUAL_INSTANCE = 1,
+   DISPATCH_MODE_4X2_DUAL_OBJECT = 2,
+   DISPATCH_MODE_SIMD8 = 3,
+};
 
 /* Note: brw_vue_prog_data_compare() must be updated when adding fields to
  * this struct!
@@ -622,7 +628,7 @@ struct brw_vue_prog_data {
 */
GLuint urb_entry_size;
 
-   bool simd8;
+   enum shader_dispatch_mode dispatch_mode;
 };
 
 
@@ -720,14 +726,6 @@ struct brw_gs_prog_data
int invocations;
 
/**
-* Dispatch mode, can be any of:
-* GEN7_GS_DISPATCH_MODE_DUAL_OBJECT
-* GEN7_GS_DISPATCH_MODE_DUAL_INSTANCE
-* GEN7_GS_DISPATCH_MODE_SINGLE
-*/
-   int dispatch_mode;
-
-   /**
 * Gen6 transform feedback enabled flag.
 */
bool gen6_xfb_enabled;
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index dedc381..f6da305 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1773,9 +1773,8 @@ enum brw_message_target {
 # define GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID 1
 # define GEN7_GS_CONTROL_DATA_HEADER_SIZE_SHIFT20
 # define GEN7_GS_INSTANCE_CONTROL_SHIFT15
-# define GEN7_GS_DISPATCH_MODE_SINGLE  (0  11)
-# define GEN7_GS_DISPATCH_MODE_DUAL_INSTANCE   (1  11)
-# define GEN7_GS_DISPATCH_MODE_DUAL_OBJECT (2  11)
+# define GEN7_GS_DISPATCH_MODE_SHIFT11
+# define GEN7_GS_DISPATCH_MODE_MASK INTEL_MASK(12, 11)
 # define GEN6_GS_STATISTICS_ENABLE (1  10)
 # define GEN6_GS_SO_STATISTICS_ENABLE  (1  9)
 # define GEN6_GS_RENDERING_ENABLE  (1  8)
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index a324798..5a9c3f5 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -1894,6 +1894,8 @@ brw_vs_emit(struct brw_context *brw,
 brw_create_nir(brw, NULL, c-vp-program.Base, 
MESA_SHADER_VERTEX);
   }
 
+  prog_data-base.dispatch_mode = DISPATCH_MODE_SIMD8;
+
   fs_visitor v(brw, mem_ctx, MESA_SHADER_VERTEX, c-key,
prog_data-base.base, prog, c-vp-program.Base, 8);
   if (!v.run_vs()) {
@@ -1926,11 +1928,12 @@ brw_vs_emit(struct brw_context *brw,
   g.generate_code(v.cfg, 8);
   assembly = g.get_assembly(final_assembly_size);
 
-  prog_data-base.simd8 = true;
   c-base.last_scratch = v.last_scratch;
}
 
if (!assembly) {
+  prog_data-base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT;
+
   vec4_vs_visitor v(brw, c, prog_data, prog, mem_ctx);
   if (!v.run()) {
  if (prog) {
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
index 363e30e..eacb2f5 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
@@ -106,7 +106,7 @@ vec4_gs_visitor::setup_payload()
 * to be interleaved, so one register contains two attribute slots.
 */
int attributes_per_reg =
-  c-prog_data.dispatch_mode == GEN7_GS_DISPATCH_MODE_DUAL_OBJECT ? 1 : 2;
+  c-prog_data.base.dispatch_mode == DISPATCH_MODE_4X2_DUAL_OBJECT ? 1 : 2;
 
/* If a geometry shader tries to read from an input that wasn't written by
 * the vertex shader, that produces undefined results, but it shouldn't
@@ -655,7 +655,7 @@ brw_gs_emit(struct brw_context *brw,
*/
   if (c-prog_data.invocations = 1 
   likely(!(INTEL_DEBUG  DEBUG_NO_DUAL_OBJECT_GS))) {
- c-prog_data.dispatch_mode = GEN7_GS_DISPATCH_MODE_DUAL_OBJECT;
+ 

[Mesa-dev] [PATCH 1/3] i965: Drop Vector Mask Enable bit from 3DSTATE_GS on Gen8+.

2015-05-29 Thread Kenneth Graunke
The documentation makes it pretty clear that we shouldn't use this:

   Under normal conditions SW shall specify DMask, as the GS stage
will provide a Dispatch Mask appropriate to SIMD4x2 or SIMD8 thread
execution (as a function of dispatch mode).  E.g., for SIMD4x2
execution, the GS stage will generate a Dispatch Mask that is equal
to what the EU would use as the Vector Mask.  For SIMD8 execution
there is no known usage model for use of Vector Mask (as there is
for PS shaders).

I also managed to find descriptions of DMask and VMask, in the State
Register (sr0.2/3) field descriptions:

   Dispatch Mask (DMask).  This 32-bit field specifies which channels
are active at Dispatch time.

   Vector Mask (VMask).  This 32-bit field contains, for each 4-bit
group, the OR of the corresponding 4-bit group in the dispatch
mask.

SIMD4x2 shaders process one or two vec4 values, with each 4-bit group
corresponding to xyzw channel enables (either all on, or all off).
Thus, DMask = VMask in SIMD4x2 mode.  But in SIMD8 mode, 4-bit groups
are meaningless, so it just messes up your values.

Signed-off-by: Kenneth Graunke kenn...@whitecape.org
---
 src/mesa/drivers/dri/i965/gen8_gs_state.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

I'm working on SIMD8 geometry shader support.  These are a few simple patches
that are pretty straightforward and could be landed on their own.

diff --git a/src/mesa/drivers/dri/i965/gen8_gs_state.c 
b/src/mesa/drivers/dri/i965/gen8_gs_state.c
index 6a0e215..0763e91 100644
--- a/src/mesa/drivers/dri/i965/gen8_gs_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_gs_state.c
@@ -48,8 +48,7 @@ gen8_upload_gs_state(struct brw_context *brw)
   OUT_BATCH(_3DSTATE_GS  16 | (10 - 2));
   OUT_BATCH(stage_state-prog_offset);
   OUT_BATCH(0);
-  OUT_BATCH(GEN6_GS_VECTOR_MASK_ENABLE |
-brw-geometry_program-VerticesIn |
+  OUT_BATCH(brw-geometry_program-VerticesIn |
 ((ALIGN(stage_state-sampler_count, 4)/4) 
  GEN6_GS_SAMPLER_COUNT_SHIFT) |
 ((prog_data-base.binding_table.size_bytes / 4) 
-- 
2.4.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/6] i965/gen9: Only allow Y-Tiled MCS buffers

2015-05-29 Thread Chad Versace
On Thu 28 May 2015, Ben Widawsky wrote:
 For GEN9, much of the logic to use X-Tiled buffers has been stripped out. It 
 is
 still supported in some places, but it's never desirable. Unfortunately we 
 don't
 yet have the ability to have Y-Tiled scanout (see:
 http://patchwork.freedesktop.org/patch/46984/),
 
 NOTE: This patch shouldn't actually do anything since SKL doesn't yet use fast
 clears, and that's the only case we can get to this function (by way of
 intel_update_winsys_renderbuffer_miptree)
 
 Signed-off-by: Ben Widawsky b...@bwidawsk.net

Reviewed-by: Chad Versace chad.vers...@intel.com
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/6] i965: Consolidate certain miptree params to flags

2015-05-29 Thread Matt Turner
On Thu, May 28, 2015 at 10:21 AM, Ben Widawsky
benjamin.widaw...@intel.com wrote:
 I think pretty much everyone agrees that having more than a single bool as a
 function argument is bordering on a bad idea. What sucks about the current
 code is in several instances it's necessary to propagate these boolean
 selections down to lower layers of the code. This requires plumbing 
 (mechanical,
 but still churn) pretty much all of the miptree functions each time.  By
 introducing the flags paramater, it is possible to add miptree constraints 
 very
 easily.

 The use of this, as is already the case, is sometimes we have some information
 at the time we create the miptree that needs to be known all the way at the
 lowest levels of the create/allocation, disable_aux_buffers is currently one
 such example. There will be another example coming up in a few patches.

 Cc: Chad Versace chad.vers...@linux.intel.com
 Signed-off-by: Ben Widawsky b...@bwidawsk.net
 ---
  src/mesa/drivers/dri/i965/intel_fbo.c  |  5 +-
  src/mesa/drivers/dri/i965/intel_mipmap_tree.c  | 86 
 +-
  src/mesa/drivers/dri/i965/intel_mipmap_tree.h  |  9 ++-
  src/mesa/drivers/dri/i965/intel_pixel_draw.c   |  2 +-
  src/mesa/drivers/dri/i965/intel_tex.c  |  8 +--
  src/mesa/drivers/dri/i965/intel_tex.h  |  2 +-
  src/mesa/drivers/dri/i965/intel_tex_image.c| 14 ++---
  src/mesa/drivers/dri/i965/intel_tex_validate.c |  3 +-
  8 files changed, 63 insertions(+), 66 deletions(-)

 diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c 
 b/src/mesa/drivers/dri/i965/intel_fbo.c
 index aebed72..1b3a72f 100644
 --- a/src/mesa/drivers/dri/i965/intel_fbo.c
 +++ b/src/mesa/drivers/dri/i965/intel_fbo.c
 @@ -390,7 +390,7 @@ intel_image_target_renderbuffer_storage(struct gl_context 
 *ctx,
   image-height,
   1,
   image-pitch,
 - true /*disable_aux_buffers*/);
 + MIPTREE_LAYOUT_DISABLE_AUX);
 if (!irb-mt)
return;

 @@ -1027,10 +1027,9 @@ intel_renderbuffer_move_to_temp(struct brw_context 
 *brw,
   intel_image-base.Base.Level,
   intel_image-base.Base.Level,
   width, height, depth,
 - true,
   irb-mt-num_samples,
   INTEL_MIPTREE_TILING_ANY,
 - false);
 + MIPTREE_LAYOUT_ACCELERATED_UPLOAD);

 if (intel_miptree_wants_hiz_buffer(brw, new_mt)) {
intel_miptree_alloc_hiz(brw, new_mt);
 diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
 b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
 index 24a5c3d..b243f8a 100644
 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
 +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
 @@ -244,10 +244,8 @@ intel_miptree_create_layout(struct brw_context *brw,
  GLuint width0,
  GLuint height0,
  GLuint depth0,
 -bool for_bo,
  GLuint num_samples,
 -bool force_all_slices_at_each_lod,
 -bool disable_aux_buffers)
 +uint32_t layout_flags)
  {
 struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1);
 if (!mt)
 @@ -286,7 +284,7 @@ intel_miptree_create_layout(struct brw_context *brw,
 mt-logical_height0 = height0;
 mt-logical_depth0 = depth0;
 mt-fast_clear_state = INTEL_FAST_CLEAR_STATE_NO_MCS;
 -   mt-disable_aux_buffers = disable_aux_buffers;
 +   mt-disable_aux_buffers = !!(layout_flags  MIPTREE_LAYOUT_DISABLE_AUX);

FWIW, I much prefer (x  y) != 0 to !!(x  y).
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 15/15] egl: expose EGL 1.5 if all requirements are met

2015-05-29 Thread Marek Olšák
Yes, everything is reviewed.

Marek

On Fri, May 29, 2015 at 8:40 PM, Chad Versace chad.vers...@intel.com wrote:
 The last two patches, 14-15, are
 Reviewed-by: Chad Versace chad.vers...@intel.com

 I think I've reviewed all the patches in this series. Please let me know
 if I missed any.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/6] i965: Extract tiling from fast clear decision

2015-05-29 Thread Chad Versace
On Thu 28 May 2015, Ben Widawsky wrote:
 There are several constraints when determining if one can fast clear a 
 surface.
 Some of these are alignment, pixel density, tiling formats, and others that 
 vary
 by generation. The helper function which exists today does a suitable job,
 however it conflates BO properties with Miptree properties when using
 tiling. I consider the former to be attributes of the physical surface, things
 which are determined through BO allocation, and the latter being attributes
 which are derived from the API, and having nothing to do with the underlying
 surface.
 
 Tiling properties are a distinct operation from the creation of a miptree, and
 by removing this, we gain flexibility throughout the code to make 
 determinations
 about when we can or cannot fast clear strictly on the miptree.

I don't understand the above sentence. Tiling properties cannot be
a distinct operation, because tiling properties is not an operation at
all. I'm not being a grammar jerk. I'm just dense and don't understand
this paragraph's claims.

 To signify this change, I've also renamed the function to indicate it is a
 distinction made on the miptree. I am torn as to whether or not it was a good
 idea to remove non_msrt since it's a really nice thing for grep.
 
 Cc: Chad Versace chad.vers...@linux.intel.com
 Signed-off-by: Ben Widawsky b...@bwidawsk.net
 ---
  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 37 
 +++
  src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 11 
  2 files changed, 32 insertions(+), 16 deletions(-)
 
 diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
 b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
 index 68d405c..75ee19a 100644
 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
 +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
 @@ -158,15 +158,33 @@ intel_get_non_msrt_mcs_alignment(struct brw_context 
 *brw,
 }
  }
  
 +/**
 + * Determine the BO backing the miptree has a suitable tile format. For Gen7,
 + * and 8 this means any tiled format.

Hmm... a suitable tile format for what? How about this rephrasing?

Does the hardware support non-MSRT for this tiling format

 + *
 + * From the Ivy Bridge PRM, Vol2 Part1 11.7 MCS Buffer for Render 
 Target(s),
 + * beneath the Fast Color Clear bullet (p326):
 + *
 + * - Support is limited to tiled render targets.
 + */
 +bool
 +intel_is_non_msrt_mcs_tile_supported(struct brw_context *brw,
 + unsigned tiling)

The function name is misleading. It hints that there exists such a thing
as an MCS tile. Perhaps rename it to
intel_tiling_supports_non_msrt_mcs?

 +{
 +   if (brw-gen = 9)
 +  return tiling == I915_TILING_Y;
 +
 +   return tiling != I915_TILING_NONE;

To be complete, the if-ladder should be...

if (brw-gen = 9) {
return tiling == I915_TILIING_Y;
} else if (brw-gen = 7) {
return tiling != I915_TILING_NONE;
} else {
return false;
}

...because gen6 and below do not support MCS (iirc).

 +}
  
  /**
   * For a single-sampled render target (non-MSRT), determine if an MCS 
 buffer
 - * can be used.
 + * can be used. This doesn't (and should not) inquire about the BO 
 properties of
 + * the buffer.

The comment stutters and confuses me, because acronym expansion results
in:

This doesn't (and should not) inquire about the buffer object
properties of the buffer.

Which is equivalent to:

This doesn't (and should not) inquire about the buffer's buffer
object properties.

Instead of referring the buffer's buffer object properties, I think
it's much clearer to refer to the properties of the miptree's BO:

This doesn't (and should not) inspect any properties of the
miptree's BO.


Other than the naming and documentation nitpicks, the code itself looks
good to me. I like the clean separation the patch introduces.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/6] i965: Consolidate certain miptree params to flags

2015-05-29 Thread Chad Versace
On Fri 29 May 2015, Matt Turner wrote:
 On Thu, May 28, 2015 at 10:21 AM, Ben Widawsky
  @@ -286,7 +284,7 @@ intel_miptree_create_layout(struct brw_context *brw,
  mt-logical_height0 = height0;
  mt-logical_depth0 = depth0;
  mt-fast_clear_state = INTEL_FAST_CLEAR_STATE_NO_MCS;
  -   mt-disable_aux_buffers = disable_aux_buffers;
  +   mt-disable_aux_buffers = !!(layout_flags  MIPTREE_LAYOUT_DISABLE_AUX);
 
 FWIW, I much prefer (x  y) != 0 to !!(x  y).

Matt, in the C code you've encountered in the wild, do you feel that
`(x  y) != 0` is more prevalent than `!!(x  y)`? I'm curious, because
we should probably choose the idiom which is more recognizable.

For the record, I slightly prefer !! because I've encountered it often
in idiomatic Python, but it really doesn't matter to me. I suspect that
!= 0 may be the more common idiom in C.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 12/15] egl: add eglCreateImage

2015-05-29 Thread Chad Versace
On Thu 28 May 2015, Marek Olšák wrote:
 A new patch is attached. Please review.

LGTM.
Reviewed-by: Chad Versace chad.vers...@intel.com
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: Disable compaction for EOT send messages

2015-05-29 Thread Matt Turner
On Wed, May 27, 2015 at 10:16 PM, Ben Widawsky
benjamin.widaw...@intel.com wrote:
 AFAICT, there is no real way to make sure a send message with EOT is properly
 ignored from compact, nor can I see a way to actually encode EOT while
 compacting. Before the single send optimization we'd always bail because we 
 hit
 the is_immediate  !is_compactable_immediate case. However, with single send,
 is_immediate is not true, and so we end up trying to compact the 
 un-compactible.

I investigated, and yeah, your analysis is exactly right. The EOT bit
(bit 127) is unused when an instruction doesn't have an immediate, so
the compaction code doesn't read it in that case.

So I think this patch (with the checking for SEND/SENDC opcodes, since
they're the only instructions that can have EOT) is exactly what we
want.

Reviewed-by: Matt Turner matts...@gmail.com

Thanks!
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] PIPE_SHADER_* vs TGSI_PROCESSOR_*

2015-05-29 Thread Marek Olšák
Sometimes, having to translate from one to the other seems like
unnecessary annoyance.

Marek

On Fri, May 29, 2015 at 3:59 PM, Brian Paul bri...@vmware.com wrote:
 On 05/28/2015 05:36 PM, Marek Olšák wrote:

 Hi,

 Would people be okay with removing the TGSI_PROCESSOR_ enums and
 replacing all usages with PIPE_SHADER_*?


 It's kind of nice that the TGSI shader stuff is self-contained with no real
 dependency on gallium.  This would change that in a tiny way.

 Is this causing pain?  Would adjusting the TSGI/PIPE enum values so they
 match (VS/FS are mixed up) be of any help?

 -Brian


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] i965: Don't add base_binding_table_index if it's zero

2015-05-29 Thread Ben Widawsky
On Fri, May 29, 2015 at 02:53:35PM +0100, Neil Roberts wrote:
 When calculating the binding table index for non-constant sampler
 array indexing it needs to add the base binding table index which is a
 constant within the generated code. Often this base is zero so we can
 avoid a redundant instruction in that case.
 
 It looks like nothing in shader-db is doing non-constant sampler array
 indexing so this patch doesn't make any difference but it might be
 worth having anyway.
 ---
  src/mesa/drivers/dri/i965/brw_fs_generator.cpp   | 3 ++-
  src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 3 ++-
  2 files changed, 4 insertions(+), 2 deletions(-)
 
 diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp 
 b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
 index ea46b1a..40a3db3 100644
 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
 +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
 @@ -789,7 +789,8 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg 
 dst, struct brw_reg src
  
/* addr = ((sampler * 0x101) + base_binding_table_index)  0xfff */
brw_MUL(p, addr, sampler_reg, brw_imm_ud(0x101));
 -  brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index));
 +  if (base_binding_table_index)
 + brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index));
brw_AND(p, addr, addr, brw_imm_ud(0xfff));
  
brw_pop_insn_state(p);
 diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp 
 b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
 index 1d3f5ed..cf1aa83 100644
 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
 +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
 @@ -409,7 +409,8 @@ vec4_generator::generate_tex(vec4_instruction *inst,
  
/* addr = ((sampler * 0x101) + base_binding_table_index)  0xfff */
brw_MUL(p, addr, sampler_reg, brw_imm_ud(0x101));
 -  brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index));
 +  if (base_binding_table_index)
 + brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index));
brw_AND(p, addr, addr, brw_imm_ud(0xfff));
  
brw_pop_insn_state(p);

Both are:
Acked-by: Ben Widawsky b...@bwidawsk.net

(More knowledgeable people have left their review already, I just want it merged
ASAP).

-- 
Ben Widawsky, Intel Open Source Technology Center
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] i965: Don't use a temporary when generating an indirect sample

2015-05-29 Thread Anuj Phogat
On Fri, May 29, 2015 at 6:53 AM, Neil Roberts n...@linux.intel.com wrote:
 Previously when generating the send instruction for a sample
 instruction with an indirect sampler it would use the destination
 register as a temporary store. This breaks when used in combination
 with the opt_sampler_eot optimisation because that forces the
 destination to be null. This patch fixes that by avoiding the temp
 register altogether.

 The reason the temporary register was needed was because it was trying
 to ensure the binding table index doesn't overflow a byte by and'ing
 it with 0xff. The result is then or'd with samper_index8. This patch
 instead just and's the whole thing by 0xfff. This will ensure that a
 bogus sampler index won't overflow into the rest of the message
 descriptor but unlike the previous code it won't ensure that the
 binding table index doesn't overflow into the sampler index. It
 doesn't seem like that should matter very much though because if the
 shader is generating a bogus sampler index then it's going to just get
 garbage out either way.

 Instead of doing sampler_index8|(sampler_index+base_table_index) the
 new code avoids one operation by doing
 sampler_index*0x101+base_table_index which should be equivalent.
 However if we wanted to avoid the multiply for some reason we could do
 this by adding an extra or instruction still without needing the
 temporary register.

 This fixes a number of Piglit tests on Skylake that were using
 indirect samplers such as:

  spec@arb_gpu_shader5@execution@sampler_array_indexing@fs-simple
 ---
  src/mesa/drivers/dri/i965/brw_fs_generator.cpp   | 17 -
  src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 16 
  2 files changed, 8 insertions(+), 25 deletions(-)

 diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp 
 b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
 index 0be0f86..ea46b1a 100644
 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
 +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
 @@ -779,27 +779,18 @@ fs_generator::generate_tex(fs_inst *inst, struct 
 brw_reg dst, struct brw_reg src
brw_mark_surface_used(prog_data, sampler + base_binding_table_index);
 } else {
/* Non-const sampler index */
 -  /* Note: this clobbers `dst` as a temporary before emitting the send */

struct brw_reg addr = vec1(retype(brw_address_reg(0), 
 BRW_REGISTER_TYPE_UD));
 -  struct brw_reg temp = vec1(retype(dst, BRW_REGISTER_TYPE_UD));
 -
struct brw_reg sampler_reg = vec1(retype(sampler_index, 
 BRW_REGISTER_TYPE_UD));

brw_push_insn_state(p);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
brw_set_default_access_mode(p, BRW_ALIGN_1);

 -  /* Some care required: `sampler` and `temp` may alias:
 -   *addr = sampler  0xff
 -   *temp = (sampler  8)  0xf00
 -   *addr = addr | temp
 -   */
 -  brw_ADD(p, addr, sampler_reg, brw_imm_ud(base_binding_table_index));
 -  brw_SHL(p, temp, sampler_reg, brw_imm_ud(8u));
 -  brw_AND(p, temp, temp, brw_imm_ud(0x0f00));
 -  brw_AND(p, addr, addr, brw_imm_ud(0x0ff));
 -  brw_OR(p, addr, addr, temp);
 +  /* addr = ((sampler * 0x101) + base_binding_table_index)  0xfff */
 +  brw_MUL(p, addr, sampler_reg, brw_imm_ud(0x101));
 +  brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index));
 +  brw_AND(p, addr, addr, brw_imm_ud(0xfff));

brw_pop_insn_state(p);

 diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp 
 b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
 index 20d096c..1d3f5ed 100644
 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
 +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
 @@ -398,10 +398,8 @@ vec4_generator::generate_tex(vec4_instruction *inst,
brw_mark_surface_used(prog_data-base, sampler + 
 base_binding_table_index);
 } else {
/* Non-constant sampler index. */
 -  /* Note: this clobbers `dst` as a temporary before emitting the send */

struct brw_reg addr = vec1(retype(brw_address_reg(0), 
 BRW_REGISTER_TYPE_UD));
 -  struct brw_reg temp = vec1(retype(dst, BRW_REGISTER_TYPE_UD));

struct brw_reg sampler_reg = vec1(retype(sampler_index, 
 BRW_REGISTER_TYPE_UD));

 @@ -409,16 +407,10 @@ vec4_generator::generate_tex(vec4_instruction *inst,
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
brw_set_default_access_mode(p, BRW_ALIGN_1);

 -  /* Some care required: `sampler` and `temp` may alias:
 -   *addr = sampler  0xff
 -   *temp = (sampler  8)  0xf00
 -   *addr = addr | temp
 -   */
 -  brw_ADD(p, addr, sampler_reg, brw_imm_ud(base_binding_table_index));
 -  brw_SHL(p, temp, sampler_reg, brw_imm_ud(8u));
 -  brw_AND(p, temp, temp, brw_imm_ud(0x0f00));
 -  brw_AND(p, addr, addr, brw_imm_ud(0x0ff));
 -  brw_OR(p, addr, addr, temp);
 +  /* addr = ((sampler * 0x101) + base_binding_table_index)