date:20150325

On 2015-03-23 17:37:58, Kenneth Graunke wrote:
 Our fragment program backend implements support for TXP directly, and
 there's no NIR lowering pass to remove the projection.  When we switch
 fragment program support over to NIR, we need to support it somehow.
 
 It's easy enough to support directly.
 
 Signed-off-by: Kenneth Graunke kenn...@whitecape.org
 ---
  src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 19 ++-
  1 file changed, 14 insertions(+), 5 deletions(-)
 
 diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
 b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
 index 094303f..63d5e3b 100644
 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
 +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
 @@ -1749,7 +1749,8 @@ fs_visitor::nir_emit_texture(nir_tex_instr *instr)
  
 int lod_components = 0, offset_components = 0;
  
 -   fs_reg coordinate, shadow_comparitor, lod, lod2, sample_index, mcs, 
 offset;
 +   fs_reg coordinate, shadow_comparitor, lod, lod2, sample_index, mcs;
 +   fs_reg tex_offset, projector;

offset = tex_offset seems like a separate patch.

Either way, Reviewed-by: Jordan Justen jordan.l.jus...@intel.com

 for (unsigned i = 0; i  instr-num_srcs; i++) {
fs_reg src = get_nir_src(instr-src[i].src);
 @@ -1795,14 +1796,15 @@ fs_visitor::nir_emit_texture(nir_tex_instr *instr)
   sample_index = retype(src, BRW_REGISTER_TYPE_UD);
   break;
case nir_tex_src_offset:
 - offset = retype(src, BRW_REGISTER_TYPE_D);
 + tex_offset = retype(src, BRW_REGISTER_TYPE_D);
   if (instr-is_array)
  offset_components = instr-coord_components - 1;
   else
  offset_components = instr-coord_components;
   break;
case nir_tex_src_projector:
 - unreachable(should be lowered);
 + projector = retype(src, BRW_REGISTER_TYPE_F);
 + break;
  
case nir_tex_src_sampler_offset: {
   /* Figure out the highest possible sampler index and mark it as 
 used */
 @@ -1826,6 +1828,13 @@ fs_visitor::nir_emit_texture(nir_tex_instr *instr)
}
 }
  
 +   if (projector.file != BAD_FILE) {
 +  fs_reg invproj = vgrf(glsl_type::float_type);
 +  emit_math(SHADER_OPCODE_RCP, invproj, projector);
 +  for (int i = 0; i  3; i++)
 + emit(MUL(offset(coordinate, i), offset(coordinate, i), invproj));
 +   }
 +
 if (instr-op == nir_texop_txf_ms) {
if (brw-gen = 7 
key_tex-compressed_multisample_layout_mask  (1  sampler)) {
 @@ -1838,7 +1847,7 @@ fs_visitor::nir_emit_texture(nir_tex_instr *instr)
 for (unsigned i = 0; i  3; i++) {
if (instr-const_offset[i] != 0) {
   assert(offset_components == 0);
 - offset = fs_reg(brw_texture_offset(ctx, instr-const_offset, 3));
 + tex_offset = fs_reg(brw_texture_offset(ctx, instr-const_offset, 
 3));
   break;
}
 }
 @@ -1880,7 +1889,7 @@ fs_visitor::nir_emit_texture(nir_tex_instr *instr)
  
 emit_texture(op, dest_type, coordinate, instr-coord_components,
  shadow_comparitor, lod, lod2, lod_components, sample_index,
 -offset, mcs, gather_component,
 +tex_offset, mcs, gather_component,
  is_cube_array, is_rect, sampler, sampler_reg, texunit);
  
 fs_reg dest = get_nir_dest(instr-dest);
 -- 
 2.3.3
 
 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/3] mesa: Improve validation of target, format and type of glTexSubImage[2, 3]D

2015-03-25 Thread Eduardo Lima Mitev

On 03/25/2015 12:02 AM, Laura Ekstrand wrote:
  +   if (_mesa_is_gles3(ctx)) {
  +  /* Validation of format and type for ES3 has to be done here
  +   * after the texture image is resolved, because the internal
  +   * format is needed for the verification
  +   */
  +  err = _mesa_es3_error_check_format_and_type(ctx, format, 
 type,
  +
  texImage-InternalFormat);
  +  if (err != GL_NO_ERROR) {
  + _mesa_error(ctx, err,
  + %s(incompatible format = %s, type = %s, 
  + internalformat = %s),
  + callerName, _mesa_lookup_enum_by_nr(format),
  + _mesa_lookup_enum_by_nr(type),
  +
   _mesa_lookup_enum_by_nr(texImage-InternalFormat));
  + return GL_TRUE;
  +  }
  +   }
  +
 
 
 What does the (es)_error_check_format_and_type have to do with
 validating the target?  I thought legal_texsubimage_target did all of
 the target checking?  So if we move up _mesa_select_tex_image above the
 format and type checks, it shouldn't be a problem because the target is
 already checked.  (That's my understanding of the problem.)

_mesa_es3_error_check_format_and_type() takes the tex object's internal
format as argument, because gles3 impose additional restrictions that
depend on the internal format.

So, to check format and type for gles3 case, we need to have texIamge,
and to obtain it, we first need to validate the target (otherwise we get
the _mesa_problem() call we are trying to avoid here).

That's why gles3 validation of format and type indirectly depends on
having the target validated.

cheers,
Eduardo

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 5/6] i965/fs: Add ARB_fragment_program support to the NIR backend.

On 2015-03-23 17:37:59, Kenneth Graunke wrote:
 Use prog_to_nir where we would normally call glsl_to_nir, handle program
 parameter lists, and skip a few things that don't exist.
 
 Using NIR generates much better shader code than Mesa IR, since we get
 real optimizations, as opposed to prog_optimize:
 
 total instructions in shared programs: 314007 - 279892 (-10.86%)
 instructions in affected programs: 285173 - 251058 (-11.96%)
 helped:2001
 HURT:  67
 GAINED:4
 LOST:  7
 
 Signed-off-by: Kenneth Graunke kenn...@whitecape.org
 ---
  src/mesa/drivers/dri/i965/brw_fs.cpp | 16 +---
  src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 45 
 ++--
  2 files changed, 44 insertions(+), 17 deletions(-)
 
 diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
 b/src/mesa/drivers/dri/i965/brw_fs.cpp
 index a57f501..6969286 100644
 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
 +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
 @@ -3954,15 +3954,13 @@ fs_visitor::run_fs()
/* Generate FS IR for main().  (the visitor only descends into
 * functions called main).
 */
 -  if (shader) {
 - if (env_var_as_boolean(INTEL_USE_NIR, false)) {
 -emit_nir_code();
 - } else {
 -foreach_in_list(ir_instruction, ir, shader-base.ir) {
 -   base_ir = ir;
 -   this-result = reg_undef;
 -   ir-accept(this);
 -}
 +  if (env_var_as_boolean(INTEL_USE_NIR, false)) {
 + emit_nir_code();
 +  } else if (shader) {
 + foreach_in_list(ir_instruction, ir, shader-base.ir) {
 +base_ir = ir;
 +this-result = reg_undef;
 +ir-accept(this);
   }
} else {
   emit_fragment_program_code();
 diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
 b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
 index 63d5e3b..4c0fd97 100644
 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
 +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
 @@ -24,6 +24,7 @@
  #include glsl/ir.h
  #include glsl/ir_optimization.h
  #include glsl/nir/glsl_to_nir.h
 +#include program/prog_to_nir.h
  #include brw_fs.h
  #include brw_nir.h
  
 @@ -86,9 +87,15 @@ fs_visitor::emit_nir_code()
 const nir_shader_compiler_options *options =
ctx-Const.ShaderCompilerOptions[stage].NirOptions;
  
 -   /* first, lower the GLSL IR shader to NIR */
 -   lower_output_reads(shader-base.ir);
 -   nir_shader *nir = glsl_to_nir(shader-base, options);
 +   nir_shader *nir;
 +   /* First, lower the GLSL IR or Mesa IR to NIR */
 +   if (shader_prog) {
 +  lower_output_reads(shader-base.ir);
 +  nir = glsl_to_nir(shader-base, options);
 +   } else {
 +  nir = prog_to_nir(prog, options);
 +  nir_convert_to_ssa(nir); /* turn registers into SSA */
 +   }
 nir_validate_shader(nir);
  
 nir_lower_global_vars_to_local(nir);
 @@ -106,9 +113,18 @@ fs_visitor::emit_nir_code()
 /* Get rid of split copies */
 nir_optimize(nir);
  
 -   nir_assign_var_locations_scalar_direct_first(nir, nir-uniforms,
 -num_direct_uniforms,
 -nir-num_uniforms);
 +   if (shader_prog) {
 +  nir_assign_var_locations_scalar_direct_first(nir, nir-uniforms,
 +   num_direct_uniforms,
 +   nir-num_uniforms);
 +   } else {
 +  /* ARB programs generally create a giant array of uniform data, and 
 allow
 +   * indirect addressing without any boundaries.  In the absence of 
 bounds
 +   * analysis, it's all or nothing.  num_direct_uniforms is only useful 
 when
 +   * we have some direct and some indirect access; it doesn't matter 
 here.
 +   */
 +  num_direct_uniforms = 0;
 +   }
 nir_assign_var_locations_scalar(nir-inputs, nir-num_inputs);
 nir_assign_var_locations_scalar(nir-outputs, nir-num_outputs);
  
 @@ -118,8 +134,10 @@ fs_visitor::emit_nir_code()
 nir_remove_dead_variables(nir);
 nir_validate_shader(nir);
  
 -   nir_lower_samplers(nir, shader_prog, shader-base.Program);
 -   nir_validate_shader(nir);
 +   if (shader_prog) {
 +  nir_lower_samplers(nir, shader_prog, shader-base.Program);
 +  nir_validate_shader(nir);
 +   }
  
 nir_lower_system_values(nir);
 nir_validate_shader(nir);
 @@ -320,6 +338,17 @@ fs_visitor::nir_setup_uniforms(nir_shader *shader)
 if (dispatch_width != 8)
return;
  
 +   if (!shader_prog) {
 +  /* prog_to_nir doesn't create uniform variables; set it up directly. */
 +  for (unsigned p = 0; p  prog-Parameters-NumParameters; p++) {
 + for (unsigned int i = 0; i  4; i++) {
 +stage_prog_data-param[4 * p + i] =
 +   prog-Parameters-ParameterValues[p][i];

Re: [Mesa-dev] [PATCH 1/6] nir: Add glsl_float_type() wrapper.

On 2015-03-23 17:37:55, Kenneth Graunke wrote:
 Signed-off-by: Kenneth Graunke kenn...@whitecape.org
 ---
  src/glsl/nir/nir_types.cpp | 6 ++
  src/glsl/nir/nir_types.h   | 1 +
  2 files changed, 7 insertions(+)
 
 Patches also available in the `nir-arb` branch of ~kwg/mesa.

Nice results.

With minor comments on 4-6, series
Reviewed-by: Jordan Justen jordan.l.jus...@intel.com

Definitely FWIW on patch 2. :)

 diff --git a/src/glsl/nir/nir_types.cpp b/src/glsl/nir/nir_types.cpp
 index a13c3e1..f0d0b46 100644
 --- a/src/glsl/nir/nir_types.cpp
 +++ b/src/glsl/nir/nir_types.cpp
 @@ -143,6 +143,12 @@ glsl_void_type(void)
  }
  
  const glsl_type *
 +glsl_float_type(void)
 +{
 +   return glsl_type::float_type;
 +}
 +
 +const glsl_type *
  glsl_vec4_type(void)
  {
 return glsl_type::vec4_type;
 diff --git a/src/glsl/nir/nir_types.h b/src/glsl/nir/nir_types.h
 index 494051a..276d4ad 100644
 --- a/src/glsl/nir/nir_types.h
 +++ b/src/glsl/nir/nir_types.h
 @@ -69,6 +69,7 @@ bool glsl_type_is_scalar(const struct glsl_type *type);
  bool glsl_type_is_matrix(const struct glsl_type *type);
  
  const struct glsl_type *glsl_void_type(void);
 +const struct glsl_type *glsl_float_type(void);
  const struct glsl_type *glsl_vec4_type(void);
  const struct glsl_type *glsl_array_type(const struct glsl_type *base,
  unsigned elements);
 -- 
 2.3.3
 
 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 6/6] i965/nir: Use NIR for ARB_vertex_program support on Gen8+.

On 2015-03-23 17:38:00, Kenneth Graunke wrote:
 Everything is already in place; we simply have to take the scalar code
 generation path.  This gives us SIMD8 VS programs, instead of SIMD4x2.
 
 Signed-off-by: Kenneth Graunke kenn...@whitecape.org
 ---
  src/mesa/drivers/dri/i965/brw_vec4.cpp | 14 ++
  1 file changed, 10 insertions(+), 4 deletions(-)
 
 diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
 b/src/mesa/drivers/dri/i965/brw_vec4.cpp
 index 918519c..21de1af 100644
 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
 +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
 @@ -1823,7 +1823,7 @@ brw_vs_emit(struct brw_context *brw,
 if (unlikely(INTEL_DEBUG  DEBUG_VS))
brw_dump_ir(vertex, prog, shader-base, c-vp-program.Base);
  
 -   if (prog  brw-gen = 8  brw-scalar_vs) {
 +   if (brw-gen = 8  brw-scalar_vs  (prog || getenv(INTEL_USE_NIR))) 
 {

We should be able to leave out the brw-gen = 8 check, right?

-Jordan

fs_visitor v(brw, mem_ctx, c-key, prog_data, prog, c-vp-program, 
 8);
if (!v.run_vs()) {
   if (prog) {
 @@ -1841,9 +1841,15 @@ brw_vs_emit(struct brw_context *brw,
   c-vp-program.Base, v.promoted_constants,
   v.runtime_check_aads_emit, VS);
if (INTEL_DEBUG  DEBUG_VS) {
 - char *name = ralloc_asprintf(mem_ctx, %s vertex shader %d,
 -  prog-Label ? prog-Label : unnamed,
 -  prog-Name);
 + char *name;
 + if (prog) {
 +name = ralloc_asprintf(mem_ctx, %s vertex shader %d,
 +   prog-Label ? prog-Label : unnamed,
 +   prog-Name);
 + } else {
 +name = ralloc_asprintf(mem_ctx, vertex program %d,
 +   c-vp-program.Base.Id);
 + }
   g.enable_debug(name);
}
g.generate_code(v.cfg, 8);
 -- 
 2.3.3
 
 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v4 16/17] main: Added entry points for NamedRenderbufferStorage/Multisample

2015-03-25 Thread Martin Peres


On 25/03/15 01:27, Laura Ekstrand wrote:

Looks good to me.

Reviewed-by: Laura Ekstrand la...@jlekstrand.net 
mailto:la...@jlekstrand.net


Thanks, I pushed the whole series.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 3/3] glx: Distinguish the macro value for Success and GLXBadContext

2015-03-25 Thread Xiong Zhang

The return value for glx_context-vtable-bind may be Success or
GLXBadContext, but the macro value for both Success and GLXBadContext
are 0, so the caller can't  identify the return value between them.

Signed-off-by: Xiong Zhang xiong.y.zh...@intel.com
---
 src/glx/applegl_glx.c  | 12 +++-
 src/glx/dri2_glx.c | 25 -
 src/glx/dri3_glx.c | 19 ---
 src/glx/dri_glx.c  | 18 +++---
 src/glx/drisw_glx.c| 18 +++---
 src/glx/glxclient.h|  4 ++--
 src/glx/glxcurrent.c   |  5 +++--
 src/glx/indirect_glx.c |  6 +++---
 8 files changed, 65 insertions(+), 42 deletions(-)

diff --git a/src/glx/applegl_glx.c b/src/glx/applegl_glx.c
index c086e51..72e40f2 100644
--- a/src/glx/applegl_glx.c
+++ b/src/glx/applegl_glx.c
@@ -48,9 +48,9 @@ applegl_destroy_context(struct glx_context *gc)
apple_glx_destroy_context(gc-driContext, gc-psc-dpy);
 }
 
-static int
+static bool
 applegl_bind_context(struct glx_context *gc, struct glx_context *old,
-GLXDrawable draw, GLXDrawable read)
+GLXDrawable draw, GLXDrawable read, int *errorPtr)
 {
Display *dpy = gc-psc-dpy;
bool error = apple_glx_make_current_context(dpy,
@@ -58,12 +58,14 @@ applegl_bind_context(struct glx_context *gc, struct 
glx_context *old,
   gc ? gc-driContext : NULL, 
draw);
 
apple_glx_diagnostic(%s: error %s\n, __func__, error ? YES : NO);
-   if (error)
-  return 1; /* GLXBadContext is the same as Success (0) */
+   if (error) {
+  *errorPtr = GLXBadContext;
+  return false;
+   }
 
apple_glapi_set_dispatch();
 
-   return Success;
+   return true;
 }
 
 static void
diff --git a/src/glx/dri2_glx.c b/src/glx/dri2_glx.c
index f980f82..c2d9fc6 100644
--- a/src/glx/dri2_glx.c
+++ b/src/glx/dri2_glx.c
@@ -133,7 +133,7 @@ dri2_destroy_context(struct glx_context *context)
 
 static Bool
 dri2_bind_context(struct glx_context *context, struct glx_context *old,
- GLXDrawable draw, GLXDrawable read)
+ GLXDrawable draw, GLXDrawable read, int *error)
 {
struct dri2_context *pcp = (struct dri2_context *) context;
struct dri2_screen *psc = (struct dri2_screen *) pcp-base.psc;
@@ -144,7 +144,8 @@ dri2_bind_context(struct glx_context *context, struct 
glx_context *old,
 
if ((draw != None  read == None) || (draw == None  read != None)) {
   driReleaseDrawables(pcp-base);
-  return GLXBadDrawable;
+  *error =  GLXBadDrawable;
+  return False;
}
 
pdraw = (struct dri2_drawable *) driFetchDrawable(context, draw);
@@ -154,16 +155,22 @@ dri2_bind_context(struct glx_context *context, struct 
glx_context *old,
 
if (pdraw)
   dri_draw = pdraw-driDrawable;
-   else if (draw != None)
-  return GLXBadDrawable;
+   else if (draw != None) {
+  *error =  GLXBadDrawable;
+  return False;
+   }
 
if (pread)
   dri_read = pread-driDrawable;
-   else if (read != None)
-  return GLXBadDrawable;
+   else if (read != None) {
+  *error =  GLXBadDrawable;
+  return False;
+   }
 
-   if (!(*psc-core-bindContext) (pcp-driContext, dri_draw, dri_read))
-  return GLXBadContext;
+   if (!(*psc-core-bindContext) (pcp-driContext, dri_draw, dri_read)) {
+  *error = GLXBadContext;
+  return False;
+   }
 
/* If the server doesn't send invalidate events, we may miss a
 * resize before the rendering starts.  Invalidate the buffers now
@@ -175,7 +182,7 @@ dri2_bind_context(struct glx_context *context, struct 
glx_context *old,
 dri2InvalidateBuffers(psc-base.dpy, pread-base.xDrawable);
}
 
-   return Success;
+   return True;
 }
 
 static void
diff --git a/src/glx/dri3_glx.c b/src/glx/dri3_glx.c
index 4f94167..e4e93c1 100644
--- a/src/glx/dri3_glx.c
+++ b/src/glx/dri3_glx.c
@@ -128,7 +128,7 @@ dri3_destroy_context(struct glx_context *context)
 
 static Bool
 dri3_bind_context(struct glx_context *context, struct glx_context *old,
-  GLXDrawable draw, GLXDrawable read)
+  GLXDrawable draw, GLXDrawable read, int *error)
 {
struct dri3_context *pcp = (struct dri3_context *) context;
struct dri3_screen *psc = (struct dri3_screen *) pcp-base.psc;
@@ -137,7 +137,8 @@ dri3_bind_context(struct glx_context *context, struct 
glx_context *old,
 
if ((draw != None  read == None) || (draw == None  read != None)) {
   driReleaseDrawables(pcp-base);
-  return GLXBadDrawable;
+  *error = GLXBadDrawable;
+  return False;
}
 
pdraw = (struct dri3_drawable *) driFetchDrawable(context, draw);
@@ -145,8 +146,10 @@ dri3_bind_context(struct glx_context *context, struct 
glx_context *old,
 
driReleaseDrawables(pcp-base);
 
-   if ((pdraw == NULL  draw != None) || (pread == NULL  read != None))
-  return GLXBadDrawable;
+   if ((pdraw == NULL  draw != None) || (pread == NULL  read != None)) {
+  *error = GLXBadDrawable;
+

[Mesa-dev] [PATCH 1/3] dri3_glx.c: Pass NULL DRI drawables into driver for None GLX drawables

2015-03-25 Thread Xiong Zhang

GLX_ARB_create_context spec says:
If either draw or read are not a valid GLX drawable, a GLXBadDrawable
error is generated, unless draw and read are both None and the OpenGL
version supported by ctx is 3.0 or greater.

So when both draw and read are None, it could pass NULL drawable into
driver instead of returing GLXBadDrawable.

v2: Fix space issue (Alex Davy)

https://bugs.freedesktop.org/show_bug.cgi?id=79629
Signed-off-by: Xiong Zhang xiong.y.zh...@intel.com
---
 src/glx/dri3_glx.c | 12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/glx/dri3_glx.c b/src/glx/dri3_glx.c
index 1ddc723..058f7e7 100644
--- a/src/glx/dri3_glx.c
+++ b/src/glx/dri3_glx.c
@@ -133,17 +133,23 @@ dri3_bind_context(struct glx_context *context, struct 
glx_context *old,
struct dri3_context *pcp = (struct dri3_context *) context;
struct dri3_screen *psc = (struct dri3_screen *) pcp-base.psc;
struct dri3_drawable *pdraw, *pread;
+   __DRIdrawable *dri_draw = NULL, *dri_read = NULL;
 
pdraw = (struct dri3_drawable *) driFetchDrawable(context, draw);
pread = (struct dri3_drawable *) driFetchDrawable(context, read);
 
driReleaseDrawables(pcp-base);
 
-   if (pdraw == NULL || pread == NULL)
+   if ((pdraw == NULL  draw != None) || (pread == NULL  read != None))
   return GLXBadDrawable;
 
-   if (!(*psc-core-bindContext) (pcp-driContext,
-   pdraw-driDrawable, pread-driDrawable))
+   if (pdraw)
+  dri_draw = pdraw-driDrawable;
+
+   if (pread)
+  dri_read = pread-driDrawable;
+
+   if (!(*psc-core-bindContext)(pcp-driContext, dri_draw, dri_read))
   return GLXBadContext;
 
return Success;
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/3] driX_glx.c: Draw and read must be either all non-zero or all zero

2015-03-25 Thread Xiong Zhang

Signed-off-by: Xiong Zhang xiong.y.zh...@intel.com
---
 src/glx/dri2_glx.c  | 5 +
 src/glx/dri3_glx.c  | 5 +
 src/glx/dri_glx.c   | 5 +
 src/glx/drisw_glx.c | 5 +
 4 files changed, 20 insertions(+)

diff --git a/src/glx/dri2_glx.c b/src/glx/dri2_glx.c
index 462d560..f980f82 100644
--- a/src/glx/dri2_glx.c
+++ b/src/glx/dri2_glx.c
@@ -142,6 +142,11 @@ dri2_bind_context(struct glx_context *context, struct 
glx_context *old,
struct glx_display *dpyPriv = psc-base.display;
struct dri2_display *pdp;
 
+   if ((draw != None  read == None) || (draw == None  read != None)) {
+  driReleaseDrawables(pcp-base);
+  return GLXBadDrawable;
+   }
+
pdraw = (struct dri2_drawable *) driFetchDrawable(context, draw);
pread = (struct dri2_drawable *) driFetchDrawable(context, read);
 
diff --git a/src/glx/dri3_glx.c b/src/glx/dri3_glx.c
index 058f7e7..4f94167 100644
--- a/src/glx/dri3_glx.c
+++ b/src/glx/dri3_glx.c
@@ -135,6 +135,11 @@ dri3_bind_context(struct glx_context *context, struct 
glx_context *old,
struct dri3_drawable *pdraw, *pread;
__DRIdrawable *dri_draw = NULL, *dri_read = NULL;
 
+   if ((draw != None  read == None) || (draw == None  read != None)) {
+  driReleaseDrawables(pcp-base);
+  return GLXBadDrawable;
+   }
+
pdraw = (struct dri3_drawable *) driFetchDrawable(context, draw);
pread = (struct dri3_drawable *) driFetchDrawable(context, read);
 
diff --git a/src/glx/dri_glx.c b/src/glx/dri_glx.c
index d087751..5c6c737 100644
--- a/src/glx/dri_glx.c
+++ b/src/glx/dri_glx.c
@@ -544,6 +544,11 @@ dri_bind_context(struct glx_context *context, struct 
glx_context *old,
struct dri_screen *psc = (struct dri_screen *) pcp-base.psc;
struct dri_drawable *pdraw, *pread;
 
+   if ((draw != None  read == None) || (draw == None  read != None)) {
+  driReleaseDrawables(pcp-base);
+  return GLXBadDrawable;
+   }
+
pdraw = (struct dri_drawable *) driFetchDrawable(context, draw);
pread = (struct dri_drawable *) driFetchDrawable(context, read);
 
diff --git a/src/glx/drisw_glx.c b/src/glx/drisw_glx.c
index 749ceb0..aa74840 100644
--- a/src/glx/drisw_glx.c
+++ b/src/glx/drisw_glx.c
@@ -242,6 +242,11 @@ drisw_bind_context(struct glx_context *context, struct 
glx_context *old,
struct drisw_screen *psc = (struct drisw_screen *) pcp-base.psc;
struct drisw_drawable *pdraw, *pread;
 
+   if ((draw != None  read == None) || (draw == None  read != None)) {
+  driReleaseDrawables(pcp-base);
+  return GLXBadDrawable;
+   }
+
pdraw = (struct drisw_drawable *) driFetchDrawable(context, draw);
pread = (struct drisw_drawable *) driFetchDrawable(context, read);
 
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] Building Mesa for Windows using Visual Studio

2015-03-25 Thread Jose Fonseca


See

  http://www.mesa3d.org/install.html

or docs/install.html from Mesa tree.


If you want to build with llvmpipe support, you'll also need to read and 
follow


  http://www.mesa3d.org/llvmpipe.html


I'm also experimenting on automated Windows builds with AppVeyor.  You 
can see the build logs on


  https://ci.appveyor.com/project/jrfonseca/mesa


Jose

On 25/03/15 01:00, Shervin Sharifi wrote:

Hi,
  I'm new to Mesa.
  I'm trying to build Mesa for Windows using Visual Studio, but couldn't
find instructions for that. The related threads on this mailing list
also seem outdated.
  Could anyone give me some hint or point me to instructions if there is
any?

  Thanks,
  Shervin



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://urldefense.proofpoint.com/v2/url?u=http-3A__lists.freedesktop.org_mailman_listinfo_mesa-2Ddevd=AwIGaQc=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEsr=zfmBZnnVGHeYde45pMKNnVyzeaZbdIqVLprmZCM2zzEm=KwlxC7fWLwmVmiUUTznYf2r7emIl5UqECVgWWjHL0dAs=cfjvYkqjebZsPUkVyHAtSlKGP4Y12qExxEQPEqFLoWIe=



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 81680] [r600g] Firefox crashes with hardware acceleration turned on

https://bugs.freedesktop.org/show_bug.cgi?id=81680

Michel Dänzer mic...@daenzer.net changed:

   What|Removed |Added

 CC||kami...@gmail.com

--- Comment #47 from Michel Dänzer mic...@daenzer.net ---
*** Bug 89745 has been marked as a duplicate of this bug. ***

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 0/7] clover: add clLinkProgram

2015-03-25 Thread EdB


Ping

Le 2014-12-14 11:31, EdB a écrit :

Hello,

This serie add the neccesary bits needed for clLinkProgram.
Patches from 1 to 2 are llvm/invocation.cpp little reorganisation.
Patches 3 to 5 rework our clCompileProgram implementation.
Patch 6 is for returning clover smart ptr to the API side.
Patch 7 finaly add clLinkProgram.

Patches from 1 to 6 are all neccesary to patch 7,
but they can be commited apart from clLinKProgram addition as there are 
clean up

and bug fixes in the current tree.

Please note that patch 7 only compile with llvm/clang 3.5 for the 
moment.
I was making it ready for 3.4 but it add lots of #ifdef and I found out 
that
master didn't even compile it for the moment. This is because 3.5 
enfore c++11

flags and that std::ofstream file(std:::string, ) is a c++11 addition.
Easy to fix but it went unnoticided until now.

I can come up with a 3.4 friendly v2 latter, but would appreciate your 
comments

on this version.
As I said 1 to 6 can be pushed before 7 as clean up and fixes.
And I also want to be sure 7 is the right way to go.

Regards
EdB

EdB (7):
  clover: move CLOVER_DEBUG stuff into anonymous ns
  clover: Dump the OpenCL C code earlier
  clover: split module::section::text
  clover: clCreateKernel now checks for executable
  clover: seperate build and compile
  clover: override ret_object
  clover: add clLinkProgram

 src/gallium/state_trackers/clover/api/dispatch.cpp |   2 +-
 src/gallium/state_trackers/clover/api/kernel.cpp   |   3 +
 src/gallium/state_trackers/clover/api/program.cpp  |  33 ++-
 src/gallium/state_trackers/clover/api/util.hpp |  11 +
 .../state_trackers/clover/core/compiler.hpp|  18 +-
 src/gallium/state_trackers/clover/core/error.hpp   |   7 +
 src/gallium/state_trackers/clover/core/kernel.cpp  |   2 +-
 src/gallium/state_trackers/clover/core/module.hpp  |   6 +-
 src/gallium/state_trackers/clover/core/program.cpp | 115 -
 src/gallium/state_trackers/clover/core/program.hpp |  10 +-
 .../state_trackers/clover/llvm/invocation.cpp  | 277 
++---

 .../state_trackers/clover/tgsi/compiler.cpp|   5 +-
 12 files changed, 427 insertions(+), 62 deletions(-)

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 18.1/23] glsl: Add is_rvalue, is_dereference, and is_jump methods

Ian Romanick i...@freedesktop.org writes:

 On 03/24/2015 02:14 PM, Matt Turner wrote:
 On Tue, Mar 24, 2015 at 11:25 AM, Ian Romanick i...@freedesktop.org wrote:
 From: Ian Romanick ian.d.roman...@intel.com

 These functions deteremine when an IR node is one of the non-leaf
 classes.

 Signed-off-by: Ian Romanick ian.d.roman...@intel.com
 Cc: Francisco Jerez curroje...@riseup.net
 ---
  src/glsl/ir.h | 25 +
  1 file changed, 25 insertions(+)

 diff --git a/src/glsl/ir.h b/src/glsl/ir.h
 index fdc22ed..b2b4822 100644
 --- a/src/glsl/ir.h
 +++ b/src/glsl/ir.h
 @@ -109,6 +109,31 @@ public:
 virtual ir_instruction *clone(void *mem_ctx,
  struct hash_table *ht) const = 0;

 +   bool is_rvalue() const
 +   {
 +  return ir_type == ir_type_dereference_array ||
 + ir_type == ir_type_dereference_record ||
 + ir_type == ir_type_dereference_variable ||
 + ir_type == ir_type_constant ||
 + ir_type == ir_type_expression ||
 + ir_type == ir_type_swizzle ||
 + ir_type == ir_type_texture;
 +   }
 +
 +   bool is_dereference() const
 +   {
 +  return ir_type == ir_type_dereference_array ||
 + ir_type == ir_type_dereference_record ||
 + ir_type == ir_type_dereference_variable;
 +   }
 +
 +   bool is_jump() const
 +   {
 +  return ir_type == ir_type_loop_jump ||
 + ir_type == ir_type_return ||
 + ir_type == ir_type_discard;
 
 Indent these expressions to line up with the first one after the return.

 By any chance... Do you know a way to get emacs to do that automatically
 without putting extra parenthesis around the whole expression?

Heh, yeah, I tend to do the same to get automatic alignment in cases
like this, but apparently some people find the extra parenthesis
annoying.  I don't have a better solution.

Anyway,
Reviewed-by: Francisco Jerez curroje...@riseup.net


 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 18.3/23] glsl: Constify the as_foo functions

Ian Romanick i...@freedesktop.org writes:

 From: Ian Romanick ian.d.roman...@intel.com

 Now that they're all implemented using macros, this is trivial.

 Signed-off-by: Ian Romanick ian.d.roman...@intel.com
 Cc: Francisco Jerez curroje...@riseup.net
 ---
  src/glsl/ir.h | 10 ++
  1 file changed, 10 insertions(+)

 diff --git a/src/glsl/ir.h b/src/glsl/ir.h
 index ff30263..7294629 100644
 --- a/src/glsl/ir.h
 +++ b/src/glsl/ir.h
 @@ -147,6 +147,11 @@ public:
 {\
assume(this != NULL); \
return (is_##TYPE()) ? (ir_##TYPE *) this : NULL; \
 +   }\
 +   const class ir_##TYPE *as_##TYPE() const \
 +   {\
 +  assume(this != NULL); \
 +  return (is_##TYPE()) ? (ir_##TYPE *) this : NULL; \

Redundant parenthesis around is_##TYPE() here and in the previous
patch.  With that fixed:

Reviewed-by: Francisco Jerez curroje...@riseup.net

 }
  
 AS_BASE(rvalue)
 @@ -159,6 +164,11 @@ public:
 { \
assume(this != NULL); \
return ir_type == ir_type_##TYPE ? (ir_##TYPE *) this : NULL; \
 +   }  \
 +   const class ir_##TYPE * as_##TYPE() const  \
 +   {  \
 +  assume(this != NULL);   \
 +  return ir_type == ir_type_##TYPE ? (const ir_##TYPE *) this : NULL; \
 }
 AS_CHILD(variable)
 AS_CHILD(function)
 -- 
 2.1.0


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 89759] WebGL OGL ES GLSL conformance test with mesa drivers fails

https://bugs.freedesktop.org/show_bug.cgi?id=89759

--- Comment #2 from Ilia Mirkin imir...@alum.mit.edu ---
Connor just confirmed that these pass on i965, so most likely a st/mesa issue.
[Which doesn't have its own component.]

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 18.2/23] glsl: Implement remaining as_foo functions with macros

Ian Romanick i...@freedesktop.org writes:

 From: Ian Romanick ian.d.roman...@intel.com

 The downcast functions for non-leaf classes were previously implemented
 by hand.  Now they are implemented using macros based on the is_foo
 functions added in the previous patch.

 Signed-off-by: Ian Romanick ian.d.roman...@intel.com
 Cc: Francisco Jerez curroje...@riseup.net

Reviewed-by: Francisco Jerez curroje...@riseup.net

 ---
  src/glsl/ir.h | 40 +---
  1 file changed, 9 insertions(+), 31 deletions(-)

 diff --git a/src/glsl/ir.h b/src/glsl/ir.h
 index b2b4822..ff30263 100644
 --- a/src/glsl/ir.h
 +++ b/src/glsl/ir.h
 @@ -142,39 +142,17 @@ public:
  * Additional downcast functions will be added as needed.
  */
 /*@{*/
 -   class ir_rvalue *as_rvalue()
 -   {
 -  assume(this != NULL);
 -  if (ir_type == ir_type_dereference_array ||
 -  ir_type == ir_type_dereference_record ||
 -  ir_type == ir_type_dereference_variable ||
 -  ir_type == ir_type_constant ||
 -  ir_type == ir_type_expression ||
 -  ir_type == ir_type_swizzle ||
 -  ir_type == ir_type_texture)
 - return (class ir_rvalue *) this;
 -  return NULL;
 -   }
 -
 -   class ir_dereference *as_dereference()
 -   {
 -  assume(this != NULL);
 -  if (ir_type == ir_type_dereference_array ||
 -  ir_type == ir_type_dereference_record ||
 -  ir_type == ir_type_dereference_variable)
 - return (class ir_dereference *) this;
 -  return NULL;
 +   #define AS_BASE(TYPE)\
 +   class ir_##TYPE *as_##TYPE() \
 +   {\
 +  assume(this != NULL); \
 +  return (is_##TYPE()) ? (ir_##TYPE *) this : NULL; \
 }
  
 -   class ir_jump *as_jump()
 -   {
 -  assume(this != NULL);
 -  if (ir_type == ir_type_loop_jump ||
 -  ir_type == ir_type_return ||
 -  ir_type == ir_type_discard)
 - return (class ir_jump *) this;
 -  return NULL;
 -   }
 +   AS_BASE(rvalue)
 +   AS_BASE(dereference)
 +   AS_BASE(jump)
 +   #undef AS_BASE
  
 #define AS_CHILD(TYPE) \
 class ir_##TYPE * as_##TYPE() \
 -- 
 2.1.0


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 07/23] ir_to_mesa: Add support for ir_triop_csel

2015-03-25 Thread Juha-Pekka Heikkila

Took a moment to understand how this works but I think it does.

Reviewed-by: Juha-Pekka Heikkila juhapekka.heikk...@gmail.com

/Juha-Pekka

On 20.03.2015 22:58, Ian Romanick wrote:
 From: Ian Romanick ian.d.roman...@intel.com
 
 The only drivers that use this path are i915 and swrast.  Both of these
 support the CMP opcode.  This is untested, but it is similar to existing
 code in st_glsl_to_tgsi.cpp.
 
 Without this change, a later commits glsl: Distribute multiply over
 b2f and glsl: Optimize certain if-statements to ir_triop_csel will
 cause assertion failures in ir_to_mesa.
 
 Signed-off-by: Ian Romanick ian.d.roman...@intel.com
 Cc: Juha-Pekka Heikkila juhapekka.heikk...@gmail.com
 ---
  src/mesa/program/ir_to_mesa.cpp | 9 -
  1 file changed, 8 insertions(+), 1 deletion(-)
 
 diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
 index 39790ec..b7acbfa 100644
 --- a/src/mesa/program/ir_to_mesa.cpp
 +++ b/src/mesa/program/ir_to_mesa.cpp
 @@ -1426,6 +1426,14 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
emit(ir, OPCODE_LRP, result_dst, op[2], op[1], op[0]);
break;
  
 +   case ir_triop_csel:
 +  /* We assume that Boolean true and false are 1.0 and 0.0.  OPCODE_CMP
 +   * selects src1 if src0 is  0, src2 otherwise.
 +   */
 +  op[0].negate = ~op[0].negate;
 +  emit(ir, OPCODE_CMP, result_dst, op[0], op[1], op[2]);
 +  break;
 +
 case ir_binop_vector_extract:
 case ir_binop_bfm:
 case ir_triop_fma:
 @@ -1434,7 +1442,6 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
 case ir_triop_vector_insert:
 case ir_quadop_bitfield_insert:
 case ir_binop_ldexp:
 -   case ir_triop_csel:
 case ir_binop_carry:
 case ir_binop_borrow:
 case ir_binop_imul_high:
 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] r600g/sb: Enable SB for geometry shaders

2015-03-25 Thread Marc Dietrich

Am Dienstag, 24. März 2015, 20:05:46 schrieb Glenn Kennard:
 On Tue, 24 Mar 2015 17:21:35 +0100, Dieter Nützel die...@nuetzel-hh.de
 
 wrote:
  Am 20.03.2015 14:13, schrieb Glenn Kennard:
  Add SV_GEOMETRY_EMIT special variable type to track the
  implicit dependencies between CUT/EMIT_VERTEX/MEM_RING
  instructions so GCM/scheduler doesn't reorder them.
  
   Mark emit instructions as unkillable so DCE doesn't eat them.
   Signed-off-by: Glenn Kennard glenn.kenn...@gmail.com
  
  ---
  The hangs with SB on geometry shaders were all due to the CUT/EMIT
  instructions either being DCE:d or emitted out of order from the
  memory ring writes, so the hardware stalled forever waiting for
  completed primitives.
  
   Tested only on a Turks so far, but should behave the same across
  
  all R600 generations.
  
  Hello Glenn,
  
  what tests are preferred?
  Starting with a Turks XT here, too and could do some tests on RV730
  (AGP) then.
  
  -Dieter
 
 Just the usual piglit regression testing, at this point it's been tested
 on a Turks XT, and a RV770. A R6xx card and some VLIW4 gpu would complete
 the coverage needed.

I would like to, but piglit run quick stalls/crashes the gpu (rs880) too 
often. Maybe you could tell me some special tests to run instead of all.

Marc


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 89759] WebGL OGL ES GLSL conformance test with mesa drivers fails

https://bugs.freedesktop.org/show_bug.cgi?id=89759

Ilia Mirkin imir...@alum.mit.edu changed:

   What|Removed |Added

  Component|Drivers/DRI/nouveau |Mesa core
   Assignee|nouveau@lists.freedesktop.o |mesa-dev@lists.freedesktop.
   |rg  |org
 QA Contact|nouveau@lists.freedesktop.o |mesa-dev@lists.freedesktop.
   |rg  |org

--- Comment #1 from Ilia Mirkin imir...@alum.mit.edu ---
These also fail on r600. This is probably a st/mesa-level or glsl ir-level
issue. Haven't gotten any confirmation from anyone using i965, but I'm
reassigning this to mesa core.

I suspect that the following is the cause of the trouble:

b[1] = a[1];
c[1] += d[1];
a[1] += 1.0;

If the += 1 gets reordered to before the b[1] = a[1], then we're in trouble. I
get cyan as the output, with 182 for the first component, which means that

if(b[1] == 1.0) q += 2.0;

is failing. Haven't quite figured out how to convert these webgl tests into
piglits quite yet, although this one should be easy since it's not
input-dependent.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 18.4/23] glsl: Constify ir_instruction::equals

Ian Romanick i...@freedesktop.org writes:

 From: Ian Romanick ian.d.roman...@intel.com

 v2: Don't be lazy.  Constify the as_foo functions and use those instead
 of ugly casts.  Suggested by Curro.

 Signed-off-by: Ian Romanick ian.d.roman...@intel.com
 Cc: Francisco Jerez curroje...@riseup.net

Thanks, this is much better:
Reviewed-by: Francisco Jerez curroje...@riseup.net

 ---
  src/glsl/ir.h  | 21 ++---
  src/glsl/ir_equals.cpp | 20 
  2 files changed, 26 insertions(+), 15 deletions(-)

 diff --git a/src/glsl/ir.h b/src/glsl/ir.h
 index 7294629..a318be4 100644
 --- a/src/glsl/ir.h
 +++ b/src/glsl/ir.h
 @@ -196,7 +196,8 @@ public:
  * in particular.  No support for other instruction types (assignments,
  * jumps, calls, etc.) is planned.
  */
 -   virtual bool equals(ir_instruction *ir, enum ir_node_type ignore = 
 ir_type_unset);
 +   virtual bool equals(const ir_instruction *ir,
 +   enum ir_node_type ignore = ir_type_unset) const;
  
  protected:
 ir_instruction(enum ir_node_type t)
 @@ -1611,7 +1612,8 @@ public:
  */
 ir_expression(int op, ir_rvalue *op0, ir_rvalue *op1, ir_rvalue *op2);
  
 -   virtual bool equals(ir_instruction *ir, enum ir_node_type ignore = 
 ir_type_unset);
 +   virtual bool equals(const ir_instruction *ir,
 +   enum ir_node_type ignore = ir_type_unset) const;
  
 virtual ir_expression *clone(void *mem_ctx, struct hash_table *ht) const;
  
 @@ -1922,7 +1924,8 @@ public:
  
 virtual ir_visitor_status accept(ir_hierarchical_visitor *);
  
 -   virtual bool equals(ir_instruction *ir, enum ir_node_type ignore = 
 ir_type_unset);
 +   virtual bool equals(const ir_instruction *ir,
 +   enum ir_node_type ignore = ir_type_unset) const;
  
 /**
  * Return a string representing the ir_texture_opcode.
 @@ -2023,7 +2026,8 @@ public:
  
 virtual ir_visitor_status accept(ir_hierarchical_visitor *);
  
 -   virtual bool equals(ir_instruction *ir, enum ir_node_type ignore = 
 ir_type_unset);
 +   virtual bool equals(const ir_instruction *ir,
 +   enum ir_node_type ignore = ir_type_unset) const;
  
 bool is_lvalue() const
 {
 @@ -2076,7 +2080,8 @@ public:
  
 virtual ir_constant *constant_expression_value(struct hash_table 
 *variable_context = NULL);
  
 -   virtual bool equals(ir_instruction *ir, enum ir_node_type ignore = 
 ir_type_unset);
 +   virtual bool equals(const ir_instruction *ir,
 +   enum ir_node_type ignore = ir_type_unset) const;
  
 /**
  * Get the variable that is ultimately referenced by an r-value
 @@ -2122,7 +2127,8 @@ public:
  
 virtual ir_constant *constant_expression_value(struct hash_table 
 *variable_context = NULL);
  
 -   virtual bool equals(ir_instruction *ir, enum ir_node_type ignore = 
 ir_type_unset);
 +   virtual bool equals(const ir_instruction *ir,
 +   enum ir_node_type ignore = ir_type_unset) const;
  
 /**
  * Get the variable that is ultimately referenced by an r-value
 @@ -2232,7 +2238,8 @@ public:
  
 virtual ir_visitor_status accept(ir_hierarchical_visitor *);
  
 -   virtual bool equals(ir_instruction *ir, enum ir_node_type ignore = 
 ir_type_unset);
 +   virtual bool equals(const ir_instruction *ir,
 +   enum ir_node_type ignore = ir_type_unset) const;
  
 /**
  * Get a particular component of a constant as a specific type
 diff --git a/src/glsl/ir_equals.cpp b/src/glsl/ir_equals.cpp
 index 65376cd..cc1964e 100644
 --- a/src/glsl/ir_equals.cpp
 +++ b/src/glsl/ir_equals.cpp
 @@ -28,7 +28,8 @@
   * can't access a's vtable in that case.
   */
  static bool
 -possibly_null_equals(ir_instruction *a, ir_instruction *b, enum ir_node_type 
 ignore)
 +possibly_null_equals(const ir_instruction *a, const ir_instruction *b,
 + enum ir_node_type ignore)
  {
 if (!a || !b)
return !a  !b;
 @@ -41,13 +42,13 @@ possibly_null_equals(ir_instruction *a, ir_instruction 
 *b, enum ir_node_type ign
   * about.
   */
  bool
 -ir_instruction::equals(ir_instruction *, enum ir_node_type)
 +ir_instruction::equals(const ir_instruction *, enum ir_node_type) const
  {
 return false;
  }
  
  bool
 -ir_constant::equals(ir_instruction *ir, enum ir_node_type)
 +ir_constant::equals(const ir_instruction *ir, enum ir_node_type) const
  {
 const ir_constant *other = ir-as_constant();
 if (!other)
 @@ -65,7 +66,8 @@ ir_constant::equals(ir_instruction *ir, enum ir_node_type)
  }
  
  bool
 -ir_dereference_variable::equals(ir_instruction *ir, enum ir_node_type)
 +ir_dereference_variable::equals(const ir_instruction *ir,
 +enum ir_node_type) const
  {
 const ir_dereference_variable *other = ir-as_dereference_variable();
 if (!other)
 @@ -75,7 +77,8 @@ ir_dereference_variable::equals(ir_instruction *ir, enum 
 ir_node_type)
  }
  
  bool

[Mesa-dev] [Bug 89759] WebGL OGL ES GLSL conformance test with mesa drivers fails

https://bugs.freedesktop.org/show_bug.cgi?id=89759

--- Comment #4 from Ilia Mirkin imir...@alum.mit.edu ---
Looks like the st/mesa copy propagation pass fails... commenting out

v-copy_propagate();

makes the original shader work. Probably not the right forum, but why do we
bother with the tgsi optimizations? All the backend drivers do the same things
anyway (and in a way that works... :) )

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/2] gallivm: pass jit_context pointer through to sampling

2015-03-25 Thread sroland

From: Roland Scheidegger srol...@vmware.com

The callbacks used for getting the dynamic texture/sampler state were using
the jit_context from the generated jit function. This works just fine, however
that way it's impossible to generate separate functions for texture sampling,
as will be done in the next commit. Hence, pass this pointer through all
interfaces so it can be passed to a separate function (technically, it would
probably be possible to extract this pointer from the current function instead,
but this feels hacky and would probably require some more hacks if we'd use
real functions instead of inlining all shader functions at some point).
There should be no difference in the generated code for now.
---
 src/gallium/auxiliary/draw/draw_llvm.c|  9 ++-
 src/gallium/auxiliary/draw/draw_llvm.h|  3 +-
 src/gallium/auxiliary/draw/draw_llvm_sample.c | 28 
 src/gallium/auxiliary/gallivm/lp_bld_sample.c | 39 ++-
 src/gallium/auxiliary/gallivm/lp_bld_sample.h | 83 ++-
 src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 74 +---
 src/gallium/auxiliary/gallivm/lp_bld_tgsi.h   |  4 ++
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c   |  6 ++
 src/gallium/drivers/llvmpipe/lp_state_fs.c|  5 +-
 src/gallium/drivers/llvmpipe/lp_tex_sample.c  | 28 
 src/gallium/drivers/llvmpipe/lp_tex_sample.h  |  4 +-
 11 files changed, 171 insertions(+), 112 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_llvm.c 
b/src/gallium/auxiliary/draw/draw_llvm.c
index 6e1fb40..1e6e699 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -620,6 +620,7 @@ generate_vs(struct draw_llvm_variant *variant,
  system_values,
  inputs,
  outputs,
+ context_ptr,
  draw_sampler,
  llvm-draw-vs.vertex_shader-info,
  NULL);
@@ -1630,9 +1631,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct 
draw_llvm_variant *variant,
LLVMBuildStore(builder, lp_build_zero(gallivm, lp_int_type(vs_type)), 
clipmask_bool_ptr);
 
/* code generated texture sampling */
-   sampler = draw_llvm_sampler_soa_create(
-  draw_llvm_variant_key_samplers(key),
-  context_ptr);
+   sampler = draw_llvm_sampler_soa_create(draw_llvm_variant_key_samplers(key));
 
if (elts) {
   start = zero;
@@ -2163,8 +2162,7 @@ draw_gs_llvm_generate(struct draw_llvm *llvm,
   draw_gs_jit_context_num_constants(variant-gallivm, context_ptr);
 
/* code generated texture sampling */
-   sampler = draw_llvm_sampler_soa_create(variant-key.samplers,
-  context_ptr);
+   sampler = draw_llvm_sampler_soa_create(variant-key.samplers);
 
mask_val = generate_mask_value(variant, gs_type);
lp_build_mask_begin(mask, gallivm, gs_type, mask_val);
@@ -2187,6 +2185,7 @@ draw_gs_llvm_generate(struct draw_llvm *llvm,
  system_values,
  NULL,
  outputs,
+ context_ptr,
  sampler,
  llvm-draw-gs.geometry_shader-info,
  (const struct lp_build_tgsi_gs_iface *)gs_iface);
diff --git a/src/gallium/auxiliary/draw/draw_llvm.h 
b/src/gallium/auxiliary/draw/draw_llvm.h
index af1960e..9565fc6 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.h
+++ b/src/gallium/auxiliary/draw/draw_llvm.h
@@ -526,8 +526,7 @@ void
 draw_gs_llvm_dump_variant_key(struct draw_gs_llvm_variant_key *key);
 
 struct lp_build_sampler_soa *
-draw_llvm_sampler_soa_create(const struct draw_sampler_static_state 
*static_state,
- LLVMValueRef context_ptr);
+draw_llvm_sampler_soa_create(const struct draw_sampler_static_state 
*static_state);
 
 void
 draw_llvm_set_sampler_state(struct draw_context *draw, unsigned shader_stage);
diff --git a/src/gallium/auxiliary/draw/draw_llvm_sample.c 
b/src/gallium/auxiliary/draw/draw_llvm_sample.c
index a6341fa..16d075c 100644
--- a/src/gallium/auxiliary/draw/draw_llvm_sample.c
+++ b/src/gallium/auxiliary/draw/draw_llvm_sample.c
@@ -59,8 +59,6 @@ struct draw_llvm_sampler_dynamic_state
struct lp_sampler_dynamic_state base;
 
const struct draw_sampler_static_state *static_state;
-
-   LLVMValueRef context_ptr;
 };
 
 
@@ -86,14 +84,13 @@ struct draw_llvm_sampler_soa
 static LLVMValueRef
 draw_llvm_texture_member(const struct lp_sampler_dynamic_state *base,
  struct gallivm_state *gallivm,
+ LLVMValueRef context_ptr,
  unsigned texture_unit,
  unsigned member_index,
  const char *member_name,
  boolean emit_load)
 {
LLVMBuilderRef builder = gallivm-builder;
-   struct draw_llvm_sampler_dynamic_state *state =
-  (struct

Re: [Mesa-dev] [PATCH 1/6] nir: Add glsl_float_type() wrapper.

2015-03-25 Thread Jason Ekstrand

I finally git a chance to look through this. I think Eric, Connor, and
Jordan have probably provided enough commentary so I don't have much to
add.  FWIW the series looks good to me pending the other comments.  Can't
say I read every line though.
--Jason
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 4/8] scons: Don't build osmesa.

On 24 March 2015 at 21:16, Jose Fonseca jfons...@vmware.com wrote:
 There doesn't seem much interest on osmesa on Windows, particularly classic 
 osmesa.

 If there is indeed interest in osmesa on Windows, we should instead
 integrate src/gallium/targets/osmesa into SCons.
Afaict the Octave people still use it. There was a guy in #dri-devel
who was having issues with the Windows build not too long ago [1]

Iirc the VTK folk are were using osmesa, although I'm not sure if (how
much) Windows support is a thing for them. Hence the presence of the
scons build.

All of that is more of jfyi rather than feeling sentimental about nuking it :)
-Emil

[1] 
http://people.freedesktop.org/~cbrill/dri-log/?channel=dri-develhighlight_names=andy1978date=2015-02-26
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 89759] WebGL OGL ES GLSL conformance test with mesa drivers fails

https://bugs.freedesktop.org/show_bug.cgi?id=89759

--- Comment #3 from Ilia Mirkin imir...@alum.mit.edu ---
Created attachment 114617
  -- https://bugs.freedesktop.org/attachment.cgi?id=114617action=edit
shader test

Confirmed that it's the

a[1] += 1.0;

line causing the trouble. If I comment it out, I get the expected white output.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 89754] vertexAttrib fails WebGL Conformance test with mesa drivers

https://bugs.freedesktop.org/show_bug.cgi?id=89754

Matthew Dawson matt...@mjdsystems.ca changed:

   What|Removed |Added

 CC||matt...@mjdsystems.ca

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/3] mesa: Improve validation of target, format and type of glTexSubImage[2, 3]D

2015-03-25 Thread Laura Ekstrand

With your modifications in this patch, the target is already validated by
the time you get to this function, so it should be fine to move mesa select
tex image up above the format and type checks.
On Mar 24, 2015 11:44 PM, Eduardo Lima Mitev el...@igalia.com wrote:

 On 03/25/2015 12:02 AM, Laura Ekstrand wrote:
   +   if (_mesa_is_gles3(ctx)) {
   +  /* Validation of format and type for ES3 has to be done
 here
   +   * after the texture image is resolved, because the
 internal
   +   * format is needed for the verification
   +   */
   +  err = _mesa_es3_error_check_format_and_type(ctx,
 format, type,
   +
   texImage-InternalFormat);
   +  if (err != GL_NO_ERROR) {
   + _mesa_error(ctx, err,
   + %s(incompatible format = %s, type = %s,
 
   + internalformat = %s),
   + callerName,
 _mesa_lookup_enum_by_nr(format),
   + _mesa_lookup_enum_by_nr(type),
   +
_mesa_lookup_enum_by_nr(texImage-InternalFormat));
   + return GL_TRUE;
   +  }
   +   }
   +
 
 
  What does the (es)_error_check_format_and_type have to do with
  validating the target?  I thought legal_texsubimage_target did all of
  the target checking?  So if we move up _mesa_select_tex_image above the
  format and type checks, it shouldn't be a problem because the target is
  already checked.  (That's my understanding of the problem.)

 _mesa_es3_error_check_format_and_type() takes the tex object's internal
 format as argument, because gles3 impose additional restrictions that
 depend on the internal format.

 So, to check format and type for gles3 case, we need to have texIamge,
 and to obtain it, we first need to validate the target (otherwise we get
 the _mesa_problem() call we are trying to avoid here).

 That's why gles3 validation of format and type indirectly depends on
 having the target validated.

 cheers,
 Eduardo


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] r600g/sb: Enable SB for geometry shaders

2015-03-25 Thread Glenn Kennard


On Wed, 25 Mar 2015 14:26:40 +0100, Marc Dietrich marvi...@gmx.de wrote:


Am Dienstag, 24. März 2015, 20:05:46 schrieb Glenn Kennard:

On Tue, 24 Mar 2015 17:21:35 +0100, Dieter Nützel die...@nuetzel-hh.de

wrote:
 Am 20.03.2015 14:13, schrieb Glenn Kennard:
 Add SV_GEOMETRY_EMIT special variable type to track the
 implicit dependencies between CUT/EMIT_VERTEX/MEM_RING
 instructions so GCM/scheduler doesn't reorder them.

  Mark emit instructions as unkillable so DCE doesn't eat them.
  Signed-off-by: Glenn Kennard glenn.kenn...@gmail.com

 ---
 The hangs with SB on geometry shaders were all due to the CUT/EMIT
 instructions either being DCE:d or emitted out of order from the
 memory ring writes, so the hardware stalled forever waiting for
 completed primitives.

  Tested only on a Turks so far, but should behave the same across

 all R600 generations.

 Hello Glenn,

 what tests are preferred?
 Starting with a Turks XT here, too and could do some tests on RV730
 (AGP) then.

 -Dieter

Just the usual piglit regression testing, at this point it's been tested
on a Turks XT, and a RV770. A R6xx card and some VLIW4 gpu would  
complete

the coverage needed.


I would like to, but piglit run quick stalls/crashes the gpu (rs880)  
too

often. Maybe you could tell me some special tests to run instead of all.

Marc


-t geometry should be the smallest useful subset. It's likely that most of  
the hangs you get on rs880 (and other r6xx devices) are geometry shader  
related though so that might end up taking as long as a full quick run,  
unfortunately.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/3] dri3_glx.c: Pass NULL DRI drawables into driver for None GLX drawables

Hi Xiong Zhang,
On 25 March 2015 at 08:05, Xiong Zhang xiong.y.zh...@intel.com wrote:
 GLX_ARB_create_context spec says:
 If either draw or read are not a valid GLX drawable, a GLXBadDrawable
 error is generated, unless draw and read are both None and the OpenGL
 version supported by ctx is 3.0 or greater.

 So when both draw and read are None, it could pass NULL drawable into
 driver instead of returing GLXBadDrawable.

I'm a bit fresh in the area, but where do we check that the OpenGL
version supported is 3.0 or greater ? Is there an assumption that all
dri2/3 modules support it ?

A similar commit has landed for dri2 in mesa 10.2 f658150639c(glx:
Pass NULL DRI drawables into the DRI driver for None GLX drawables).
Shouldn't a similar one be due for dri(1), drisw etc ? Not suggesting
that you should prep one, just asking.

From a quick look - if it wasn't for indirect glx, most of these (and
patch 2/3) changes could be pushed one level up to
glxcurrent.c:MakeContextCurrent. Bit unfortunate about the duplication
but that's another story.

Cheers
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/8] scons: Don't build egl on Windows.

On 24 March 2015 at 21:16, Jose Fonseca jfons...@vmware.com wrote:
 Useless, as there are no drivers for it.
Hi Jose,

Just a question - I was under the impression that one could use it
with softpipe/llvmpipe's dri module. Although not sure if there are
(m)any benefits of doing so.

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/6] nir: Implement a Mesa IR - NIR translator.

On 24 March 2015 at 22:18, Kenneth Graunke kenn...@whitecape.org wrote:
 On Tuesday, March 24, 2015 08:52:01 PM Emil Velikov wrote:
 Hi Ken,

 On 24/03/15 00:37, Kenneth Graunke wrote:
  Shamelessly ripped off from Eric Anholt's tgsi_to_nir pass.
 
  Not compiled on SCons, like the rest of NIR.
 
  Signed-off-by: Kenneth Graunke kenn...@whitecape.org
  ---
   src/mesa/Makefile.am|2 +
   src/mesa/Makefile.sources   |5 +
   src/mesa/program/prog_instruction.h |2 +
   src/mesa/program/prog_to_nir.c  | 1189
 +++
   src/mesa/program/prog_to_nir.h  |   37 ++
   5 files changed, 1235 insertions(+)
   create mode 100644 src/mesa/program/prog_to_nir.c
   create mode 100644 src/mesa/program/prog_to_nir.h
 
  diff --git a/src/mesa/Makefile.am b/src/mesa/Makefile.am
  index 3dab8f0..60114e4 100644
  --- a/src/mesa/Makefile.am
  +++ b/src/mesa/Makefile.am
  @@ -174,6 +174,7 @@ endif
   libmesa_la_SOURCES = \
  $(MESA_FILES) \
  $(PROGRAM_FILES) \
  +   $(PROGRAM_NIR_FILES) \
  $(MESA_ASM_FILES_FOR_ARCH)
 
   libmesa_la_LIBADD = \
  @@ -183,6 +184,7 @@ libmesa_la_LIBADD = \
   libmesagallium_la_SOURCES = \
  $(MESA_GALLIUM_FILES) \
  $(PROGRAM_FILES) \
  +   $(PROGRAM_NIR_FILES) \
  $(MESA_ASM_FILES_FOR_ARCH)
 
   libmesagallium_la_LIBADD = \
  diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources
  index 217be9a..cc166ce 100644
  --- a/src/mesa/Makefile.sources
  +++ b/src/mesa/Makefile.sources
  @@ -520,6 +520,10 @@ PROGRAM_FILES = \
  program/symbol_table.c \
  program/symbol_table.h
 
  +PROGRAM_NIR_FILES = \
  +   program/prog_to_nir.c \
  +   program/prog_to_nir.h
  +
   ASM_C_FILES =  \
  x86/common_x86.c \
  x86/x86_xform.c \
  @@ -608,6 +612,7 @@ INCLUDE_DIRS = \
  -I$(top_srcdir)/src \
  -I$(top_srcdir)/src/glsl \
  -I$(top_builddir)/src/glsl \
  +   -I$(top_builddir)/src/glsl/nir \
 Hi Ken,

 Thanks for handling all the build cruft :)

 Noticed that you mentioned some build issues - was it locally or with
 jenkins ? I've just pushed a series has some related patches. If you're
 still seeing those can you open a bug report and/or post the build log
 somewhere.

 Cheers,
 Emil

 It seems to be working fine now - I figured it out :)

 prog_to_nir.c includes nir.h, which contains #include nir_opcodes.h.
 nir_opcodes.h is autogenerated, so it lives in $(top_builddir)/src/glsl/nir.
 I normally do in-tree builds, so it worked fine for me, but broke on
 Jenkins (which is doing out-of-tree builds).  Ilia reminded me how to do
 out of tree builds and then the solution was easy - add the -I line
 above.

From a quick look at the log I was with the impression that things did
not work even with $(top_builddir)/src/glsl/nir. So I was suspecting
some race (lack of dependency tracking). Glad to hear that it's
sorted.

Cheers
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 4/8] scons: Don't build osmesa.

2015-03-25 Thread Jose Fonseca

On 25/03/15 14:53, Emil Velikov wrote:

On 24 March 2015 at 21:16, Jose Fonseca jfons...@vmware.com wrote:

There doesn't seem much interest on osmesa on Windows, particularly classic
osmesa.

If there is indeed interest in osmesa on Windows, we should instead
integrate src/gallium/targets/osmesa into SCons.

Afaict the Octave people still use it. There was a guy in #dri-devel
who was having issues with the Windows build not too long ago [1]

Iirc the VTK folk are were using osmesa, although I'm not sure if (how
much) Windows support is a thing for them. Hence the presence of the
scons build.

All of that is more of jfyi rather than feeling sentimental about nuking it :)
-Emil

[1]
https://urldefense.proofpoint.com/v2/url?u=http-3A__people.freedesktop.org_-7Ecbrill_dri-2Dlog_-3Fchannel-3Ddri-2Ddevel-26highlight-5Fnames-3Dandy1978-26date-3D2015-2D02-2D26d=AwIBaQc=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEsr=zfmBZnnVGHeYde45pMKNnVyzeaZbdIqVLprmZCM2zzEm=0--RMUucqcy-hAGpX9G5a-U9MF5M607lG9i3Bm4eD2ws=G-FQcwnoms56_DZd1eCFZAu3K6T9oDhuuqiIVGC7n6oe=

Thanks for the info Emil.

The osmesa I removed from scons was the one with classic sw rasterizer.

The osmesa w/ softpipe/llvmpipe is in src/gallium/targets/osmesa , but
was never integrated into scons build (just autotools.)

I'm OK adding support to build osmesa on Windows with llvmpipe/softpipe,
but I don't think there's value in supporting the classic rasterizer --
less code that we need to worry about MSVC portability. I might give it
a go when I find some time.

For the record, there was a point in time where I wanted SCons to build
most of Mesa in the hope of replacing autotools, which is why scons
build more than just Windows. But it's a loosing battle. Nowadays I'm
more interested in reducing the SCons scope as much as possible, so
there's less stuff for me and everybody else to maintain.

(And if there's a build system that could really work across multiple
platforms and make everybody happy, my bet would not be scons, but
rather cmake. But I doubt there will ever be agreement or time to take
such enterprise anyway.)

Jose
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/2] gallivm: use llvm function calls for texturing instead of inlining

2015-03-25 Thread sroland

From: Roland Scheidegger srol...@vmware.com

There are issues with inlining everything, most notably llvm will use much
more memory (and be slower) when compiling. Ideally we'd probably use
functions for shader functions too but texture sampling usually is responsible
for quite some IR (it can easily reach 80% of total IR instructions) so this
seems like a good start.
This still generates a different function for all different combinations just
like before, however it is possible llvm is missing some optimization
opportunities - it is believed though such opportunities should be somewhat
rare, but at least for now it can still be switched off (at compile time only).
It should probably make compiled code also smaller because the same function
should be used for different variants in the same module (so for the
opaque/partial or linear/elts variants).
No piglit change (though it does indeed speed up unrealistic tests like
fp-indirections2 by a factor of 30 or so).
Has a small negative performance impact in openarena - I suspect this could
be fixed by running some IPO passes (despite the private linkage, llvm right
now does NO optimization at all wrt anything going past the call, even if
there's just one caller - so things like values stored before the call and then
always written by the function etc. will not be optimized away, nor will dead
arguments (which we mostly shouldn't have) be eliminated, always constant
arguments promoted etc.).
---
 src/gallium/auxiliary/gallivm/lp_bld_init.c   |  23 ++
 src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 432 +-
 2 files changed, 437 insertions(+), 18 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c 
b/src/gallium/auxiliary/gallivm/lp_bld_init.c
index 6133883..ee23ea0 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_init.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c
@@ -113,6 +113,10 @@ create_pass_manager(struct gallivm_state *gallivm)
gallivm-passmgr = LLVMCreateFunctionPassManagerForModule(gallivm-module);
if (!gallivm-passmgr)
   return FALSE;
+   /*
+* FIXME: probably would need a per module pass manager (with some IPO
+* passes) to optimize the quite bad looking texture function calls.
+*/
 
// Old versions of LLVM get the DataLayout from the pass manager.
LLVMAddTargetData(gallivm-target, gallivm-passmgr);
@@ -575,6 +579,25 @@ gallivm_jit_function(struct gallivm_state *gallivm,
jit_func = pointer_to_func(code);
 
if (gallivm_debug  GALLIVM_DEBUG_ASM) {
+  /*
+   * XXX hack: we can only disassemble functions after compiling the
+   * module, however we've got no idea what texture functions we generated.
+   * Hence, get all functions in the module and print all matching some
+   * pattern. (Because this is triggered per function and not per module,
+   * this will of course print the texture functions each time
+   * gallivm_jit_function is invoked, not just once per module.)
+   */
+  LLVMValueRef llvm_func = LLVMGetFirstFunction(gallivm-module);
+
+  while (llvm_func) {
+ if (!util_strncmp(texfunc, LLVMGetValueName(llvm_func), 7)) {
+void *texfunc_code = LLVMGetPointerToGlobal(gallivm-engine, 
llvm_func);
+lp_disassemble(llvm_func, texfunc_code);
+ }
+ llvm_func = LLVMGetNextFunction(llvm_func);
+  }
+   }
+   if (gallivm_debug  GALLIVM_DEBUG_ASM) {
   lp_disassemble(func, code);
}
 
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c 
b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index a90278e..5083087 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -2357,30 +2357,30 @@ lp_build_sample_nop(struct gallivm_state *gallivm,
 
 
 /**
- * Build texture sampling code.
+ * Build the actual texture sampling code.
  * 'texel' will return a vector of four LLVMValueRefs corresponding to
  * R, G, B, A.
  * \param type  vector float type to use for coords, etc.
  * \param is_fetch  if this is a texel fetch instruction.
  * \param derivs  partial derivatives of (s,t,r,q) with respect to x and y
  */
-void
-lp_build_sample_soa(struct gallivm_state *gallivm,
-const struct lp_static_texture_state *static_texture_state,
-const struct lp_static_sampler_state *static_sampler_state,
-struct lp_sampler_dynamic_state *dynamic_state,
-struct lp_type type,
-boolean is_fetch,
-unsigned texture_index,
-unsigned sampler_index,
-LLVMValueRef context_ptr,
-const LLVMValueRef *coords,
-const LLVMValueRef *offsets,
-const struct lp_derivatives *derivs, /* optional */
-LLVMValueRef lod_bias, /* optional */
-LLVMValueRef explicit_lod, /* optional */
-

Re: [Mesa-dev] [PATCH] mesa/state_tracker: Fix draw-pixel-with-texture piglit test.

2015-03-25 Thread Brian Paul


Minor nit-picks below.


On 03/23/2015 11:38 PM, Matthew Dawson wrote:

When glDrawPixels was used with an external texture, the pixels passed in
were sampled instead of the texture.  Change gallium to instead move the user
texture to a new sampler below the glDrawPixels samplers and use the texture
coordinates from the raster position.

This uses a uniform for the texture coordinates instead passing it through
the vertex shader as the texture coordinates are constant for the entire
operation.  While working the vertex shader would be possible, implementing
that solution would break several assumptions throughout the glDrawPixels
implementation as well as helper functions used by other code paths, increasing
the chance for breakage.

Tested on llvmpipe, r600, and radeonsi.

V2: Complete everything missing from V1.
---
Thanks for all the help I've gotten getting this patch into shape, especially 
considering what functionality it covers.
I know it probably won't directly help anyone, but it was a decent introduction 
to some mesa concepts, and
I  hope I can apply the knowledge in the future.

  src/mesa/state_tracker/st_cb_drawpixels.c  | 25 ++---
  src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 25 +
  2 files changed, 47 insertions(+), 3 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c 
b/src/mesa/state_tracker/st_cb_drawpixels.c
index 3edf31b..8847b1f1 100644
--- a/src/mesa/state_tracker/st_cb_drawpixels.c
+++ b/src/mesa/state_tracker/st_cb_drawpixels.c
@@ -44,6 +44,7 @@
  #include main/texstore.h
  #include main/glformats.h
  #include program/program.h
+#include program/prog_parameter.h
  #include program/prog_print.h
  #include program/prog_instruction.h

@@ -676,6 +677,7 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint 
y, GLfloat z,
 GLfloat x0, y0, x1, y1;
 GLsizei maxSize;
 boolean normalized = sv[0]-texture-target != PIPE_TEXTURE_RECT;
+   int num_user_sampers = st-state.num_samplers[PIPE_SHADER_FRAGMENT];


unsigned or GLuint to match the RHS type.

s/sampers/samplers/





 /* limit checks */
 /* XXX if DrawPixels image is larger than max texture size, break
@@ -765,6 +767,9 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint 
y, GLfloat z,
if (num_sampler_view  1) {
   cso_single_sampler(cso, PIPE_SHADER_FRAGMENT, 1, sampler);
}
+  for(int i = 0; i  num_user_sampers; ++i) {


move declaration of i to start of function.  Space between for and (



+ cso_single_sampler(cso, PIPE_SHADER_FRAGMENT, i+num_sampler_view, 
st-state.samplers[PIPE_SHADER_FRAGMENT][i]);


Line wrap to 78 columns or less


+  }
cso_single_sampler_done(cso, PIPE_SHADER_FRAGMENT);
 }

@@ -786,7 +791,12 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint 
y, GLfloat z,
 cso_set_stream_outputs(st-cso_context, 0, NULL, NULL);

 /* texture state: */
-   cso_set_sampler_views(cso, PIPE_SHADER_FRAGMENT, num_sampler_view, sv);
+   {
+  struct pipe_sampler_view *lsv[PIPE_MAX_SAMPLERS];
+  memcpy(lsv, sv, num_sampler_view*sizeof(struct pipe_sampler_view*));
+  memcpy(lsv+num_sampler_view, 
st-state.sampler_views[PIPE_SHADER_FRAGMENT], num_user_sampers*sizeof(struct 
pipe_sampler_view*));
+  cso_set_sampler_views(cso, PIPE_SHADER_FRAGMENT, 
num_sampler_view+num_user_sampers, lsv);


Line wrap to 78 or fewer columns.


+   }

 /* Compute Gallium window coords (y=0=top) with pixel zoom.
  * Recall that these coords are transformed by the current
@@ -1160,8 +1170,17 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y,
}
 }

-   /* update fragment program constants */
-   st_upload_constants(st, fpv-parameters, PIPE_SHADER_FRAGMENT);
+   /* updated texture coordinates and fragment program constants. */
+   {
+  struct gl_program_parameter_list *parameters = fpv-parameters;
+  for(int i = 0; i  parameters-NumParameters; ++i) {
+ const char *name = parameters-Parameters[i].Name;
+ if (strncmp(texcoord, name, 8) == 0  strlen(name) == 9) {
+memcpy(parameters-ParameterValues[i], 
st-ctx-Current.RasterTexCoords[name[8] - 'A'], sizeof(GL_FLOAT) * 4);
+ }
+  }
+  st_upload_constants(st, parameters, PIPE_SHADER_FRAGMENT);
+   }


I'm not clear on what this hunk is actually doing.  The code looks a bit 
sketchy to me and at the very least needs a more detailed comment.





 /* draw with textured quad */
 {
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index efee4b2..2613112 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -4254,6 +4254,7 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp,
 st_src_reg coord, src0;
 st_dst_reg dst0;
 glsl_to_tgsi_instruction *inst;
+   unsigned int count_samplers_used = 0;

 /* Copy attributes of the

[Mesa-dev] [PATCH] st/mesa: remove copy-propagation pass

It's buggy and unnecessary in the presence of optimizing backends. The
only backend that will suffer is nv30, but... meh.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89759
Signed-off-by: Ilia Mirkin imir...@alum.mit.edu
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 199 -
 1 file changed, 199 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index efee4b2..0402ce3 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -461,7 +461,6 @@ public:
int get_last_temp_read(int index);
int get_last_temp_write(int index);
 
-   void copy_propagate(void);
int eliminate_dead_code(void);
 
void merge_two_dsts(void);
@@ -3757,203 +3756,6 @@ glsl_to_tgsi_visitor::get_last_temp_write(int index)
 }
 
 /*
- * On a basic block basis, tracks available PROGRAM_TEMPORARY register
- * channels for copy propagation and updates following instructions to
- * use the original versions.
- *
- * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
- * will occur.  As an example, a TXP production before this pass:
- *
- * 0: MOV TEMP[1], INPUT[4].xyyy;
- * 1: MOV TEMP[1].w, INPUT[4].;
- * 2: TXP TEMP[2], TEMP[1], texture[0], 2D;
- *
- * and after:
- *
- * 0: MOV TEMP[1], INPUT[4].xyyy;
- * 1: MOV TEMP[1].w, INPUT[4].;
- * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
- *
- * which allows for dead code elimination on TEMP[1]'s writes.
- */
-void
-glsl_to_tgsi_visitor::copy_propagate(void)
-{
-   glsl_to_tgsi_instruction **acp = rzalloc_array(mem_ctx,
-  glsl_to_tgsi_instruction *,
-  this-next_temp * 4);
-   int *acp_level = rzalloc_array(mem_ctx, int, this-next_temp * 4);
-   int level = 0;
-
-   foreach_in_list(glsl_to_tgsi_instruction, inst, this-instructions) {
-  assert(inst-dst[0].file != PROGRAM_TEMPORARY
- || inst-dst[0].index  this-next_temp);
-
-  /* First, do any copy propagation possible into the src regs. */
-  for (int r = 0; r  3; r++) {
- glsl_to_tgsi_instruction *first = NULL;
- bool good = true;
- int acp_base = inst-src[r].index * 4;
-
- if (inst-src[r].file != PROGRAM_TEMPORARY ||
- inst-src[r].reladdr ||
- inst-src[r].reladdr2)
-continue;
-
- /* See if we can find entries in the ACP consisting of MOVs
-  * from the same src register for all the swizzled channels
-  * of this src register reference.
-  */
- for (int i = 0; i  4; i++) {
-int src_chan = GET_SWZ(inst-src[r].swizzle, i);
-glsl_to_tgsi_instruction *copy_chan = acp[acp_base + src_chan];
-
-if (!copy_chan) {
-   good = false;
-   break;
-}
-
-assert(acp_level[acp_base + src_chan] = level);
-
-if (!first) {
-   first = copy_chan;
-} else {
-   if (first-src[0].file != copy_chan-src[0].file ||
-   first-src[0].index != copy_chan-src[0].index ||
-   first-src[0].index2D != copy_chan-src[0].index2D) {
-  good = false;
-  break;
-   }
-}
- }
-
- if (good) {
-/* We've now validated that we can copy-propagate to
- * replace this src register reference.  Do it.
- */
-inst-src[r].file = first-src[0].file;
-inst-src[r].index = first-src[0].index;
-inst-src[r].index2D = first-src[0].index2D;
-inst-src[r].has_index2 = first-src[0].has_index2;
-
-int swizzle = 0;
-for (int i = 0; i  4; i++) {
-   int src_chan = GET_SWZ(inst-src[r].swizzle, i);
-   glsl_to_tgsi_instruction *copy_inst = acp[acp_base + src_chan];
-   swizzle |= (GET_SWZ(copy_inst-src[0].swizzle, src_chan)  (3 
* i));
-}
-inst-src[r].swizzle = swizzle;
- }
-  }
-
-  switch (inst-op) {
-  case TGSI_OPCODE_BGNLOOP:
-  case TGSI_OPCODE_ENDLOOP:
- /* End of a basic block, clear the ACP entirely. */
- memset(acp, 0, sizeof(*acp) * this-next_temp * 4);
- break;
-
-  case TGSI_OPCODE_IF:
-  case TGSI_OPCODE_UIF:
- ++level;
- break;
-
-  case TGSI_OPCODE_ENDIF:
-  case TGSI_OPCODE_ELSE:
- /* Clear all channels written inside the block from the ACP, but
-  * leaving those that were not touched.
-  */
- for (int r = 0; r  this-next_temp; r++) {
-for (int c = 0; c  4; c++) {
-   if (!acp[4 * r + c])
-  continue;
-
-   if (acp_level[4 * r + c] = level)
-  acp[4 * r + c] = NULL;
-}
- }
- if

Re: [Mesa-dev] [PATCH] i965/fs: Combine tex/fb_write operations (opt)

On Mon, Feb 23, 2015 at 10:02:26AM -0800, Matt Turner wrote:
 On Sun, Feb 22, 2015 at 3:06 PM, Ben Widawsky b...@bwidawsk.net wrote:
  On Sun, Feb 08, 2015 at 02:48:02PM -0800, Matt Turner wrote:
  On Sun, Feb 8, 2015 at 1:59 PM, Ben Widawsky
  benjamin.widaw...@intel.com wrote:
   +   /* The LOAD_PAYLOAD helper seems like the obvious choice here. 
   However, it
   +* requires a lot of information about the sources to appropriately 
   figure
   +* out the number of registers needed to be used. Given this stage 
   in our
   +* optimization, we may not have the appropriate GRFs required by
   +* LOAD_PAYLOAD at this point (copy propogation). Therefore, we need 
   to
 
  typo: propagation
 
  I'm not sure what w e may not have the appropriate GRFs ... means?
 
  Here is the relevant part of the original IRC conversation:
  jekstrand   [08:52:30] No, the problem is uniforms and immediates.
  jekstrand   [08:52:58] They can't go in a LOAD_PAYLOAD directly because 
  we don't know how many destination registers they take up.
  jekstrand   [08:54:16] for LOAD_PAYLOAD to work, we need more 
  information than a regular instruction.  We need to know how many 
  destination registers a given source takes up, we need to know whether it 
  needs to use the second-half quarter control for the MOV that gets 
  generated, etc.
  jekstrand   [08:54:34] Using GRF sources more-or-less gives us this.  
  Immediates don't.
  bwidawks[08:54:55] right - this is what confuses me though... the 
  immediates seem to already be there.
  jekstrand   [08:55:38] Right.  The immediates can get there through 
  copy-propagation and that's fine.  However, they're not there when it's 
  created.
  jekstrand   [08:55:43] It's all a mess
 
  Do you have a preferred way to state this concisely?
 
 Heh, I'm not sure I understand LOAD_PAYLOAD anymore. The comment's
 probably fine as-is.
 
   @@ -3609,6 +3709,7 @@ fs_visitor::optimize()
  OPT(opt_peephole_predicated_break);
  OPT(opt_cmod_propagation);
  OPT(dead_code_eliminate);
   +  OPT(opt_sampler_eot);
 
  Do you think we really need to do this in the optimization loop?
 
  I don't expect this to allow other optimization passes to make
  additional progress, and we can obviously do it successfully only
  once. I suspect we can do it after the optimization loop.
 
 
  It's possible I didn't quite spot where you want me to put the 
  optimization. I
  think that the way the code works right now, that will not work. The
  optimization is depending on DCE to kill off the only LOAD_PAYLOAD and it's
  corresponding MOVs. I agree that it is an optimization that can only occur 
  once,
  and generally it doesn't belong in the progress loop though.
 
 Ah, sorry. I'd probably do it between the end of the optimization loop
 and the call to lower_load_payload().

Right. I looked at this too and at least from the 5 second glance at the code,
it seems this path can skip DCE. If you are certain that this cannot happen, I
will gladly make the change.

I defer to you regarding whether or not the optimizations can do more after this
happens (ie. we definitely want to take it out of the loop).
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 89586] Drivers/DRI/swrast

https://bugs.freedesktop.org/show_bug.cgi?id=89586

Dan Sebald daniel.seb...@ieee.org changed:

   What|Removed |Added

 Attachment #114591|0   |1
is obsolete||

--- Comment #35 from Dan Sebald daniel.seb...@ieee.org ---
Created attachment 114619
  -- https://bugs.freedesktop.org/attachment.cgi?id=114619action=edit
Piglit pixelzoom test suite

Attached is an update to the piglit glPixelZoom()/glDrawPixels() tests.  I've
added a red background before the tests are run so that it's obvious when the
driver hasn't written to the frame buffer.  I also fixed a bug in the
over/underrun tests.  I won't update the summary of images right now.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 89759] WebGL OGL ES GLSL conformance test with mesa drivers fails

https://bugs.freedesktop.org/show_bug.cgi?id=89759

--- Comment #5 from Ilia Mirkin imir...@alum.mit.edu ---
FTR, the issue is that before the copy prop we have

DCL TEMP[1..4], ARRAY(1), LOCAL
DCL TEMP[5..8], ARRAY(2), LOCAL
DCL TEMP[9..12], ARRAY(3), LOCAL
DCL TEMP[13..16], ARRAY(4), LOCAL
DCL TEMP[17..20], ARRAY(5), LOCAL
DCL TEMP[21..24], ARRAY(6), LOCAL
DCL TEMP[25..26], LOCAL

 28: MOV TEMP[25].x, TEMP[14].
 30: ADD TEMP[14].x, TEMP[14]., IMM[0].
 46: FSEQ TEMP[26].x, TEMP[25]., IMM[0].

And the copy propagation pass decides to copy-prop TEMP[14] into TEMP[25] on
line 46. This ends poorly. Perhaps because file == PROGRAM_ARRAY isn't properly
supported by the pass? Not sure. Sent a patch to just remove.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] st/mesa: remove copy-propagation pass

2015-03-25 Thread Marek Olšák

Reviewed-by: Marek Olšák marek.ol...@amd.com

I might need to wait for other people's opinion too.

Marek

On Wed, Mar 25, 2015 at 6:34 PM, Ilia Mirkin imir...@alum.mit.edu wrote:
 It's buggy and unnecessary in the presence of optimizing backends. The
 only backend that will suffer is nv30, but... meh.

 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89759
 Signed-off-by: Ilia Mirkin imir...@alum.mit.edu
 ---
  src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 199 
 -
  1 file changed, 199 deletions(-)

 diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
 b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
 index efee4b2..0402ce3 100644
 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
 +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
 @@ -461,7 +461,6 @@ public:
 int get_last_temp_read(int index);
 int get_last_temp_write(int index);

 -   void copy_propagate(void);
 int eliminate_dead_code(void);

 void merge_two_dsts(void);
 @@ -3757,203 +3756,6 @@ glsl_to_tgsi_visitor::get_last_temp_write(int index)
  }

  /*
 - * On a basic block basis, tracks available PROGRAM_TEMPORARY register
 - * channels for copy propagation and updates following instructions to
 - * use the original versions.
 - *
 - * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
 - * will occur.  As an example, a TXP production before this pass:
 - *
 - * 0: MOV TEMP[1], INPUT[4].xyyy;
 - * 1: MOV TEMP[1].w, INPUT[4].;
 - * 2: TXP TEMP[2], TEMP[1], texture[0], 2D;
 - *
 - * and after:
 - *
 - * 0: MOV TEMP[1], INPUT[4].xyyy;
 - * 1: MOV TEMP[1].w, INPUT[4].;
 - * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
 - *
 - * which allows for dead code elimination on TEMP[1]'s writes.
 - */
 -void
 -glsl_to_tgsi_visitor::copy_propagate(void)
 -{
 -   glsl_to_tgsi_instruction **acp = rzalloc_array(mem_ctx,
 -  glsl_to_tgsi_instruction *,
 -  this-next_temp * 4);
 -   int *acp_level = rzalloc_array(mem_ctx, int, this-next_temp * 4);
 -   int level = 0;
 -
 -   foreach_in_list(glsl_to_tgsi_instruction, inst, this-instructions) {
 -  assert(inst-dst[0].file != PROGRAM_TEMPORARY
 - || inst-dst[0].index  this-next_temp);
 -
 -  /* First, do any copy propagation possible into the src regs. */
 -  for (int r = 0; r  3; r++) {
 - glsl_to_tgsi_instruction *first = NULL;
 - bool good = true;
 - int acp_base = inst-src[r].index * 4;
 -
 - if (inst-src[r].file != PROGRAM_TEMPORARY ||
 - inst-src[r].reladdr ||
 - inst-src[r].reladdr2)
 -continue;
 -
 - /* See if we can find entries in the ACP consisting of MOVs
 -  * from the same src register for all the swizzled channels
 -  * of this src register reference.
 -  */
 - for (int i = 0; i  4; i++) {
 -int src_chan = GET_SWZ(inst-src[r].swizzle, i);
 -glsl_to_tgsi_instruction *copy_chan = acp[acp_base + src_chan];
 -
 -if (!copy_chan) {
 -   good = false;
 -   break;
 -}
 -
 -assert(acp_level[acp_base + src_chan] = level);
 -
 -if (!first) {
 -   first = copy_chan;
 -} else {
 -   if (first-src[0].file != copy_chan-src[0].file ||
 -   first-src[0].index != copy_chan-src[0].index ||
 -   first-src[0].index2D != copy_chan-src[0].index2D) {
 -  good = false;
 -  break;
 -   }
 -}
 - }
 -
 - if (good) {
 -/* We've now validated that we can copy-propagate to
 - * replace this src register reference.  Do it.
 - */
 -inst-src[r].file = first-src[0].file;
 -inst-src[r].index = first-src[0].index;
 -inst-src[r].index2D = first-src[0].index2D;
 -inst-src[r].has_index2 = first-src[0].has_index2;
 -
 -int swizzle = 0;
 -for (int i = 0; i  4; i++) {
 -   int src_chan = GET_SWZ(inst-src[r].swizzle, i);
 -   glsl_to_tgsi_instruction *copy_inst = acp[acp_base + 
 src_chan];
 -   swizzle |= (GET_SWZ(copy_inst-src[0].swizzle, src_chan)  
 (3 * i));
 -}
 -inst-src[r].swizzle = swizzle;
 - }
 -  }
 -
 -  switch (inst-op) {
 -  case TGSI_OPCODE_BGNLOOP:
 -  case TGSI_OPCODE_ENDLOOP:
 - /* End of a basic block, clear the ACP entirely. */
 - memset(acp, 0, sizeof(*acp) * this-next_temp * 4);
 - break;
 -
 -  case TGSI_OPCODE_IF:
 -  case TGSI_OPCODE_UIF:
 - ++level;
 - break;
 -
 -  case TGSI_OPCODE_ENDIF:
 -  case TGSI_OPCODE_ELSE:
 - /* Clear all channels written inside the block from the ACP, but
 -  * leaving those that were not touched.
 -

Re: [Mesa-dev] [PATCH] st/mesa: remove copy-propagation pass

2015-03-25 Thread Brian Paul

The problem is, our binary shader interface only supports 32 temps at 
this time.  We sometimes bump into that limit as-is.


-Brian

On 03/25/2015 01:04 PM, Ilia Mirkin wrote:

Yes, more temp registers and more instructions. But presumably the
backend has an optimization pass that is at least as good as this one
(hopefully better!). Is that not the case for vmware?

On Wed, Mar 25, 2015 at 2:59 PM, Brian Paul bri...@vmware.com wrote:

Will removing this pass have much effect on the number of temp regs used?
It looks like more instructions may be emitted w/out this pass.

We're kind of sensitive to that in the VMware driver.

-Brian

On 03/25/2015 12:16 PM, Marek Olšák wrote:


Reviewed-by: Marek Olšák marek.ol...@amd.com

I might need to wait for other people's opinion too.

Marek

On Wed, Mar 25, 2015 at 6:34 PM, Ilia Mirkin imir...@alum.mit.edu wrote:


It's buggy and unnecessary in the presence of optimizing backends. The
only backend that will suffer is nv30, but... meh.

Bugzilla:
https://urldefense.proofpoint.com/v2/url?u=https-3A__bugs.freedesktop.org_show-5Fbug.cgi-3Fid-3D89759d=AwIGaQc=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEsr=T0t4QG7chq2ZwJo6wilkFznRSFy-8uDKartPGbomVj8m=hW65RavQ_Xuvw96f61daCkas_SjeEudtADNX3BzgNQUs=zjWC0LOuYp8NH6K072ITDgPYCCE0F_a_LCdd9zrdrhAe=

Signed-off-by: Ilia Mirkin imir...@alum.mit.edu
---
   src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 199
-
   1 file changed, 199 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index efee4b2..0402ce3 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -461,7 +461,6 @@ public:
  int get_last_temp_read(int index);
  int get_last_temp_write(int index);

-   void copy_propagate(void);
  int eliminate_dead_code(void);

  void merge_two_dsts(void);
@@ -3757,203 +3756,6 @@ glsl_to_tgsi_visitor::get_last_temp_write(int
index)
   }

   /*
- * On a basic block basis, tracks available PROGRAM_TEMPORARY register
- * channels for copy propagation and updates following instructions to
- * use the original versions.
- *
- * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
- * will occur.  As an example, a TXP production before this pass:
- *
- * 0: MOV TEMP[1], INPUT[4].xyyy;
- * 1: MOV TEMP[1].w, INPUT[4].;
- * 2: TXP TEMP[2], TEMP[1], texture[0], 2D;
- *
- * and after:
- *
- * 0: MOV TEMP[1], INPUT[4].xyyy;
- * 1: MOV TEMP[1].w, INPUT[4].;
- * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
- *
- * which allows for dead code elimination on TEMP[1]'s writes.
- */
-void
-glsl_to_tgsi_visitor::copy_propagate(void)
-{
-   glsl_to_tgsi_instruction **acp = rzalloc_array(mem_ctx,
-
glsl_to_tgsi_instruction *,
-  this-next_temp * 4);
-   int *acp_level = rzalloc_array(mem_ctx, int, this-next_temp * 4);
-   int level = 0;
-
-   foreach_in_list(glsl_to_tgsi_instruction, inst, this-instructions)
{
-  assert(inst-dst[0].file != PROGRAM_TEMPORARY
- || inst-dst[0].index  this-next_temp);
-
-  /* First, do any copy propagation possible into the src regs. */
-  for (int r = 0; r  3; r++) {
- glsl_to_tgsi_instruction *first = NULL;
- bool good = true;
- int acp_base = inst-src[r].index * 4;
-
- if (inst-src[r].file != PROGRAM_TEMPORARY ||
- inst-src[r].reladdr ||
- inst-src[r].reladdr2)
-continue;
-
- /* See if we can find entries in the ACP consisting of MOVs
-  * from the same src register for all the swizzled channels
-  * of this src register reference.
-  */
- for (int i = 0; i  4; i++) {
-int src_chan = GET_SWZ(inst-src[r].swizzle, i);
-glsl_to_tgsi_instruction *copy_chan = acp[acp_base +
src_chan];
-
-if (!copy_chan) {
-   good = false;
-   break;
-}
-
-assert(acp_level[acp_base + src_chan] = level);
-
-if (!first) {
-   first = copy_chan;
-} else {
-   if (first-src[0].file != copy_chan-src[0].file ||
-   first-src[0].index != copy_chan-src[0].index ||
-   first-src[0].index2D != copy_chan-src[0].index2D) {
-  good = false;
-  break;
-   }
-}
- }
-
- if (good) {
-/* We've now validated that we can copy-propagate to
- * replace this src register reference.  Do it.
- */
-inst-src[r].file = first-src[0].file;
-inst-src[r].index = first-src[0].index;
-inst-src[r].index2D = first-src[0].index2D;
-inst-src[r].has_index2 = first-src[0].has_index2;
-
-int swizzle = 0;
-for (int i = 0; i  4; i++) {
-   int src_chan = GET_SWZ(inst-src[r].swizzle, i);

[Mesa-dev] [PATCH 1/2 v2] mesa: Validate target before resolving tex obj in glTex(ture)SubImageXD

2015-03-25 Thread Eduardo Lima Mitev

Currently, glTexSubImageXD attempt to resolve the texture object
(by calling _mesa_get_current_tex_object()) before validating the given
target. However, that method explicitly states that target must have been
validated before calling it, so it never returns a user error.

The target validation occurs later when texsubimage_error_check() is called.

This patch reorganizes target validation, taking it out from the error check
function and into a point before the texture object is resolved.
---
 src/mesa/main/teximage.c | 29 ++---
 1 file changed, 14 insertions(+), 15 deletions(-)

diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
index 8d9d7cf..17c9f56 100644
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -2478,13 +2478,6 @@ texsubimage_error_check(struct gl_context *ctx, GLuint 
dimensions,
   return GL_TRUE;
}
 
-   /* check target (proxies not allowed) */
-   if (!legal_texsubimage_target(ctx, dimensions, target, dsa)) {
-  _mesa_error(ctx, GL_INVALID_ENUM, %s(target=%s),
-  callerName, _mesa_lookup_enum_by_nr(target));
-  return GL_TRUE;
-   }
-
/* level check */
if (level  0 || level = _mesa_max_texture_levels(ctx, target)) {
   _mesa_error(ctx, GL_INVALID_VALUE, %s(level=%d), callerName, level);
@@ -3512,14 +3505,6 @@ _mesa_texture_sub_image(struct gl_context *ctx, GLuint 
dims,
 {
FLUSH_VERTICES(ctx, 0);
 
-   /* check target (proxies not allowed) */
-   if (!legal_texsubimage_target(ctx, dims, target, dsa)) {
-  _mesa_error(ctx, GL_INVALID_ENUM, glTex%sSubImage%uD(target=%s),
-  dsa ? ture : ,
-  dims, _mesa_lookup_enum_by_nr(target));
-  return;
-   }
-
if (ctx-NewState  _NEW_PIXEL)
   _mesa_update_state(ctx);
 
@@ -3569,6 +3554,13 @@ texsubimage(struct gl_context *ctx, GLuint dims, GLenum 
target, GLint level,
struct gl_texture_object *texObj;
struct gl_texture_image *texImage;
 
+   /* check target (proxies not allowed) */
+   if (!legal_texsubimage_target(ctx, dims, target, false)) {
+  _mesa_error(ctx, GL_INVALID_ENUM, glTexSubImage%uD(target=%s),
+  dims, _mesa_lookup_enum_by_nr(target));
+  return;
+   }
+
texObj = _mesa_get_current_tex_object(ctx, target);
if (!texObj)
   return;
@@ -3629,6 +3621,13 @@ texturesubimage(struct gl_context *ctx, GLuint dims,
   return;
}
 
+   /* check target (proxies not allowed) */
+   if (!legal_texsubimage_target(ctx, dims, texObj-Target, true)) {
+  _mesa_error(ctx, GL_INVALID_ENUM, %s(target=%s),
+  callerName, _mesa_lookup_enum_by_nr(texObj-Target));
+  return;
+   }
+
if (texsubimage_error_check(ctx, dims, texObj, texObj-Target, level,
xoffset, yoffset, zoffset,
width, height, depth, format, type,
-- 
2.1.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/2 v2] mesa: Add missing check of format and type in glTexSubImageXD on GLES 3.0

2015-03-25 Thread Eduardo Lima Mitev

Argument validation for glTexSubImageXD is missing a check of format and type
against texture object's internal format when profile is OpenGL-ES 3.0+.

This patch also groups together all format and type checks into a single
block of code for clarity.

Fixes 2 dEQP tests:
* dEQP-GLES3.functional.negative_api.texture.texsubimage2d
* dEQP-GLES3.functional.negative_api.texture.texsubimage3d
---
 src/mesa/main/teximage.c | 57 ++--
 1 file changed, 36 insertions(+), 21 deletions(-)

diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
index 17c9f56..ffa0fd1 100644
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -2484,19 +2484,12 @@ texsubimage_error_check(struct gl_context *ctx, GLuint 
dimensions,
   return GL_TRUE;
}
 
-   /* OpenGL ES 1.x and OpenGL ES 2.0 impose additional restrictions on the
-* combinations of format and type that can be used.  Formats and types
-* that require additional extensions (e.g., GL_FLOAT requires
-* GL_OES_texture_float) are filtered elsewhere.
-*/
-   if (_mesa_is_gles(ctx)  !_mesa_is_gles3(ctx)) {
-  err = _mesa_es_error_check_format_and_type(format, type, dimensions);
-  if (err != GL_NO_ERROR) {
- _mesa_error(ctx, err, %s(format = %s, type = %s),
- callerName, _mesa_lookup_enum_by_nr(format),
- _mesa_lookup_enum_by_nr(type));
- return GL_TRUE;
-  }
+   texImage = _mesa_select_tex_image(texObj, target, level);
+   if (!texImage) {
+  /* non-existant texture level */
+  _mesa_error(ctx, GL_INVALID_OPERATION, %s(invalid texture image),
+  callerName);
+  return GL_TRUE;
}
 
err = _mesa_error_check_format_and_type(ctx, format, type);
@@ -2508,6 +2501,36 @@ texsubimage_error_check(struct gl_context *ctx, GLuint 
dimensions,
   return GL_TRUE;
}
 
+   /* OpenGL ES impose additional restrictions on the combinations of format
+* and type that can be used. Formats and types that require additional
+* extensions (e.g., GL_FLOAT requires GL_OES_texture_float) are filtered
+* elsewhere.
+*/
+   if (_mesa_is_gles(ctx)) {
+  if (!_mesa_is_gles3(ctx)) {
+ err = _mesa_es_error_check_format_and_type(format, type, dimensions);
+ if (err != GL_NO_ERROR) {
+_mesa_error(ctx, err, %s(format = %s, type = %s),
+callerName, _mesa_lookup_enum_by_nr(format),
+_mesa_lookup_enum_by_nr(type));
+return GL_TRUE;
+ }
+  }
+  else {
+ err = _mesa_es3_error_check_format_and_type(ctx, format, type,
+ texImage-InternalFormat);
+ if (err != GL_NO_ERROR) {
+_mesa_error(ctx, err,
+%s(incompatible format = %s, type = %s, 
+internalformat = %s),
+callerName, _mesa_lookup_enum_by_nr(format),
+_mesa_lookup_enum_by_nr(type),
+_mesa_lookup_enum_by_nr(texImage-InternalFormat));
+return GL_TRUE;
+ }
+  }
+   }
+
/* validate the bound PBO, if any */
if (!_mesa_validate_pbo_source(ctx, dimensions, ctx-Unpack,
   width, height, depth, format, type,
@@ -2515,14 +2538,6 @@ texsubimage_error_check(struct gl_context *ctx, GLuint 
dimensions,
   return GL_TRUE;
}
 
-   texImage = _mesa_select_tex_image(texObj, target, level);
-   if (!texImage) {
-  /* non-existant texture level */
-  _mesa_error(ctx, GL_INVALID_OPERATION, %s(invalid texture image),
-  callerName);
-  return GL_TRUE;
-   }
-
if (error_check_subtexture_dimensions(ctx, dimensions,
  texImage, xoffset, yoffset, zoffset,
  width, height, depth, callerName)) {
-- 
2.1.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] gallivm: use llvm function calls for texturing instead of inlining

2015-03-25 Thread sroland

From: Roland Scheidegger srol...@vmware.com

There are issues with inlining everything, most notably llvm will use much
more memory (and be slower) when compiling. Ideally we'd probably use
functions for shader functions too but texture sampling usually is responsible
for quite some IR (it can easily reach 80% of total IR instructions) so this
seems like a good start.
This still generates a different function for all different combinations just
like before, however it is possible llvm is missing some optimization
opportunities - it is believed though such opportunities should be somewhat
rare, but at least for now it can still be switched off (at compile time only).
It should probably make compiled code also smaller because the same function
should be used for different variants in the same module (so for the
opaque/partial or linear/elts variants).
No piglit change (though it does indeed speed up unrealistic tests like
fp-indirections2 by a factor of 30 or so).
Has a small negative performance impact in openarena - I suspect this could
be fixed by running some IPO passes (despite the private linkage, llvm right
now does NO optimization at all wrt anything going past the call, even if
there's just one caller - so things like values stored before the call and then
always written by the function etc. will not be optimized away, nor will dead
arguments (which we mostly shouldn't have) be eliminated, always constant
arguments promoted etc.).

v2: use proper return values instead of pointer function arguments.
llvm supports aggregate return values, which do wonders here eliminating
unnecessary stack variables - everything in fact will be returned in registers
even without any IPO optimizations. It makes the code simpler too.
With this I could not measure a peformance impact in openarena any longer
(though since there's still no constant value propagation etc. into the tex
functions this does not mean it couldn't have a negative impact elsewhere).
---
 src/gallium/auxiliary/gallivm/lp_bld_init.c   |  23 ++
 src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 418 +-
 2 files changed, 423 insertions(+), 18 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c 
b/src/gallium/auxiliary/gallivm/lp_bld_init.c
index 6133883..ee23ea0 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_init.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c
@@ -113,6 +113,10 @@ create_pass_manager(struct gallivm_state *gallivm)
gallivm-passmgr = LLVMCreateFunctionPassManagerForModule(gallivm-module);
if (!gallivm-passmgr)
   return FALSE;
+   /*
+* FIXME: probably would need a per module pass manager (with some IPO
+* passes) to optimize the quite bad looking texture function calls.
+*/
 
// Old versions of LLVM get the DataLayout from the pass manager.
LLVMAddTargetData(gallivm-target, gallivm-passmgr);
@@ -575,6 +579,25 @@ gallivm_jit_function(struct gallivm_state *gallivm,
jit_func = pointer_to_func(code);
 
if (gallivm_debug  GALLIVM_DEBUG_ASM) {
+  /*
+   * XXX hack: we can only disassemble functions after compiling the
+   * module, however we've got no idea what texture functions we generated.
+   * Hence, get all functions in the module and print all matching some
+   * pattern. (Because this is triggered per function and not per module,
+   * this will of course print the texture functions each time
+   * gallivm_jit_function is invoked, not just once per module.)
+   */
+  LLVMValueRef llvm_func = LLVMGetFirstFunction(gallivm-module);
+
+  while (llvm_func) {
+ if (!util_strncmp(texfunc, LLVMGetValueName(llvm_func), 7)) {
+void *texfunc_code = LLVMGetPointerToGlobal(gallivm-engine, 
llvm_func);
+lp_disassemble(llvm_func, texfunc_code);
+ }
+ llvm_func = LLVMGetNextFunction(llvm_func);
+  }
+   }
+   if (gallivm_debug  GALLIVM_DEBUG_ASM) {
   lp_disassemble(func, code);
}
 
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c 
b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index a90278e..c91ae59 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -2357,30 +2357,30 @@ lp_build_sample_nop(struct gallivm_state *gallivm,
 
 
 /**
- * Build texture sampling code.
+ * Build the actual texture sampling code.
  * 'texel' will return a vector of four LLVMValueRefs corresponding to
  * R, G, B, A.
  * \param type  vector float type to use for coords, etc.
  * \param is_fetch  if this is a texel fetch instruction.
  * \param derivs  partial derivatives of (s,t,r,q) with respect to x and y
  */
-void
-lp_build_sample_soa(struct gallivm_state *gallivm,
-const struct lp_static_texture_state *static_texture_state,
-const struct lp_static_sampler_state *static_sampler_state,
-struct lp_sampler_dynamic_state *dynamic_state,
-

Re: [Mesa-dev] [PATCH 4/8] scons: Don't build osmesa.

On 25/03/15 15:21, Jose Fonseca wrote:
On 25/03/15 14:53, Emil Velikov wrote:
On 24 March 2015 at 21:16, Jose Fonseca jfons...@vmware.com wrote:
There doesn't seem much interest on osmesa on Windows, particularly
classic osmesa.

If there is indeed interest in osmesa on Windows, we should instead
integrate src/gallium/targets/osmesa into SCons.
Afaict the Octave people still use it. There was a guy in #dri-devel
who was having issues with the Windows build not too long ago [1]

Iirc the VTK folk are were using osmesa, although I'm not sure if (how
much) Windows support is a thing for them. Hence the presence of the
scons build.

All of that is more of jfyi rather than feeling sentimental about
nuking it :)
-Emil

Thanks for the info Emil.

The osmesa I removed from scons was the one with classic sw rasterizer.

That's precisely the one they are using. Although I'll add a note in the
release notes (update the rest of the docs) so that there is no funny
surprises :-)

The osmesa w/ softpipe/llvmpipe is in src/gallium/targets/osmesa , but
was never integrated into scons build (just autotools.)

I see your concern and I'm not trying to force anything on you.

Fwiw if I have to choose between scons and cmake I would go for the
latter. Yet considering the amount of work/time required, I'd assume
that fixing the autotools bugs would be the faster route. If you're ok
with the idea, feel free to submit bugs and I'll check them over.

Cheers
Emil

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] clover: Update the wait_count of the correct event when chaining events

2015-03-25 Thread Tom Stellard

Cc: 10.5 10.4 mesa-sta...@lists.freedesktop.org
---
 src/gallium/state_trackers/clover/core/event.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/state_trackers/clover/core/event.cpp 
b/src/gallium/state_trackers/clover/core/event.cpp
index 58de888..9d78b48 100644
--- a/src/gallium/state_trackers/clover/core/event.cpp
+++ b/src/gallium/state_trackers/clover/core/event.cpp
@@ -67,7 +67,7 @@ event::signalled() const {
 void
 event::chain(event ev) {
if (wait_count) {
-  ev.wait_count++;
+  wait_count++;
   _chain.push_back(ev);
}
ev.deps.push_back(*this);
-- 
2.0.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] st/mesa: remove copy-propagation pass

Yes, more temp registers and more instructions. But presumably the
backend has an optimization pass that is at least as good as this one
(hopefully better!). Is that not the case for vmware?

On Wed, Mar 25, 2015 at 2:59 PM, Brian Paul bri...@vmware.com wrote:
 Will removing this pass have much effect on the number of temp regs used?
 It looks like more instructions may be emitted w/out this pass.

 We're kind of sensitive to that in the VMware driver.

 -Brian

 On 03/25/2015 12:16 PM, Marek Olšák wrote:

 Reviewed-by: Marek Olšák marek.ol...@amd.com

 I might need to wait for other people's opinion too.

 Marek

 On Wed, Mar 25, 2015 at 6:34 PM, Ilia Mirkin imir...@alum.mit.edu wrote:

 It's buggy and unnecessary in the presence of optimizing backends. The
 only backend that will suffer is nv30, but... meh.

 Bugzilla:
 https://urldefense.proofpoint.com/v2/url?u=https-3A__bugs.freedesktop.org_show-5Fbug.cgi-3Fid-3D89759d=AwIGaQc=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEsr=T0t4QG7chq2ZwJo6wilkFznRSFy-8uDKartPGbomVj8m=hW65RavQ_Xuvw96f61daCkas_SjeEudtADNX3BzgNQUs=zjWC0LOuYp8NH6K072ITDgPYCCE0F_a_LCdd9zrdrhAe=

 Signed-off-by: Ilia Mirkin imir...@alum.mit.edu
 ---
   src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 199
 -
   1 file changed, 199 deletions(-)

 diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
 b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
 index efee4b2..0402ce3 100644
 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
 +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
 @@ -461,7 +461,6 @@ public:
  int get_last_temp_read(int index);
  int get_last_temp_write(int index);

 -   void copy_propagate(void);
  int eliminate_dead_code(void);

  void merge_two_dsts(void);
 @@ -3757,203 +3756,6 @@ glsl_to_tgsi_visitor::get_last_temp_write(int
 index)
   }

   /*
 - * On a basic block basis, tracks available PROGRAM_TEMPORARY register
 - * channels for copy propagation and updates following instructions to
 - * use the original versions.
 - *
 - * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
 - * will occur.  As an example, a TXP production before this pass:
 - *
 - * 0: MOV TEMP[1], INPUT[4].xyyy;
 - * 1: MOV TEMP[1].w, INPUT[4].;
 - * 2: TXP TEMP[2], TEMP[1], texture[0], 2D;
 - *
 - * and after:
 - *
 - * 0: MOV TEMP[1], INPUT[4].xyyy;
 - * 1: MOV TEMP[1].w, INPUT[4].;
 - * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
 - *
 - * which allows for dead code elimination on TEMP[1]'s writes.
 - */
 -void
 -glsl_to_tgsi_visitor::copy_propagate(void)
 -{
 -   glsl_to_tgsi_instruction **acp = rzalloc_array(mem_ctx,
 -
 glsl_to_tgsi_instruction *,
 -  this-next_temp * 4);
 -   int *acp_level = rzalloc_array(mem_ctx, int, this-next_temp * 4);
 -   int level = 0;
 -
 -   foreach_in_list(glsl_to_tgsi_instruction, inst, this-instructions)
 {
 -  assert(inst-dst[0].file != PROGRAM_TEMPORARY
 - || inst-dst[0].index  this-next_temp);
 -
 -  /* First, do any copy propagation possible into the src regs. */
 -  for (int r = 0; r  3; r++) {
 - glsl_to_tgsi_instruction *first = NULL;
 - bool good = true;
 - int acp_base = inst-src[r].index * 4;
 -
 - if (inst-src[r].file != PROGRAM_TEMPORARY ||
 - inst-src[r].reladdr ||
 - inst-src[r].reladdr2)
 -continue;
 -
 - /* See if we can find entries in the ACP consisting of MOVs
 -  * from the same src register for all the swizzled channels
 -  * of this src register reference.
 -  */
 - for (int i = 0; i  4; i++) {
 -int src_chan = GET_SWZ(inst-src[r].swizzle, i);
 -glsl_to_tgsi_instruction *copy_chan = acp[acp_base +
 src_chan];
 -
 -if (!copy_chan) {
 -   good = false;
 -   break;
 -}
 -
 -assert(acp_level[acp_base + src_chan] = level);
 -
 -if (!first) {
 -   first = copy_chan;
 -} else {
 -   if (first-src[0].file != copy_chan-src[0].file ||
 -   first-src[0].index != copy_chan-src[0].index ||
 -   first-src[0].index2D != copy_chan-src[0].index2D) {
 -  good = false;
 -  break;
 -   }
 -}
 - }
 -
 - if (good) {
 -/* We've now validated that we can copy-propagate to
 - * replace this src register reference.  Do it.
 - */
 -inst-src[r].file = first-src[0].file;
 -inst-src[r].index = first-src[0].index;
 -inst-src[r].index2D = first-src[0].index2D;
 -inst-src[r].has_index2 = first-src[0].has_index2;
 -
 -int swizzle = 0;
 -for (int i = 0; i  4; i++) {
 -   int src_chan = GET_SWZ(inst-src[r].swizzle, i);
 -   glsl_to_tgsi_instruction *copy_inst = acp[acp_base +

Re: [Mesa-dev] [PATCH] st/mesa: remove copy-propagation pass

2015-03-25 Thread Rob Clark

On Wed, Mar 25, 2015 at 2:16 PM, Marek Olšák mar...@gmail.com wrote:
 Reviewed-by: Marek Olšák marek.ol...@amd.com

 I might need to wait for other people's opinion too.

I guess technically a2xx backend might suffer.. not even really sure
about that, but meh.  We can probably eventually do
tgsi-nir-optimize-tgsi if it was that big of a problem.

Reviewed-by: Rob Clark robdcl...@gmail.com


 Marek

 On Wed, Mar 25, 2015 at 6:34 PM, Ilia Mirkin imir...@alum.mit.edu wrote:
 It's buggy and unnecessary in the presence of optimizing backends. The
 only backend that will suffer is nv30, but... meh.

 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89759
 Signed-off-by: Ilia Mirkin imir...@alum.mit.edu
 ---
  src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 199 
 -
  1 file changed, 199 deletions(-)

 diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
 b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
 index efee4b2..0402ce3 100644
 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
 +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
 @@ -461,7 +461,6 @@ public:
 int get_last_temp_read(int index);
 int get_last_temp_write(int index);

 -   void copy_propagate(void);
 int eliminate_dead_code(void);

 void merge_two_dsts(void);
 @@ -3757,203 +3756,6 @@ glsl_to_tgsi_visitor::get_last_temp_write(int index)
  }

  /*
 - * On a basic block basis, tracks available PROGRAM_TEMPORARY register
 - * channels for copy propagation and updates following instructions to
 - * use the original versions.
 - *
 - * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
 - * will occur.  As an example, a TXP production before this pass:
 - *
 - * 0: MOV TEMP[1], INPUT[4].xyyy;
 - * 1: MOV TEMP[1].w, INPUT[4].;
 - * 2: TXP TEMP[2], TEMP[1], texture[0], 2D;
 - *
 - * and after:
 - *
 - * 0: MOV TEMP[1], INPUT[4].xyyy;
 - * 1: MOV TEMP[1].w, INPUT[4].;
 - * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
 - *
 - * which allows for dead code elimination on TEMP[1]'s writes.
 - */
 -void
 -glsl_to_tgsi_visitor::copy_propagate(void)
 -{
 -   glsl_to_tgsi_instruction **acp = rzalloc_array(mem_ctx,
 -  glsl_to_tgsi_instruction 
 *,
 -  this-next_temp * 4);
 -   int *acp_level = rzalloc_array(mem_ctx, int, this-next_temp * 4);
 -   int level = 0;
 -
 -   foreach_in_list(glsl_to_tgsi_instruction, inst, this-instructions) {
 -  assert(inst-dst[0].file != PROGRAM_TEMPORARY
 - || inst-dst[0].index  this-next_temp);
 -
 -  /* First, do any copy propagation possible into the src regs. */
 -  for (int r = 0; r  3; r++) {
 - glsl_to_tgsi_instruction *first = NULL;
 - bool good = true;
 - int acp_base = inst-src[r].index * 4;
 -
 - if (inst-src[r].file != PROGRAM_TEMPORARY ||
 - inst-src[r].reladdr ||
 - inst-src[r].reladdr2)
 -continue;
 -
 - /* See if we can find entries in the ACP consisting of MOVs
 -  * from the same src register for all the swizzled channels
 -  * of this src register reference.
 -  */
 - for (int i = 0; i  4; i++) {
 -int src_chan = GET_SWZ(inst-src[r].swizzle, i);
 -glsl_to_tgsi_instruction *copy_chan = acp[acp_base + src_chan];
 -
 -if (!copy_chan) {
 -   good = false;
 -   break;
 -}
 -
 -assert(acp_level[acp_base + src_chan] = level);
 -
 -if (!first) {
 -   first = copy_chan;
 -} else {
 -   if (first-src[0].file != copy_chan-src[0].file ||
 -   first-src[0].index != copy_chan-src[0].index ||
 -   first-src[0].index2D != copy_chan-src[0].index2D) {
 -  good = false;
 -  break;
 -   }
 -}
 - }
 -
 - if (good) {
 -/* We've now validated that we can copy-propagate to
 - * replace this src register reference.  Do it.
 - */
 -inst-src[r].file = first-src[0].file;
 -inst-src[r].index = first-src[0].index;
 -inst-src[r].index2D = first-src[0].index2D;
 -inst-src[r].has_index2 = first-src[0].has_index2;
 -
 -int swizzle = 0;
 -for (int i = 0; i  4; i++) {
 -   int src_chan = GET_SWZ(inst-src[r].swizzle, i);
 -   glsl_to_tgsi_instruction *copy_inst = acp[acp_base + 
 src_chan];
 -   swizzle |= (GET_SWZ(copy_inst-src[0].swizzle, src_chan)  
 (3 * i));
 -}
 -inst-src[r].swizzle = swizzle;
 - }
 -  }
 -
 -  switch (inst-op) {
 -  case TGSI_OPCODE_BGNLOOP:
 -  case TGSI_OPCODE_ENDLOOP:
 - /* End of a basic block, clear the ACP entirely. */
 - memset(acp, 0, sizeof(*acp) * this-next_temp * 4);
 - break;

Re: [Mesa-dev] [PATCH] st/mesa: remove copy-propagation pass

2015-03-25 Thread Brian Paul

Will removing this pass have much effect on the number of temp regs 
used?  It looks like more instructions may be emitted w/out this pass.


We're kind of sensitive to that in the VMware driver.

-Brian

On 03/25/2015 12:16 PM, Marek Olšák wrote:

Reviewed-by: Marek Olšák marek.ol...@amd.com

I might need to wait for other people's opinion too.

Marek

On Wed, Mar 25, 2015 at 6:34 PM, Ilia Mirkin imir...@alum.mit.edu wrote:

It's buggy and unnecessary in the presence of optimizing backends. The
only backend that will suffer is nv30, but... meh.

Bugzilla: 
https://urldefense.proofpoint.com/v2/url?u=https-3A__bugs.freedesktop.org_show-5Fbug.cgi-3Fid-3D89759d=AwIGaQc=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEsr=T0t4QG7chq2ZwJo6wilkFznRSFy-8uDKartPGbomVj8m=hW65RavQ_Xuvw96f61daCkas_SjeEudtADNX3BzgNQUs=zjWC0LOuYp8NH6K072ITDgPYCCE0F_a_LCdd9zrdrhAe=
Signed-off-by: Ilia Mirkin imir...@alum.mit.edu
---
  src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 199 -
  1 file changed, 199 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index efee4b2..0402ce3 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -461,7 +461,6 @@ public:
 int get_last_temp_read(int index);
 int get_last_temp_write(int index);

-   void copy_propagate(void);
 int eliminate_dead_code(void);

 void merge_two_dsts(void);
@@ -3757,203 +3756,6 @@ glsl_to_tgsi_visitor::get_last_temp_write(int index)
  }

  /*
- * On a basic block basis, tracks available PROGRAM_TEMPORARY register
- * channels for copy propagation and updates following instructions to
- * use the original versions.
- *
- * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
- * will occur.  As an example, a TXP production before this pass:
- *
- * 0: MOV TEMP[1], INPUT[4].xyyy;
- * 1: MOV TEMP[1].w, INPUT[4].;
- * 2: TXP TEMP[2], TEMP[1], texture[0], 2D;
- *
- * and after:
- *
- * 0: MOV TEMP[1], INPUT[4].xyyy;
- * 1: MOV TEMP[1].w, INPUT[4].;
- * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
- *
- * which allows for dead code elimination on TEMP[1]'s writes.
- */
-void
-glsl_to_tgsi_visitor::copy_propagate(void)
-{
-   glsl_to_tgsi_instruction **acp = rzalloc_array(mem_ctx,
-  glsl_to_tgsi_instruction *,
-  this-next_temp * 4);
-   int *acp_level = rzalloc_array(mem_ctx, int, this-next_temp * 4);
-   int level = 0;
-
-   foreach_in_list(glsl_to_tgsi_instruction, inst, this-instructions) {
-  assert(inst-dst[0].file != PROGRAM_TEMPORARY
- || inst-dst[0].index  this-next_temp);
-
-  /* First, do any copy propagation possible into the src regs. */
-  for (int r = 0; r  3; r++) {
- glsl_to_tgsi_instruction *first = NULL;
- bool good = true;
- int acp_base = inst-src[r].index * 4;
-
- if (inst-src[r].file != PROGRAM_TEMPORARY ||
- inst-src[r].reladdr ||
- inst-src[r].reladdr2)
-continue;
-
- /* See if we can find entries in the ACP consisting of MOVs
-  * from the same src register for all the swizzled channels
-  * of this src register reference.
-  */
- for (int i = 0; i  4; i++) {
-int src_chan = GET_SWZ(inst-src[r].swizzle, i);
-glsl_to_tgsi_instruction *copy_chan = acp[acp_base + src_chan];
-
-if (!copy_chan) {
-   good = false;
-   break;
-}
-
-assert(acp_level[acp_base + src_chan] = level);
-
-if (!first) {
-   first = copy_chan;
-} else {
-   if (first-src[0].file != copy_chan-src[0].file ||
-   first-src[0].index != copy_chan-src[0].index ||
-   first-src[0].index2D != copy_chan-src[0].index2D) {
-  good = false;
-  break;
-   }
-}
- }
-
- if (good) {
-/* We've now validated that we can copy-propagate to
- * replace this src register reference.  Do it.
- */
-inst-src[r].file = first-src[0].file;
-inst-src[r].index = first-src[0].index;
-inst-src[r].index2D = first-src[0].index2D;
-inst-src[r].has_index2 = first-src[0].has_index2;
-
-int swizzle = 0;
-for (int i = 0; i  4; i++) {
-   int src_chan = GET_SWZ(inst-src[r].swizzle, i);
-   glsl_to_tgsi_instruction *copy_inst = acp[acp_base + src_chan];
-   swizzle |= (GET_SWZ(copy_inst-src[0].swizzle, src_chan)  (3 
* i));
-}
-inst-src[r].swizzle = swizzle;
- }
-  }
-
-  switch (inst-op) {
-  case TGSI_OPCODE_BGNLOOP:
-  case TGSI_OPCODE_ENDLOOP:
- /* End of a basic block, clear the ACP entirely.

Re: [Mesa-dev] [PATCH] clover: Update the wait_count of the correct event when chaining events

Tom Stellard thomas.stell...@amd.com writes:

 Cc: 10.5 10.4 mesa-sta...@lists.freedesktop.org
 ---
  src/gallium/state_trackers/clover/core/event.cpp | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

 diff --git a/src/gallium/state_trackers/clover/core/event.cpp 
 b/src/gallium/state_trackers/clover/core/event.cpp
 index 58de888..9d78b48 100644
 --- a/src/gallium/state_trackers/clover/core/event.cpp
 +++ b/src/gallium/state_trackers/clover/core/event.cpp
 @@ -67,7 +67,7 @@ event::signalled() const {
  void
  event::chain(event ev) {
 if (wait_count) {
 -  ev.wait_count++;
 +  wait_count++;

ev0-chain(ev1) establishes a dependency between ev0 and ev1 so that ev1
is signaled after ev0, while doing that only the number of unmet
dependencies of ev1 (wait_count) can be incremented by one, so the code
was already doing the right thing.

_chain.push_back(ev);
 }
 ev.deps.push_back(*this);
 -- 
 2.0.4


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 4/8] scons: Don't build osmesa.

2015-03-25 Thread Jose Fonseca

On 25/03/15 19:35, Emil Velikov wrote:

On 25/03/15 15:21, Jose Fonseca wrote:

On 25/03/15 14:53, Emil Velikov wrote:

On 24 March 2015 at 21:16, Jose Fonseca jfons...@vmware.com wrote:

There doesn't seem much interest on osmesa on Windows, particularly
classic osmesa.

If there is indeed interest in osmesa on Windows, we should instead
integrate src/gallium/targets/osmesa into SCons.

Afaict the Octave people still use it. There was a guy in #dri-devel
who was having issues with the Windows build not too long ago [1]

Iirc the VTK folk are were using osmesa, although I'm not sure if (how
much) Windows support is a thing for them. Hence the presence of the
scons build.

All of that is more of jfyi rather than feeling sentimental about
nuking it :)
-Emil

Thanks for the info Emil.

The osmesa I removed from scons was the one with classic sw rasterizer.

That's precisely the one they are using. Although I'll add a note in the
release notes (update the rest of the docs) so that there is no funny
surprises :-)

The osmesa w/ softpipe/llvmpipe is in src/gallium/targets/osmesa , but
was never integrated into scons build (just autotools.)

I see your concern and I'm not trying to force anything on you.

I'm afraid that getting autotools to work with MSVC would be harder than
writing a completely new build system from scratch. MinGW's doable, but
not MSVC. And we must support MSVC, as MinGW is OK for quick testing has
too many drawbacks to be used for production drivers.

Jose
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] st/mesa: remove copy-propagation pass

On Wed, Mar 25, 2015 at 4:44 PM, Tom Stellard t...@stellard.net wrote:
 On Wed, Mar 25, 2015 at 04:35:03PM -0400, Ilia Mirkin wrote:
 On Wed, Mar 25, 2015 at 4:27 PM, Dave Airlie airl...@gmail.com wrote:
  On 26 March 2015 at 06:07, Ilia Mirkin imir...@alum.mit.edu wrote:
  So what do you do when someone goes to shadertoy.com which on
  average uses 1000 temps?
 
  Fall over in a heap, like every other driver on shadertoy,

 nouveau (nv50/nvc0) tends to do pretty well. There's one particular
 shader that ends up being shifted over for reasons unknown, but no
 compilation failures. i965 tends to do OK too, although I've seen
 weird rendering artifacts, although those could well be due to the
 shaders relying on unspecified behaviour.

 
  This would be raelly bad for r600g in its present state, since it
  doesn't go TGSI-SB yet, and thus has a TEMP limit that this would
  only make worse.

 D'oh, right, r600 relies on there being no more than 124 or 128
 registers. Forgot about that.

 
  so Nak from me.
 
  Dave.

 So I suppose you want to fix the copy_propagation issue? :)


 Can you add a flag for drivers to disable it?

Well, the present reality is that the pass is broken. I was hoping
that simply disabling it would be an acceptable fix for the issue, but
it sounds like there are still a few use-cases that want it around. I
think that reducing the amount of different TGSI output that st/mesa
can produce would be best, so I'd rather explore other options before
making it an optional pass. Even if I make it optional for nouveau,
it'll still break code when feeding to r600/etc...

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] st/mesa: remove copy-propagation pass

2015-03-25 Thread Tom Stellard

On Wed, Mar 25, 2015 at 04:35:03PM -0400, Ilia Mirkin wrote:
 On Wed, Mar 25, 2015 at 4:27 PM, Dave Airlie airl...@gmail.com wrote:
  On 26 March 2015 at 06:07, Ilia Mirkin imir...@alum.mit.edu wrote:
  So what do you do when someone goes to shadertoy.com which on
  average uses 1000 temps?
 
  Fall over in a heap, like every other driver on shadertoy,
 
 nouveau (nv50/nvc0) tends to do pretty well. There's one particular
 shader that ends up being shifted over for reasons unknown, but no
 compilation failures. i965 tends to do OK too, although I've seen
 weird rendering artifacts, although those could well be due to the
 shaders relying on unspecified behaviour.
 
 
  This would be raelly bad for r600g in its present state, since it
  doesn't go TGSI-SB yet, and thus has a TEMP limit that this would
  only make worse.
 
 D'oh, right, r600 relies on there being no more than 124 or 128
 registers. Forgot about that.
 
 
  so Nak from me.
 
  Dave.
 
 So I suppose you want to fix the copy_propagation issue? :)
 

Can you add a flag for drivers to disable it?

-Tom

   -ilia
 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] nir: Add optional lowering of flrp.

2015-03-25 Thread Eric Anholt

---

I missed this one when I was doing my other lowering bits.  I don't
have ffma, and I certainly don't have flrp.

 src/gallium/drivers/vc4/vc4_program.c | 1 +
 src/glsl/nir/nir.h| 1 +
 src/glsl/nir/nir_opt_algebraic.py | 1 +
 3 files changed, 3 insertions(+)

diff --git a/src/gallium/drivers/vc4/vc4_program.c 
b/src/gallium/drivers/vc4/vc4_program.c
index 26816ca..e768b1a 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -2005,6 +2005,7 @@ nir_to_qir(struct vc4_compile *c)
 
 static const nir_shader_compiler_options nir_options = {
 .lower_ffma = true,
+.lower_flrp = true,
 .lower_fpow = true,
 .lower_fsat = true,
 .lower_fsqrt = true,
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 29fe942..7b886e3 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1365,6 +1365,7 @@ typedef struct nir_function {
 
 typedef struct nir_shader_compiler_options {
bool lower_ffma;
+   bool lower_flrp;
bool lower_fpow;
bool lower_fsat;
bool lower_fsqrt;
diff --git a/src/glsl/nir/nir_opt_algebraic.py 
b/src/glsl/nir/nir_opt_algebraic.py
index 1ee51a0..20ec4d3 100644
--- a/src/glsl/nir/nir_opt_algebraic.py
+++ b/src/glsl/nir/nir_opt_algebraic.py
@@ -75,6 +75,7 @@ optimizations = [
(('flrp', a, b, 1.0), b),
(('flrp', a, a, b), a),
(('flrp', 0.0, a, b), ('fmul', a, b)),
+   (('flrp', a, b, c), ('fadd', ('fmul', c, ('fsub', b, a)), a), 
'options-lower_flrp'),
(('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options-lower_ffma'),
(('fadd', ('fmul', a, b), c), ('ffma', a, b, c), '!options-lower_ffma'),
# Comparison simplifications
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] nir: Add optional lowering of flrp.

2015-03-25 Thread Connor Abbott

Reviewed-by: Connor Abbott cwabbo...@gmail.com

We should probably use this for Gen4-5 too.

On Wed, Mar 25, 2015 at 5:11 PM, Eric Anholt e...@anholt.net wrote:
 ---

 I missed this one when I was doing my other lowering bits.  I don't
 have ffma, and I certainly don't have flrp.

  src/gallium/drivers/vc4/vc4_program.c | 1 +
  src/glsl/nir/nir.h| 1 +
  src/glsl/nir/nir_opt_algebraic.py | 1 +
  3 files changed, 3 insertions(+)

 diff --git a/src/gallium/drivers/vc4/vc4_program.c 
 b/src/gallium/drivers/vc4/vc4_program.c
 index 26816ca..e768b1a 100644
 --- a/src/gallium/drivers/vc4/vc4_program.c
 +++ b/src/gallium/drivers/vc4/vc4_program.c
 @@ -2005,6 +2005,7 @@ nir_to_qir(struct vc4_compile *c)

  static const nir_shader_compiler_options nir_options = {
  .lower_ffma = true,
 +.lower_flrp = true,
  .lower_fpow = true,
  .lower_fsat = true,
  .lower_fsqrt = true,
 diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
 index 29fe942..7b886e3 100644
 --- a/src/glsl/nir/nir.h
 +++ b/src/glsl/nir/nir.h
 @@ -1365,6 +1365,7 @@ typedef struct nir_function {

  typedef struct nir_shader_compiler_options {
 bool lower_ffma;
 +   bool lower_flrp;
 bool lower_fpow;
 bool lower_fsat;
 bool lower_fsqrt;
 diff --git a/src/glsl/nir/nir_opt_algebraic.py 
 b/src/glsl/nir/nir_opt_algebraic.py
 index 1ee51a0..20ec4d3 100644
 --- a/src/glsl/nir/nir_opt_algebraic.py
 +++ b/src/glsl/nir/nir_opt_algebraic.py
 @@ -75,6 +75,7 @@ optimizations = [
 (('flrp', a, b, 1.0), b),
 (('flrp', a, a, b), a),
 (('flrp', 0.0, a, b), ('fmul', a, b)),
 +   (('flrp', a, b, c), ('fadd', ('fmul', c, ('fsub', b, a)), a), 
 'options-lower_flrp'),
 (('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options-lower_ffma'),
 (('fadd', ('fmul', a, b), c), ('ffma', a, b, c), '!options-lower_ffma'),
 # Comparison simplifications
 --
 2.1.4

 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] st/mesa: remove copy-propagation pass

2015-03-25 Thread Dave Airlie

On 26 March 2015 at 06:07, Ilia Mirkin imir...@alum.mit.edu wrote:
 So what do you do when someone goes to shadertoy.com which on
 average uses 1000 temps?

Fall over in a heap, like every other driver on shadertoy,

This would be raelly bad for r600g in its present state, since it
doesn't go TGSI-SB yet, and thus has a TEMP limit that this would
only make worse.

so Nak from me.

Dave.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 0/9] NIR: Several optimization patches

2015-03-25 Thread Matt Turner

On Mon, Mar 23, 2015 at 8:13 PM, Jason Ekstrand ja...@jlekstrand.net wrote:
 There is one notable place where this hurts us.  There are three shaders in
 Kerbal Space Program that now spill.  However, this does not seem to be the
 fault of NIR.  See patch 8 for more details.

I've got an optimization that brings them (#610 and #622) back to
their original instruction count, so I don't think it's a serious
issue regardless.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] docs: note that classic osmesa/libEGL no longer builds with scons

Plus nuke final note of osmesa from README.WIN32

Signed-off-by: Emil Velikov emil.l.veli...@gmail.com
---
 docs/README.WIN32 | 4 
 docs/relnotes/10.6.0.html | 2 ++
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/docs/README.WIN32 b/docs/README.WIN32
index e0e5b9b..94e1d6f 100644
--- a/docs/README.WIN32
+++ b/docs/README.WIN32
@@ -11,10 +11,6 @@ no longer shipped or supported.
 
 Run
 
-  scons osmesa
-
-to build classic osmesa driver; or
-
   scons libgl-gdi
 
 to build gallium based GDI driver.
diff --git a/docs/relnotes/10.6.0.html b/docs/relnotes/10.6.0.html
index 005..3233637 100644
--- a/docs/relnotes/10.6.0.html
+++ b/docs/relnotes/10.6.0.html
@@ -65,6 +65,8 @@ TBD.
 liRemoved OpenVG support./li
 liRemoved the galahad gallium driver./li
 liRemoved the identity gallium driver./li
+liRemoved the EGL loader from the Windows SCons build./li
+liRemoved the classic osmesa from the Windows SCons build./li
 /ul
 
 /div
-- 
2.1.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] st/mesa: remove copy-propagation pass

So what do you do when someone goes to shadertoy.com which on
average uses 1000 temps?

On Wed, Mar 25, 2015 at 3:21 PM, Brian Paul bri...@vmware.com wrote:
 The problem is, our binary shader interface only supports 32 temps at this
 time.  We sometimes bump into that limit as-is.

 -Brian


 On 03/25/2015 01:04 PM, Ilia Mirkin wrote:

 Yes, more temp registers and more instructions. But presumably the
 backend has an optimization pass that is at least as good as this one
 (hopefully better!). Is that not the case for vmware?

 On Wed, Mar 25, 2015 at 2:59 PM, Brian Paul bri...@vmware.com wrote:

 Will removing this pass have much effect on the number of temp regs used?
 It looks like more instructions may be emitted w/out this pass.

 We're kind of sensitive to that in the VMware driver.

 -Brian

 On 03/25/2015 12:16 PM, Marek Olšák wrote:


 Reviewed-by: Marek Olšák marek.ol...@amd.com

 I might need to wait for other people's opinion too.

 Marek

 On Wed, Mar 25, 2015 at 6:34 PM, Ilia Mirkin imir...@alum.mit.edu
 wrote:


 It's buggy and unnecessary in the presence of optimizing backends. The
 only backend that will suffer is nv30, but... meh.

 Bugzilla:

 https://urldefense.proofpoint.com/v2/url?u=https-3A__bugs.freedesktop.org_show-5Fbug.cgi-3Fid-3D89759d=AwIGaQc=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEsr=T0t4QG7chq2ZwJo6wilkFznRSFy-8uDKartPGbomVj8m=hW65RavQ_Xuvw96f61daCkas_SjeEudtADNX3BzgNQUs=zjWC0LOuYp8NH6K072ITDgPYCCE0F_a_LCdd9zrdrhAe=

 Signed-off-by: Ilia Mirkin imir...@alum.mit.edu
 ---
src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 199
 -
1 file changed, 199 deletions(-)

 diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
 b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
 index efee4b2..0402ce3 100644
 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
 +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
 @@ -461,7 +461,6 @@ public:
   int get_last_temp_read(int index);
   int get_last_temp_write(int index);

 -   void copy_propagate(void);
   int eliminate_dead_code(void);

   void merge_two_dsts(void);
 @@ -3757,203 +3756,6 @@ glsl_to_tgsi_visitor::get_last_temp_write(int
 index)
}

/*
 - * On a basic block basis, tracks available PROGRAM_TEMPORARY register
 - * channels for copy propagation and updates following instructions to
 - * use the original versions.
 - *
 - * The glsl_to_tgsi_visitor lazily produces code assuming that this
 pass
 - * will occur.  As an example, a TXP production before this pass:
 - *
 - * 0: MOV TEMP[1], INPUT[4].xyyy;
 - * 1: MOV TEMP[1].w, INPUT[4].;
 - * 2: TXP TEMP[2], TEMP[1], texture[0], 2D;
 - *
 - * and after:
 - *
 - * 0: MOV TEMP[1], INPUT[4].xyyy;
 - * 1: MOV TEMP[1].w, INPUT[4].;
 - * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
 - *
 - * which allows for dead code elimination on TEMP[1]'s writes.
 - */
 -void
 -glsl_to_tgsi_visitor::copy_propagate(void)
 -{
 -   glsl_to_tgsi_instruction **acp = rzalloc_array(mem_ctx,
 -
 glsl_to_tgsi_instruction *,
 -  this-next_temp *
 4);
 -   int *acp_level = rzalloc_array(mem_ctx, int, this-next_temp * 4);
 -   int level = 0;
 -
 -   foreach_in_list(glsl_to_tgsi_instruction, inst,
 this-instructions)
 {
 -  assert(inst-dst[0].file != PROGRAM_TEMPORARY
 - || inst-dst[0].index  this-next_temp);
 -
 -  /* First, do any copy propagation possible into the src regs. */
 -  for (int r = 0; r  3; r++) {
 - glsl_to_tgsi_instruction *first = NULL;
 - bool good = true;
 - int acp_base = inst-src[r].index * 4;
 -
 - if (inst-src[r].file != PROGRAM_TEMPORARY ||
 - inst-src[r].reladdr ||
 - inst-src[r].reladdr2)
 -continue;
 -
 - /* See if we can find entries in the ACP consisting of MOVs
 -  * from the same src register for all the swizzled channels
 -  * of this src register reference.
 -  */
 - for (int i = 0; i  4; i++) {
 -int src_chan = GET_SWZ(inst-src[r].swizzle, i);
 -glsl_to_tgsi_instruction *copy_chan = acp[acp_base +
 src_chan];
 -
 -if (!copy_chan) {
 -   good = false;
 -   break;
 -}
 -
 -assert(acp_level[acp_base + src_chan] = level);
 -
 -if (!first) {
 -   first = copy_chan;
 -} else {
 -   if (first-src[0].file != copy_chan-src[0].file ||
 -   first-src[0].index != copy_chan-src[0].index ||
 -   first-src[0].index2D != copy_chan-src[0].index2D)
 {
 -  good = false;
 -  break;
 -   }
 -}
 - }
 -
 - if (good) {
 -/* We've now validated that we can copy-propagate to
 - * replace this src register reference.  Do it.
 - */
 -inst-src[r].file = first-src[0].file;
 -

Re: [Mesa-dev] [PATCH] st/mesa: remove copy-propagation pass

On Wed, Mar 25, 2015 at 4:27 PM, Dave Airlie airl...@gmail.com wrote:
 On 26 March 2015 at 06:07, Ilia Mirkin imir...@alum.mit.edu wrote:
 So what do you do when someone goes to shadertoy.com which on
 average uses 1000 temps?

 Fall over in a heap, like every other driver on shadertoy,

nouveau (nv50/nvc0) tends to do pretty well. There's one particular
shader that ends up being shifted over for reasons unknown, but no
compilation failures. i965 tends to do OK too, although I've seen
weird rendering artifacts, although those could well be due to the
shaders relying on unspecified behaviour.


 This would be raelly bad for r600g in its present state, since it
 doesn't go TGSI-SB yet, and thus has a TEMP limit that this would
 only make worse.

D'oh, right, r600 relies on there being no more than 124 or 128
registers. Forgot about that.


 so Nak from me.

 Dave.

So I suppose you want to fix the copy_propagation issue? :)

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] nir: Add optional lowering of flrp.

2015-03-25 Thread Matt Turner

On Wed, Mar 25, 2015 at 2:11 PM, Eric Anholt e...@anholt.net wrote:
 ---

 I missed this one when I was doing my other lowering bits.  I don't
 have ffma, and I certainly don't have flrp.

Reviewed-by: Matt Turner matts...@gmail.com
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [Freedreno] [Mesa-stable] [PATCH 2/2] freedreno/a3xx: fix 3d texture layout

On Wed, Mar 25, 2015 at 5:59 PM, Emil Velikov emil.l.veli...@gmail.com wrote:
 On 15 March 2015 at 23:18, Ilia Mirkin imir...@alum.mit.edu wrote:
 The SZ2 field contains the layer size of a lower miplevel. It only
 contains 4 bits, which limits the maximum layer size it can describe. In
 situations where the next miplevel would be too big, the hardware
 appears to keep minifying the size until it hits one of that size.
 Unfortunately the hardware's ideas about sizes can differ from
 freedreno's which can still lead to issues. Minimize those by stopping
 to minify as soon as possible.

 Signed-off-by: Ilia Mirkin imir...@alum.mit.edu
 Cc: 10.4 10.5 mesa-sta...@lists.freedesktop.org
 ---

 OK, so I'm not *particularly* happy about the hack in setup_slices,
 which is generic code. But I also didn't really see a better way to do
 it. Hopefully this doesn't break 3d on a4xx... not sure what the
 requirements there are.

 Hi Ilia,

 Just a humble ping that this hasn't landed in master yet. Can you let
 me know if it's obsolete, or you're simply looking alternative
 solution.

Just side-tracked. No need to wait on this for doing releases or anything.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 6/6] i965/nir: Use NIR for ARB_vertex_program support on Gen8+.

On Tuesday, March 24, 2015 11:51:53 PM Jordan Justen wrote:
 On 2015-03-23 17:38:00, Kenneth Graunke wrote:
  Everything is already in place; we simply have to take the scalar code
  generation path.  This gives us SIMD8 VS programs, instead of SIMD4x2.
  
  Signed-off-by: Kenneth Graunke kenn...@whitecape.org
  ---
   src/mesa/drivers/dri/i965/brw_vec4.cpp | 14 ++
   1 file changed, 10 insertions(+), 4 deletions(-)
  
  diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4.cpp
  index 918519c..21de1af 100644
  --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
  +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
  @@ -1823,7 +1823,7 @@ brw_vs_emit(struct brw_context *brw,
  if (unlikely(INTEL_DEBUG  DEBUG_VS))
 brw_dump_ir(vertex, prog, shader-base, c-vp-program.Base);
   
  -   if (prog  brw-gen = 8  brw-scalar_vs) {
  +   if (brw-gen = 8  brw-scalar_vs  (prog || 
getenv(INTEL_USE_NIR))) {
 
 We should be able to leave out the brw-gen = 8 check, right?
 
 -Jordan

Yup - that's never been necessary.  I made a separate patch to delete
that (prior to this one), and went ahead and put your Reviewed-by on it.


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2] egl/dri2: Fix GCC maybe-uninitialized warning.

On Wed, Mar 25, 2015 at 6:51 PM, Jan Vesely jan.ves...@rutgers.edu wrote:
 On Fri, 2015-03-06 at 23:54 -0800, Vinson Lee wrote:
 egl_dri2.c: In function ‘dri2_bind_tex_image’:
 egl_dri2.c:1240:4: warning: ‘format’ may be used uninitialized in this 
 function [-Wmaybe-uninitialized]
 (*dri2_dpy-tex_buffer-setTexBuffer2)(dri2_ctx-dri_context,
 ^

 Suggested-by: Ilia Mirkin imir...@alum.mit.edu
 Signed-off-by: Vinson Lee v...@freedesktop.org
 ---
  src/egl/drivers/dri2/egl_dri2.c | 6 --
  1 file changed, 4 insertions(+), 2 deletions(-)

 diff --git a/src/egl/drivers/dri2/egl_dri2.c 
 b/src/egl/drivers/dri2/egl_dri2.c
 index d503196..c5c475d 100644
 --- a/src/egl/drivers/dri2/egl_dri2.c
 +++ b/src/egl/drivers/dri2/egl_dri2.c
 @@ -1226,7 +1226,8 @@ dri2_bind_tex_image(_EGLDriver *drv,
format = __DRI_TEXTURE_FORMAT_RGBA;
break;
 default:
 -  assert(0);
 +  _eglError(EGL_BAD_SURFACE, unrecognized format);
 +  return EGL_FALSE;

 does using:
 unreachable(unrecognized format);
 instead of
 assert(0);
 fix the warning?

unreachable is for *truly* unreachable code... it sounded like this
was reachable with bad input.


 }

 switch (dri2_surf-base.TextureTarget) {
 @@ -1234,7 +1235,8 @@ dri2_bind_tex_image(_EGLDriver *drv,
target = GL_TEXTURE_2D;
break;
 default:
 -  assert(0);
 +  _eglError(EGL_BAD_SURFACE, unrecognized target);
 +  return EGL_FALSE;
 }

 (*dri2_dpy-tex_buffer-setTexBuffer2)(dri2_ctx-dri_context,

 --
 Jan Vesely jan.ves...@rutgers.edu

 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 3/6] i965: Create and use #defines for blitter constraints

On Mon, Mar 23, 2015 at 02:18:30PM +, Neil Roberts wrote:
 Ben Widawsky benjamin.widaw...@intel.com writes:
 
  diff --git a/src/mesa/drivers/dri/i965/intel_blit.h 
  b/src/mesa/drivers/dri/i965/intel_blit.h
  index f563939..531d329 100644
  --- a/src/mesa/drivers/dri/i965/intel_blit.h
  +++ b/src/mesa/drivers/dri/i965/intel_blit.h
  @@ -30,6 +30,9 @@
   
   #include brw_context.h
   
  +#define INTEL_MAX_BLIT_PITCH 32768
  +#define INTEL_MAX_BLIT_ROWS 32768
  +
 
 Isn't the actual maximum 32767 not 32768? I think it would be a lot less
 confusing if we did that and then changed all of the ‘=’ comparisons to
 just ‘’.
 
 - Neil

I can do that, but I'd like to keep the rename and the modification as separate
patches. Though they should bother have no functional impact, I like to keep
rename patches distinct. Is that okay?

-- 
Ben Widawsky, Intel Open Source Technology Center
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] i965/skl: Don't use the PMA depth stall workaround

The PMA depth stall must be enabled (optimization turned off) under certain
circumstances on gen8. This was supposedly fixed for Gen9, which means we do not
need to check, or toggle the state. The hardware is supposed to enable the
hardware optimization by default, unlike BDW, so we also don't need to set it at
init. For whatever reason this improves stability on ETQW with the bug mentioned
below.

TODO: This patch still needs to be tested on a full piglit run.

References: https://bugs.freedesktop.org/show_bug.cgi?id=89039 (doesn't fix)
Cc: Anuj Phogat anuj.pho...@intel.com
Cc: Eero Tamminen eero.t.tammi...@intel.com
Signed-off-by: Ben Widawsky b...@bwidawsk.net
---
 src/mesa/drivers/dri/i965/gen8_depth_state.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/gen8_depth_state.c 
b/src/mesa/drivers/dri/i965/gen8_depth_state.c
index c6494c9..3d126cf 100644
--- a/src/mesa/drivers/dri/i965/gen8_depth_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_depth_state.c
@@ -368,6 +368,10 @@ static void
 gen8_emit_pma_stall_workaround(struct brw_context *brw)
 {
uint32_t bits = 0;
+
+   if (brw-gen = 9)
+  return;
+
if (pma_fix_enable(brw))
   bits |= GEN8_HIZ_NP_PMA_FIX_ENABLE | GEN8_HIZ_NP_EARLY_Z_FAILS_DISABLE;
 
@@ -400,7 +404,8 @@ gen8_hiz_exec(struct brw_context *brw, struct 
intel_mipmap_tree *mt,
   return;
 
/* Disable the PMA stall fix since we're about to do a HiZ operation. */
-   write_pma_stall_bits(brw, 0);
+   if (brw-gen == 8)
+  write_pma_stall_bits(brw, 0);
 
assert(mt-first_level == 0);
assert(mt-logical_depth0 = 1);
-- 
2.3.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] st_glsl_to_tgsi: only do mov copy propagation on temps (v2)

2015-03-25 Thread Dave Airlie

From: Dave Airlie airl...@redhat.com

Don't propagate ARRAYs

This should fix:
https://bugs.freedesktop.org/show_bug.cgi?id=89759

v2: just specify arrays so we get input propagation
Signed-off-by: Dave Airlie airl...@redhat.com
Cc: mesa-sta...@lists.freedesktop.org
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index db69a08..b5e6c62 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -3938,6 +3938,7 @@ glsl_to_tgsi_visitor::copy_propagate(void)
  inst-dst[0].index == inst-src[0].index) 
   !inst-dst[0].reladdr 
   !inst-saturate 
+  inst-src[0].file != PROGRAM_ARRAY 
   !inst-src[0].reladdr 
   !inst-src[0].reladdr2 
   !inst-src[0].negate) {
-- 
2.1.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] nv50/ir: take postFactor into account when doing peephole optimizations

Multiply operations can have a post-factor on them, which other ops
don't support. Only perform the peephole optimizations when there is no
post-factor involved.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89758
Signed-off-by: Ilia Mirkin imir...@alum.mit.edu
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 12 
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 6a4ea4e..dc048e6 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -451,7 +451,9 @@ ConstantFolding::expr(Instruction *i,
 b-data.f32 = 0.0f;
   }
   switch (i-dType) {
-  case TYPE_F32: res.data.f32 = a-data.f32 * b-data.f32; break;
+  case TYPE_F32:
+ res.data.f32 = a-data.f32 * b-data.f32 * exp2f(i-postFactor);
+ break;
   case TYPE_F64: res.data.f64 = a-data.f64 * b-data.f64; break;
   case TYPE_S32:
  if (i-subOp == NV50_IR_SUBOP_MUL_HIGH) {
@@ -579,6 +581,7 @@ ConstantFolding::expr(Instruction *i,
 
i-src(0).mod = Modifier(0);
i-src(1).mod = Modifier(0);
+   i-postFactor = 0;
 
i-setSrc(0, new_ImmediateValue(i-bb-getProgram(), res.data.u32));
i-setSrc(1, NULL);
@@ -682,7 +685,7 @@ ConstantFolding::tryCollapseChainedMULs(Instruction *mul2,
Instruction *insn;
Instruction *mul1 = NULL; // mul1 before mul2
int e = 0;
-   float f = imm2.reg.data.f32;
+   float f = imm2.reg.data.f32 * exp2f(mul2-postFactor);
ImmediateValue imm1;
 
assert(mul2-op == OP_MUL  mul2-dType == TYPE_F32);
@@ -782,9 +785,10 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue 
imm0, int s)
  i-op = OP_MOV;
  i-setSrc(0, new_ImmediateValue(prog, 0u));
  i-src(0).mod = Modifier(0);
+ i-postFactor = 0;
  i-setSrc(1, NULL);
   } else
-  if (imm0.isInteger(1) || imm0.isInteger(-1)) {
+  if (!i-postFactor  (imm0.isInteger(1) || imm0.isInteger(-1))) {
  if (imm0.isNegative())
 i-src(t).mod = i-src(t).mod ^ Modifier(NV50_IR_MOD_NEG);
  i-op = i-src(t).mod.getOp();
@@ -797,7 +801,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue imm0, 
int s)
 i-src(0).mod = 0;
  i-setSrc(1, NULL);
   } else
-  if (imm0.isInteger(2) || imm0.isInteger(-2)) {
+  if (!i-postFactor  (imm0.isInteger(2) || imm0.isInteger(-2))) {
  if (imm0.isNegative())
 i-src(t).mod = i-src(t).mod ^ Modifier(NV50_IR_MOD_NEG);
  i-op = OP_ADD;
-- 
2.0.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [Mesa-stable] [PATCH 2/2] freedreno/a3xx: fix 3d texture layout

On 15 March 2015 at 23:18, Ilia Mirkin imir...@alum.mit.edu wrote:
 The SZ2 field contains the layer size of a lower miplevel. It only
 contains 4 bits, which limits the maximum layer size it can describe. In
 situations where the next miplevel would be too big, the hardware
 appears to keep minifying the size until it hits one of that size.
 Unfortunately the hardware's ideas about sizes can differ from
 freedreno's which can still lead to issues. Minimize those by stopping
 to minify as soon as possible.

 Signed-off-by: Ilia Mirkin imir...@alum.mit.edu
 Cc: 10.4 10.5 mesa-sta...@lists.freedesktop.org
 ---

 OK, so I'm not *particularly* happy about the hack in setup_slices,
 which is generic code. But I also didn't really see a better way to do
 it. Hopefully this doesn't break 3d on a4xx... not sure what the
 requirements there are.

Hi Ilia,

Just a humble ping that this hasn't landed in master yet. Can you let
me know if it's obsolete, or you're simply looking alternative
solution.

Cheers,
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2] i965/fs: in MAD optimizations, switch last argument to be immediate

On 16 March 2015 at 08:19, Kenneth Graunke kenn...@whitecape.org wrote:
 On Monday, March 16, 2015 10:08:08 AM Tapani Pälli wrote:
 Commit bb33a31 introduced optimizations that transform cases of MAD
 in to simpler forms but it did not take in to account that src[0]
 can not be immediate and did not report progress. Patch switches
 src[0] and src[1] if src[0] is immediate and adds progress
 reporting. If both sources are immediates, this is taken care of by
 the same opt_algebraic pass on later run.

 v2: Fix for all cases, use temporary fs_reg (Matt, Kenneth)

 Signed-off-by: Tapani Pälli tapani.pa...@intel.com
 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89569
 Reviewed-by: Francisco Jerez curroje...@riseup.net (v1)
...
 Cc: 10.5 mesa-sta...@lists.freedesktop.org
 Reviewed-by: Kenneth Graunke kenn...@whitecape.org

Hi gents,

Considering that the MAD optimisations mentioned never made it in 10.5
I'm planning to drop this patch.

Thanks
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2] egl/dri2: Fix GCC maybe-uninitialized warning.

2015-03-25 Thread Mark Janes

This was never pushed, so

Reviewed-by: Mark Janes mark.a.ja...@intel.com

Vinson Lee v...@freedesktop.org writes:

 egl_dri2.c: In function ‘dri2_bind_tex_image’:
 egl_dri2.c:1240:4: warning: ‘format’ may be used uninitialized in this 
 function [-Wmaybe-uninitialized]
 (*dri2_dpy-tex_buffer-setTexBuffer2)(dri2_ctx-dri_context,
 ^

 Suggested-by: Ilia Mirkin imir...@alum.mit.edu
 Signed-off-by: Vinson Lee v...@freedesktop.org
 ---
  src/egl/drivers/dri2/egl_dri2.c | 6 --
  1 file changed, 4 insertions(+), 2 deletions(-)

 diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c
 index d503196..c5c475d 100644
 --- a/src/egl/drivers/dri2/egl_dri2.c
 +++ b/src/egl/drivers/dri2/egl_dri2.c
 @@ -1226,7 +1226,8 @@ dri2_bind_tex_image(_EGLDriver *drv,
format = __DRI_TEXTURE_FORMAT_RGBA;
break;
 default:
 -  assert(0);
 +  _eglError(EGL_BAD_SURFACE, unrecognized format);
 +  return EGL_FALSE;
 }
  
 switch (dri2_surf-base.TextureTarget) {
 @@ -1234,7 +1235,8 @@ dri2_bind_tex_image(_EGLDriver *drv,
target = GL_TEXTURE_2D;
break;
 default:
 -  assert(0);
 +  _eglError(EGL_BAD_SURFACE, unrecognized target);
 +  return EGL_FALSE;
 }
  
 (*dri2_dpy-tex_buffer-setTexBuffer2)(dri2_ctx-dri_context,
 -- 
 2.3.1

 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 89586] Drivers/DRI/swrast

https://bugs.freedesktop.org/show_bug.cgi?id=89586

--- Comment #36 from Dan Sebald daniel.seb...@ieee.org ---
What is the general thought regarding floating point numerical issues and the
OpenGL formulas?  The reason the swrast-legacy driver is failing one the Piglit
pixelzoom tests is that this computation:

 c1 = imageX + (GLint) ceil((spanX + spanWidth - imageX) *
ctx-Pixel.ZoomX);

has numerical issues when ctx-Pixel.ZoomX is fractional and negative.  (Sign
isn't at the heart of the issue, the fact it is fractional is, i.e., -1 
ctx-Pixel.ZoomX  0.)  A few example numbers work out as follows (this is part
of the succession of smaller filled rectangles test):

span.x=160 span.end=400
(c0=-0.00 c1=-119.08)=(-0.00,-119.00)

span.x=160 span.end=400
(c0=-0.00 c1=-117.92)=(-0.00,-117.00)

span.x=160 span.end=400
(c0=-0.00 c1=-116.92)=(-0.00,-116.00)

span.x=160 span.end=400
(c0=-0.00 c1=-116.00)=(-0.00,-116.00)

Note how there are two end columns that are shorter than they should be, and
the scaling here isn't very drastic, 1/2.5.

Is there anything about the OpenGL standard that allows for, say, a band of
numerical tolerance for ceil and floor operations?  Or is it supposed to be
that formulas consider GLfloat xfactor and yfactor as though they are real
numbers, i.e., need exactness?

I can make all the swrast tests pass if I put some tolerance in for the ceiling
function.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 89586] Drivers/DRI/swrast

https://bugs.freedesktop.org/show_bug.cgi?id=89586

--- Comment #37 from Ilia Mirkin imir...@alum.mit.edu ---
(In reply to Dan Sebald from comment #36)
 What is the general thought regarding floating point numerical issues and
 the OpenGL formulas?

In early GLSL versions, floating point accuracy was supposed to be to within
0.1 (i.e. pretty huge errors were allowable). However if the hw were to start
introducing such errors left and right, it'd wreak huge havoc... with
ARB_shader_precision (part of GL 4.1), it specifies these things fairly
precisely.

Not sure how that carries over to the glPixelZoom-style formulas.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 2/6] nir: Add builder helpers for MOVs with ALU sources and swizzling MOVs.

These will be useful for prog-nir and tgsi-nir.

Signed-off-by: Kenneth Graunke kenn...@whitecape.org
---
 src/glsl/nir/nir_builder.h | 44 
 1 file changed, 44 insertions(+)

diff --git a/src/glsl/nir/nir_builder.h b/src/glsl/nir/nir_builder.h
index fe6cb37..cdd413e 100644
--- a/src/glsl/nir/nir_builder.h
+++ b/src/glsl/nir/nir_builder.h
@@ -162,4 +162,48 @@ nir_##op(nir_builder *build, nir_ssa_def *src0,
   \
 
 #include nir_builder_opcodes.h
 
+/**
+ * Similar to nir_fmov, but takes a nir_alu_src instead of a nir_ssa_def.
+ */
+static inline nir_ssa_def *
+nir_fmov_alu(nir_builder *build, nir_alu_src src, unsigned num_components)
+{
+   nir_alu_instr *mov = nir_alu_instr_create(build-shader, nir_op_fmov);
+   nir_ssa_dest_init(mov-instr, mov-dest.dest, num_components, NULL);
+   mov-dest.write_mask = (1  num_components) - 1;
+   mov-src[0] = src;
+   nir_instr_insert_after_cf_list(build-cf_node_list, mov-instr);
+
+   return mov-dest.dest.ssa;
+}
+
+static inline nir_ssa_def *
+nir_imov_alu(nir_builder *build, nir_alu_src src, unsigned num_components)
+{
+   nir_alu_instr *mov = nir_alu_instr_create(build-shader, nir_op_imov);
+   nir_ssa_dest_init(mov-instr, mov-dest.dest, num_components, NULL);
+   mov-dest.write_mask = (1  num_components) - 1;
+   mov-src[0] = src;
+   nir_instr_insert_after_cf_list(build-cf_node_list, mov-instr);
+
+   return mov-dest.dest.ssa;
+}
+
+/**
+ * Construct an fmov or imov that reswizzles the source's components.
+ */
+static nir_ssa_def *
+nir_swizzle(nir_builder *build, nir_ssa_def *src, unsigned swiz[4],
+unsigned num_components, bool use_fmov)
+{
+   nir_alu_src alu_src;
+   memset(alu_src, 0, sizeof(alu_src));
+   alu_src.src = nir_src_for_ssa(src);
+   for (int i = 0; i  4; i++)
+  alu_src.swizzle[i] = swiz[i];
+
+   return use_fmov ? nir_fmov_alu(build, alu_src, num_components) :
+ nir_imov_alu(build, alu_src, num_components);
+}
+
 #endif /* NIR_BUILDER_H */
-- 
2.3.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 5/6] i965/fs: Add ARB_fragment_program support to the NIR backend.

Use prog_to_nir where we would normally call glsl_to_nir, handle program
parameter lists, and skip a few things that don't exist.

Using NIR generates much better shader code than Mesa IR, since we get
real optimizations, as opposed to prog_optimize:

total instructions in shared programs: 314007 - 279892 (-10.86%)
instructions in affected programs: 285173 - 251058 (-11.96%)
helped:2001
HURT:  67
GAINED:4
LOST:  7

v2: Change early return in nir_setup_uniforms to if/else (Jordan).

Signed-off-by: Kenneth Graunke kenn...@whitecape.org
Reviewed-by: Jordan Justen jordan.l.jus...@intel.com [v1]
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 16 -
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 61 +++-
 2 files changed, 51 insertions(+), 26 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index a57f501..6969286 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -3954,15 +3954,13 @@ fs_visitor::run_fs()
   /* Generate FS IR for main().  (the visitor only descends into
* functions called main).
*/
-  if (shader) {
- if (env_var_as_boolean(INTEL_USE_NIR, false)) {
-emit_nir_code();
- } else {
-foreach_in_list(ir_instruction, ir, shader-base.ir) {
-   base_ir = ir;
-   this-result = reg_undef;
-   ir-accept(this);
-}
+  if (env_var_as_boolean(INTEL_USE_NIR, false)) {
+ emit_nir_code();
+  } else if (shader) {
+ foreach_in_list(ir_instruction, ir, shader-base.ir) {
+base_ir = ir;
+this-result = reg_undef;
+ir-accept(this);
  }
   } else {
  emit_fragment_program_code();
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 0b8ed1a..21e52fe 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -24,6 +24,7 @@
 #include glsl/ir.h
 #include glsl/ir_optimization.h
 #include glsl/nir/glsl_to_nir.h
+#include program/prog_to_nir.h
 #include brw_fs.h
 #include brw_nir.h
 
@@ -86,9 +87,15 @@ fs_visitor::emit_nir_code()
const nir_shader_compiler_options *options =
   ctx-Const.ShaderCompilerOptions[stage].NirOptions;
 
-   /* first, lower the GLSL IR shader to NIR */
-   lower_output_reads(shader-base.ir);
-   nir_shader *nir = glsl_to_nir(shader-base, options);
+   nir_shader *nir;
+   /* First, lower the GLSL IR or Mesa IR to NIR */
+   if (shader_prog) {
+  lower_output_reads(shader-base.ir);
+  nir = glsl_to_nir(shader-base, options);
+   } else {
+  nir = prog_to_nir(prog, options);
+  nir_convert_to_ssa(nir); /* turn registers into SSA */
+   }
nir_validate_shader(nir);
 
nir_lower_global_vars_to_local(nir);
@@ -106,9 +113,18 @@ fs_visitor::emit_nir_code()
/* Get rid of split copies */
nir_optimize(nir);
 
-   nir_assign_var_locations_scalar_direct_first(nir, nir-uniforms,
-num_direct_uniforms,
-nir-num_uniforms);
+   if (shader_prog) {
+  nir_assign_var_locations_scalar_direct_first(nir, nir-uniforms,
+   num_direct_uniforms,
+   nir-num_uniforms);
+   } else {
+  /* ARB programs generally create a giant array of uniform data, and 
allow
+   * indirect addressing without any boundaries.  In the absence of bounds
+   * analysis, it's all or nothing.  num_direct_uniforms is only useful 
when
+   * we have some direct and some indirect access; it doesn't matter here.
+   */
+  num_direct_uniforms = 0;
+   }
nir_assign_var_locations_scalar(nir-inputs, nir-num_inputs);
nir_assign_var_locations_scalar(nir-outputs, nir-num_outputs);
 
@@ -118,8 +134,10 @@ fs_visitor::emit_nir_code()
nir_remove_dead_variables(nir);
nir_validate_shader(nir);
 
-   nir_lower_samplers(nir, shader_prog, shader-base.Program);
-   nir_validate_shader(nir);
+   if (shader_prog) {
+  nir_lower_samplers(nir, shader_prog, shader-base.Program);
+  nir_validate_shader(nir);
+   }
 
nir_lower_system_values(nir);
nir_validate_shader(nir);
@@ -320,16 +338,25 @@ fs_visitor::nir_setup_uniforms(nir_shader *shader)
if (dispatch_width != 8)
   return;
 
-   foreach_list_typed(nir_variable, var, node, shader-uniforms) {
-  /* UBO's and atomics don't take up space in the uniform file */
-
-  if (var-interface_type != NULL || var-type-contains_atomic())
- continue;
+   if (shader_prog) {
+  foreach_list_typed(nir_variable, var, node, shader-uniforms) {
+ /* UBO's and atomics don't take up space in

[Mesa-dev] [PATCH v2 6/6] i965/nir: Use NIR for ARB_vertex_program support on Gen8+.

Everything is already in place; we simply have to take the scalar code
generation path.  This gives us SIMD8 VS programs, instead of SIMD4x2.

v2: Rebase on the patch that drops brw-gen = 8.

Signed-off-by: Kenneth Graunke kenn...@whitecape.org
Reviewed-by: Jordan Justen jordan.l.jus...@intel.com
---
 src/mesa/drivers/dri/i965/brw_vec4.cpp | 14 ++
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 9d2e375..38fb1c3 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -1823,7 +1823,7 @@ brw_vs_emit(struct brw_context *brw,
if (unlikely(INTEL_DEBUG  DEBUG_VS))
   brw_dump_ir(vertex, prog, shader-base, c-vp-program.Base);
 
-   if (prog  brw-scalar_vs) {
+   if (brw-scalar_vs  (prog || getenv(INTEL_USE_NIR))) {
   fs_visitor v(brw, mem_ctx, c-key, prog_data, prog, c-vp-program, 8);
   if (!v.run_vs()) {
  if (prog) {
@@ -1841,9 +1841,15 @@ brw_vs_emit(struct brw_context *brw,
  c-vp-program.Base, v.promoted_constants,
  v.runtime_check_aads_emit, VS);
   if (INTEL_DEBUG  DEBUG_VS) {
- char *name = ralloc_asprintf(mem_ctx, %s vertex shader %d,
-  prog-Label ? prog-Label : unnamed,
-  prog-Name);
+ char *name;
+ if (prog) {
+name = ralloc_asprintf(mem_ctx, %s vertex shader %d,
+   prog-Label ? prog-Label : unnamed,
+   prog-Name);
+ } else {
+name = ralloc_asprintf(mem_ctx, vertex program %d,
+   c-vp-program.Base.Id);
+ }
  g.enable_debug(name);
   }
   g.generate_code(v.cfg, 8);
-- 
2.3.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 6/6] i965: Allow Y-tiled allocations for large surfaces

On Mon, Mar 23, 2015 at 02:52:50PM +, Neil Roberts wrote:
 Sorry for the delay in replying to this.
 
 Ben Widawsky b...@bwidawsk.net writes:
 
   +static inline uint32_t
   +intel_miptree_blit_height(struct intel_mipmap_tree *mt)
   +{
   +   switch (mt-target) {
   +   case GL_TEXTURE_CUBE_MAP:
   +   case GL_TEXTURE_1D_ARRAY:
   +   case GL_TEXTURE_2D_ARRAY:
   +   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
   +   case GL_TEXTURE_CUBE_MAP_ARRAY:
   +  assert(mt-logical_depth0);
   +  return mt-qpitch;
   +   case GL_TEXTURE_3D:
   +  /* FIXME 3d textures don't have a qpitch. I think it's simply the 
   tiled
   +   * aligned mt-physical_height0. Since 3D textures aren't used 
   often, just
   +   * print the perf debug from the caller and bail
   +   */
   +  /* fallthrough */
   +   default:
   +  return mt-total_height;
   +   }
   +}
  
  This function might stop working on Skylake if we land my patch to fix
  the qpitch calculation. In that case the qpitch isn't necessarily a
  count of the number of rows. In 1D textures it is the number of pixels
  and for compressed textures it is the number of blocks. Maybe we could
  also store the physical_qpitch that is calculated in
  brw_miptree_layout_texture_array?
  
 
  I'm pretty sure today we never use the blitter for compressed
  textures. Therefore, I believe we can ignore that case. In the case
  where we use pixels, I believe it will still work fine as long as long
  as each layer is tightly packed (which I thought it was). If it's not,
  then I suppose we have a problem. I'm also totally fine with making 1D
  fallthrough since I don't think it's a particularly common case for it
  to surpass total_height anyway.
 
 I'm not sure what you are getting at here. Regardless of whether the 1D
 slices are tightly packed, we can't just return the qpitch value here
 for 1D textures because it has no relation to the height of the image.
 The height of the image is always 1. The images actually aren't tightly
 packed on Skylake because they need to be aligned to 64 pixels.

Sorry, you are correct I was thinking total_height, not qpitch. As for the SKL
restriction, you're also right, SKL support wasn't yet merged when I originally
authored the patches.

 
 Is there any reason why we can't just use mt-logical_height0 instead of
 trying to look at the qpitch? If everything using the blitter is
 operating on one slice at a time, why would it ever try to blit
 something that is taller than the height? It would be pointless to try
 to include the padding between slices in the blit, wouldn't it?

You're right about the last part. Given that I wanted this function to return
the height to be blitted, I can't return just logical_height0 since it's not
necessarily tiled aligned. The hypocrisy is noted in that I am already not
returning the actual amount to be blitted.

Rounding logical_height0 up to a tile achieves what I want [I think].
Coincidentally the next part you point out does take care of the problem where
your height might be blittable but the tile aligned height is not.

I'd like Jason and/or Jordan to weigh in since they were a large part of the
current design. It seems like if I do return the tiled aligned height here, I
can kill miptree_exceeds_blit_height() and do the simple height compare. I would
be in favor of that.

 
 Looking at the patch again in more detail I noticed something else that
 I missed the first time.
 
  diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
  b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
  index 16bd151..ee8fae4 100644
  --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
  +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
  @@ -86,7 +86,6 @@ compute_msaa_layout(struct brw_context *brw, mesa_format 
  format, GLenum target)
  }
   }
   
  -
   /**
* For single-sampled render targets (non-MSRT), the MCS buffer is a
* scaled-down bitfield representation of the color buffer which is 
  capable of
  @@ -437,6 +436,12 @@ intel_miptree_create_layout(struct brw_context *brw,
  return mt;
   }
   
  +static bool
  +miptree_exceeds_blit_height(struct intel_mipmap_tree *mt)
  +{
  +   return intel_miptree_blit_height(mt) = 
  intel_blit_tile_height(mt-tiling);
  +}
 
 Is that supposed to be = intel_blit_max_height instead? Otherwise it's
 going to disable tiling for any texture that is taller than a single
 tile, right?

See above. If I do keep it, it definitely needs a comment.

 
 Regards,
 - Neil

Thanks.

-- 
Ben Widawsky, Intel Open Source Technology Center
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2 1/6] nir: Add nir_builder helpers for creating load_const intrinsics.

2015-03-25 Thread Eric Anholt

Kenneth Graunke kenn...@whitecape.org writes:

 Both prog-nir and tgsi-nir will want to use these.

 Signed-off-by: Kenneth Graunke kenn...@whitecape.org

Patches 1, 2, 4 are:

Reviewed-by: Eric Anholt e...@anholt.net

and the changes in 3 all sound good.


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 89586] Drivers/DRI/swrast

https://bugs.freedesktop.org/show_bug.cgi?id=89586

--- Comment #39 from Ilia Mirkin imir...@alum.mit.edu ---
(In reply to Dan Sebald from comment #38)
 I can experiment with some of the C next nearest number routines and get a
 feeling for the ULP.  Thanks.

FWIW I think you're going way overboard with this. BTW, the proper way to
submit tests (and any other patches), is by mailing them to the relevant list
(pig...@lists.freedesktop.org in this case).

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] st_glsl_to_tgsi: only do mov copy propogation on temps

On Wed, Mar 25, 2015 at 7:18 PM, Dave Airlie airl...@gmail.com wrote:
 From: Dave Airlie airl...@redhat.com

 Don't propoate ARRAY

 This should fix:
 https://bugs.freedesktop.org/show_bug.cgi?id=89759

 Signed-off-by: Dave Airlie airl...@redhat.com
 Cc: mesa-sta...@lists.freedesktop.org

Note that this disables propagating IN[] reads as well, which was
apparently the point of this pass (based on the comments on the top).
And while IN[] can never be modified, TEMP[] certainly can -- why have
we never seen this issue with temps? Is that what the ACP tracking is
for? (What does ACP stand for? A** Copy Propagation?)

Perhaps you should do inst-src[0].file != PROGRAM_ARRAY ?

 ---
  src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 1 +
  1 file changed, 1 insertion(+)

 diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
 b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
 index db69a08..829644f 100644
 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
 +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
 @@ -3938,6 +3938,7 @@ glsl_to_tgsi_visitor::copy_propagate(void)
   inst-dst[0].index == inst-src[0].index) 
!inst-dst[0].reladdr 
!inst-saturate 
 +  inst-src[0].file == PROGRAM_TEMPORARY 
!inst-src[0].reladdr 
!inst-src[0].reladdr2 
!inst-src[0].negate) {
 --
 2.1.0

 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] st_glsl_to_tgsi: only do mov copy propogation on temps

2015-03-25 Thread Dave Airlie

From: Dave Airlie airl...@redhat.com

Don't propoate ARRAY

This should fix:
https://bugs.freedesktop.org/show_bug.cgi?id=89759

Signed-off-by: Dave Airlie airl...@redhat.com
Cc: mesa-sta...@lists.freedesktop.org
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index db69a08..829644f 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -3938,6 +3938,7 @@ glsl_to_tgsi_visitor::copy_propagate(void)
  inst-dst[0].index == inst-src[0].index) 
   !inst-dst[0].reladdr 
   !inst-saturate 
+  inst-src[0].file == PROGRAM_TEMPORARY 
   !inst-src[0].reladdr 
   !inst-src[0].reladdr2 
   !inst-src[0].negate) {
-- 
2.1.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] st_glsl_to_tgsi: only do mov copy propogation on temps

On Wednesday, March 25, 2015 07:33:43 PM Ilia Mirkin wrote:
 On Wed, Mar 25, 2015 at 7:18 PM, Dave Airlie airl...@gmail.com wrote:
  From: Dave Airlie airl...@redhat.com
 
  Don't propoate ARRAY
 
  This should fix:
  https://bugs.freedesktop.org/show_bug.cgi?id=89759
 
  Signed-off-by: Dave Airlie airl...@redhat.com
  Cc: mesa-sta...@lists.freedesktop.org
 
 Note that this disables propagating IN[] reads as well, which was
 apparently the point of this pass (based on the comments on the top).
 And while IN[] can never be modified, TEMP[] certainly can -- why have
 we never seen this issue with temps? Is that what the ACP tracking is
 for? (What does ACP stand for? A** Copy Propagation?)

ACP comes from Steven Muchnick's Advanced Compiler Design and
Implementation, page 357.  It's the name of the variable used for the
set in his pseudocode that explains the algorithm.

The book typically introduces short names like ACP and AEB without
ever explaining what they stand for.

My guess is Assignment.

 Perhaps you should do inst-src[0].file != PROGRAM_ARRAY ?

That seems better to me - propagating inputs is almost certainly very
important.

  ---
   src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 1 +
   1 file changed, 1 insertion(+)
 
  diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
  index db69a08..829644f 100644
  --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
  +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
  @@ -3938,6 +3938,7 @@ glsl_to_tgsi_visitor::copy_propagate(void)
inst-dst[0].index == inst-src[0].index) 
 !inst-dst[0].reladdr 
 !inst-saturate 
  +  inst-src[0].file == PROGRAM_TEMPORARY 
 !inst-src[0].reladdr 
 !inst-src[0].reladdr2 
 !inst-src[0].negate) {
  --
  2.1.0
 
  ___
  mesa-dev mailing list
  mesa-dev@lists.freedesktop.org
  http://lists.freedesktop.org/mailman/listinfo/mesa-dev
 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
 


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2 1/6] nir: Add nir_builder helpers for creating load_const intrinsics.

2015-03-25 Thread Connor Abbott

Except for one comment on patch 6, patches 1-2 and 4-6 are

Reviewed-by: Connor Abbott cwabbo...@gmail.com

I don't know enough about Mesa IR to fully review patch 3, although on
a quick read-through I couldn't find anything to improve.

On Wed, Mar 25, 2015 at 7:21 PM, Kenneth Graunke kenn...@whitecape.org wrote:
 Both prog-nir and tgsi-nir will want to use these.

 Signed-off-by: Kenneth Graunke kenn...@whitecape.org
 ---
  src/glsl/nir/nir_builder.h | 35 +++
  1 file changed, 35 insertions(+)

 diff --git a/src/glsl/nir/nir_builder.h b/src/glsl/nir/nir_builder.h
 index 7c4f7fd..fe6cb37 100644
 --- a/src/glsl/nir/nir_builder.h
 +++ b/src/glsl/nir/nir_builder.h
 @@ -47,6 +47,41 @@ nir_builder_insert_after_cf_list(nir_builder *build,
 build-cf_node_list = cf_node_list;
  }

 +static inline nir_ssa_def *
 +nir_build_imm(nir_builder *build, unsigned num_components, nir_const_value 
 value)
 +{
 +   nir_load_const_instr *load_const =
 +  nir_load_const_instr_create(build-shader, num_components);
 +   if (!load_const)
 +  return NULL;
 +
 +   load_const-value = value;
 +
 +   nir_instr_insert_after_cf_list(build-cf_node_list, load_const-instr);
 +
 +   return load_const-def;
 +}
 +
 +static inline nir_ssa_def *
 +nir_imm_float(nir_builder *build, float x)
 +{
 +   nir_const_value v = { { .f = {x, 0, 0, 0} } };
 +   return nir_build_imm(build, 1, v);
 +}
 +
 +static inline nir_ssa_def *
 +nir_imm_vec4(nir_builder *build, float x, float y, float z, float w)
 +{
 +   nir_const_value v = { { .f = {x, y, z, w} } };
 +   return nir_build_imm(build, 4, v);
 +}
 +
 +static inline nir_ssa_def *
 +nir_imm_int(nir_builder *build, int x)
 +{
 +   nir_const_value v = { { .i = {x, 0, 0, 0} } };
 +   return nir_build_imm(build, 1, v);
 +}

  static inline nir_ssa_def *
  nir_build_alu(nir_builder *build, nir_op op, nir_ssa_def *src0,
 --
 2.3.4

 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 05/11] i965/inst: Add notify and gateway_subfuncid fields

On Sun, Mar 22, 2015 at 06:49:15PM -0700, Jordan Justen wrote:
 These fields will be used when emitting a send for the barrier function.
 
 Reference: IVB PRM Volume 4, Part 2, Section 1.1.1 Message Descriptor
 
 Signed-off-by: Jordan Justen jordan.l.jus...@intel.com
 Reviewed-by: Chris Forbes chr...@ijw.co.nz
 ---
  src/mesa/drivers/dri/i965/brw_inst.h | 18 +++---
  1 file changed, 15 insertions(+), 3 deletions(-)
 
 diff --git a/src/mesa/drivers/dri/i965/brw_inst.h 
 b/src/mesa/drivers/dri/i965/brw_inst.h
 index 372aa2b..8701771 100644
 --- a/src/mesa/drivers/dri/i965/brw_inst.h
 +++ b/src/mesa/drivers/dri/i965/brw_inst.h
 @@ -322,6 +322,9 @@ FJ(gen4_jump_count, 111,  96, brw-gen  6)
  FC(gen4_pop_count,  115, 112, brw-gen  6)
  /** @} */
  
 +/* Message descriptor bits */
 +#define MD(x) (x + 96)
 +
  /**
   * Fields for SEND messages:
   *  @{
 @@ -347,6 +350,12 @@ FF(header_present,
 /* 6:   */ 115, 115,
 /* 7:   */ 115, 115,
 /* 8:   */ 115, 115)
 +FF(notify,
 +   /* 4: doesn't exist */ -1, -1, -1, -1,
 +   /* 5: doesn't exist */ -1, -1,
 +   /* 6: doesn't exist */ -1, -1,
 +   /* 7:   */ MD(16), MD(15),
 +   /* 8:   */ MD(16), MD(15))

I'm pretty sure notify has existed for much longer than Gen7. I understand that
you don't implement it, but doesn't exist is at least a little confusing.
(Also, if it does exist all the way back, you could potentially just use F())

If you end up modifying stuff, should you throw in AckReq?

  FF(function_control,
 /* 4:   */ 111,  96,
 /* 4.5: */ 111,  96,
 @@ -354,6 +363,12 @@ FF(function_control,
 /* 6:   */ 114,  96,
 /* 7:   */ 114,  96,
 /* 8:   */ 114,  96)
 +FF(gateway_subfuncid,
 +   /* 4: doesn't exist */  -1, -1, -1, -1,
 +   /* 5: doesn't exist */  -1, -1,
 +   /* 6: doesn't exist */  -1, -1,
 +   /* 7:   */  MD(2),  MD(0),
 +   /* 8:   */  MD(2),  MD(0))
  FF(sfid,
 /* 4:   */ 123, 120, /* called msg_target */
 /* 4.5  */ 123, 120,
 @@ -364,9 +379,6 @@ FF(sfid,
  FC(base_mrf,   27,  24, brw-gen  6);
  /** @} */
  
 -/* Message descriptor bits */
 -#define MD(x) (x + 96)
 -
  /**
   * URB message function control bits:
   *  @{

I am not a huge fan of MD(x) but I suppose you didn't create that yourself. I'd
be in favor of killing it at some point.

Patches up through this one are:
Reviewed-by: Ben Widawsky b...@bwidawsk.net

(I think 1  2 make more sense as a single patch, but meh)

-- 
Ben Widawsky, Intel Open Source Technology Center
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] r600g/sb: Update last_cf for loops

2015-03-25 Thread Glenn Kennard

CF_END could end up emitted in the middle of a shader on cayman
when there was a loop at the very end.

Fixes glsl-1.50-geometry-end-primitive and
ext_transform_feedback-geometry-shaders-basic piglit tests.

Signed-off-by: Glenn Kennard glenn.kenn...@gmail.com
---
Bug exposed by [PATCH] r600g/sb: Enable SB for geometry shaders

 src/gallium/drivers/r600/sb/sb_bc_finalize.cpp | 8 
 1 file changed, 8 insertions(+)

diff --git a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp 
b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
index 8d0be06..08b7d77 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
@@ -127,6 +127,14 @@ void bc_finalizer::finalize_loop(region_node* r) {
cf_node *loop_start = sh.create_cf(CF_OP_LOOP_START_DX10);
cf_node *loop_end = sh.create_cf(CF_OP_LOOP_END);
 
+   // Update last_cf, but don't overwrite it if it's outside the current 
loop nest since
+   // it may point to a cf that is later in program order.
+   // The single parent level check is sufficient since finalize_loop() is 
processed in
+   // reverse order from innermost to outermost loop nest level.
+   if (!last_cf || last_cf-get_parent_region() == r) {
+   last_cf = loop_end;
+   }
+
loop_start-jump_after(loop_end);
loop_end-jump_after(loop_start);
 
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2] egl/dri2: Fix GCC maybe-uninitialized warning.

2015-03-25 Thread Matt Turner

On Wed, Mar 25, 2015 at 4:15 PM, Jan Vesely jan.ves...@rutgers.edu wrote:
 On Wed, 2015-03-25 at 18:55 -0400, Ilia Mirkin wrote:
 On Wed, Mar 25, 2015 at 6:51 PM, Jan Vesely jan.ves...@rutgers.edu wrote:
  On Fri, 2015-03-06 at 23:54 -0800, Vinson Lee wrote:
  egl_dri2.c: In function ‘dri2_bind_tex_image’:
  egl_dri2.c:1240:4: warning: ‘format’ may be used uninitialized in this 
  function [-Wmaybe-uninitialized]
  (*dri2_dpy-tex_buffer-setTexBuffer2)(dri2_ctx-dri_context,
  ^
 
  Suggested-by: Ilia Mirkin imir...@alum.mit.edu
  Signed-off-by: Vinson Lee v...@freedesktop.org
  ---
   src/egl/drivers/dri2/egl_dri2.c | 6 --
   1 file changed, 4 insertions(+), 2 deletions(-)
 
  diff --git a/src/egl/drivers/dri2/egl_dri2.c 
  b/src/egl/drivers/dri2/egl_dri2.c
  index d503196..c5c475d 100644
  --- a/src/egl/drivers/dri2/egl_dri2.c
  +++ b/src/egl/drivers/dri2/egl_dri2.c
  @@ -1226,7 +1226,8 @@ dri2_bind_tex_image(_EGLDriver *drv,
 format = __DRI_TEXTURE_FORMAT_RGBA;
 break;
  default:
  -  assert(0);
  +  _eglError(EGL_BAD_SURFACE, unrecognized format);
  +  return EGL_FALSE;
 
  does using:
  unreachable(unrecognized format);
  instead of
  assert(0);
  fix the warning?

 unreachable is for *truly* unreachable code... it sounded like this
 was reachable with bad input.

 maybe I misunderstood the situation.
 since there is assert(0) I assumed it can never happen.
 combination of assert(0) and return is very confusing:
 either the code is reachable and should have a correct error path (in
 which case there should not be assert(0)),
 or the code is not reachable in which case unreachable does just fine
 and you should not have the error path.

 it looks to me that using assert and return just makes sure that the
 error path is never run on debug build.

 anyway, it was just a suggestion. I won't argue one way or another,
 since I don't work with/understand those parts of the code.

I agree. If the code had an assert(0) it's pretty clearly a case for
unreachable.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2] egl/dri2: Fix GCC maybe-uninitialized warning.

On Wed, Mar 25, 2015 at 8:31 PM, Matt Turner matts...@gmail.com wrote:
 On Wed, Mar 25, 2015 at 4:15 PM, Jan Vesely jan.ves...@rutgers.edu wrote:
 On Wed, 2015-03-25 at 18:55 -0400, Ilia Mirkin wrote:
 On Wed, Mar 25, 2015 at 6:51 PM, Jan Vesely jan.ves...@rutgers.edu wrote:
  On Fri, 2015-03-06 at 23:54 -0800, Vinson Lee wrote:
  egl_dri2.c: In function ‘dri2_bind_tex_image’:
  egl_dri2.c:1240:4: warning: ‘format’ may be used uninitialized in this 
  function [-Wmaybe-uninitialized]
  (*dri2_dpy-tex_buffer-setTexBuffer2)(dri2_ctx-dri_context,
  ^
 
  Suggested-by: Ilia Mirkin imir...@alum.mit.edu
  Signed-off-by: Vinson Lee v...@freedesktop.org
  ---
   src/egl/drivers/dri2/egl_dri2.c | 6 --
   1 file changed, 4 insertions(+), 2 deletions(-)
 
  diff --git a/src/egl/drivers/dri2/egl_dri2.c 
  b/src/egl/drivers/dri2/egl_dri2.c
  index d503196..c5c475d 100644
  --- a/src/egl/drivers/dri2/egl_dri2.c
  +++ b/src/egl/drivers/dri2/egl_dri2.c
  @@ -1226,7 +1226,8 @@ dri2_bind_tex_image(_EGLDriver *drv,
 format = __DRI_TEXTURE_FORMAT_RGBA;
 break;
  default:
  -  assert(0);
  +  _eglError(EGL_BAD_SURFACE, unrecognized format);
  +  return EGL_FALSE;
 
  does using:
  unreachable(unrecognized format);
  instead of
  assert(0);
  fix the warning?

 unreachable is for *truly* unreachable code... it sounded like this
 was reachable with bad input.

 maybe I misunderstood the situation.
 since there is assert(0) I assumed it can never happen.
 combination of assert(0) and return is very confusing:
 either the code is reachable and should have a correct error path (in
 which case there should not be assert(0)),
 or the code is not reachable in which case unreachable does just fine
 and you should not have the error path.

 it looks to me that using assert and return just makes sure that the
 error path is never run on debug build.

 anyway, it was just a suggestion. I won't argue one way or another,
 since I don't work with/understand those parts of the code.

 I agree. If the code had an assert(0) it's pretty clearly a case for
 unreachable.

I dunno, I've seen assert's thrown in all over the place where the
assumption was that they'd only trigger on debug builds. Not sure if
this is one of those cases, but I have a hard time convincing myself
that there's no way an unexpected value can get in there. The downside
of unreachable() is that it ends up as an infinite loop or other sorts
of funny control flow, which can be quite difficult to debug (in a
production build).
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2 6/6] i965/nir: Use NIR for ARB_vertex_program support on Gen8+.

2015-03-25 Thread Connor Abbott

On Wed, Mar 25, 2015 at 7:21 PM, Kenneth Graunke kenn...@whitecape.org wrote:
 Everything is already in place; we simply have to take the scalar code
 generation path.  This gives us SIMD8 VS programs, instead of SIMD4x2.

 v2: Rebase on the patch that drops brw-gen = 8.

 Signed-off-by: Kenneth Graunke kenn...@whitecape.org
 Reviewed-by: Jordan Justen jordan.l.jus...@intel.com
 ---
  src/mesa/drivers/dri/i965/brw_vec4.cpp | 14 ++
  1 file changed, 10 insertions(+), 4 deletions(-)

 diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
 b/src/mesa/drivers/dri/i965/brw_vec4.cpp
 index 9d2e375..38fb1c3 100644
 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
 +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
 @@ -1823,7 +1823,7 @@ brw_vs_emit(struct brw_context *brw,
 if (unlikely(INTEL_DEBUG  DEBUG_VS))
brw_dump_ir(vertex, prog, shader-base, c-vp-program.Base);

 -   if (prog  brw-scalar_vs) {
 +   if (brw-scalar_vs  (prog || getenv(INTEL_USE_NIR))) {

Aren't we using Jason's fancier thing everywhere else for checking
INTEL_USE_NIR? That should also let you turn it on by default, which
seems like a good idea even if it's not on for GLSL yet.

fs_visitor v(brw, mem_ctx, c-key, prog_data, prog, c-vp-program, 
 8);
if (!v.run_vs()) {
   if (prog) {
 @@ -1841,9 +1841,15 @@ brw_vs_emit(struct brw_context *brw,
   c-vp-program.Base, v.promoted_constants,
   v.runtime_check_aads_emit, VS);
if (INTEL_DEBUG  DEBUG_VS) {
 - char *name = ralloc_asprintf(mem_ctx, %s vertex shader %d,
 -  prog-Label ? prog-Label : unnamed,
 -  prog-Name);
 + char *name;
 + if (prog) {
 +name = ralloc_asprintf(mem_ctx, %s vertex shader %d,
 +   prog-Label ? prog-Label : unnamed,
 +   prog-Name);
 + } else {
 +name = ralloc_asprintf(mem_ctx, vertex program %d,
 +   c-vp-program.Base.Id);
 + }
   g.enable_debug(name);
}
g.generate_code(v.cfg, 8);
 --
 2.3.4

 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2 6/6] i965/nir: Use NIR for ARB_vertex_program support on Gen8+.

2015-03-25 Thread Jason Ekstrand

On Mar 25, 2015 7:43 PM, Connor Abbott cwabbo...@gmail.com wrote:

 On Wed, Mar 25, 2015 at 7:21 PM, Kenneth Graunke kenn...@whitecape.org
wrote:
  Everything is already in place; we simply have to take the scalar code
  generation path.  This gives us SIMD8 VS programs, instead of SIMD4x2.
 
  v2: Rebase on the patch that drops brw-gen = 8.
 
  Signed-off-by: Kenneth Graunke kenn...@whitecape.org
  Reviewed-by: Jordan Justen jordan.l.jus...@intel.com
  ---
   src/mesa/drivers/dri/i965/brw_vec4.cpp | 14 ++
   1 file changed, 10 insertions(+), 4 deletions(-)
 
  diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp
b/src/mesa/drivers/dri/i965/brw_vec4.cpp
  index 9d2e375..38fb1c3 100644
  --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
  +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
  @@ -1823,7 +1823,7 @@ brw_vs_emit(struct brw_context *brw,
  if (unlikely(INTEL_DEBUG  DEBUG_VS))
 brw_dump_ir(vertex, prog, shader-base, c-vp-program.Base);
 
  -   if (prog  brw-scalar_vs) {
  +   if (brw-scalar_vs  (prog || getenv(INTEL_USE_NIR))) {

 Aren't we using Jason's fancier thing everywhere else for checking
 INTEL_USE_NIR? That should also let you turn it on by default, which
 seems like a good idea even if it's not on for GLSL yet.

Thanks for catching that. Yes, we should use the fancier thing

 fs_visitor v(brw, mem_ctx, c-key, prog_data, prog,
c-vp-program, 8);
 if (!v.run_vs()) {
if (prog) {
  @@ -1841,9 +1841,15 @@ brw_vs_emit(struct brw_context *brw,
c-vp-program.Base, v.promoted_constants,
v.runtime_check_aads_emit, VS);
 if (INTEL_DEBUG  DEBUG_VS) {
  - char *name = ralloc_asprintf(mem_ctx, %s vertex shader %d,
  -  prog-Label ? prog-Label :
unnamed,
  -  prog-Name);
  + char *name;
  + if (prog) {
  +name = ralloc_asprintf(mem_ctx, %s vertex shader %d,
  +   prog-Label ? prog-Label :
unnamed,
  +   prog-Name);
  + } else {
  +name = ralloc_asprintf(mem_ctx, vertex program %d,
  +   c-vp-program.Base.Id);
  + }
g.enable_debug(name);
 }
 g.generate_code(v.cfg, 8);
  --
  2.3.4
 
  ___
  mesa-dev mailing list
  mesa-dev@lists.freedesktop.org
  http://lists.freedesktop.org/mailman/listinfo/mesa-dev
 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] st_glsl_to_tgsi: only do mov copy propagation on temps (v2)

On Wed, Mar 25, 2015 at 7:53 PM, Dave Airlie airl...@gmail.com wrote:
 From: Dave Airlie airl...@redhat.com

 Don't propagate ARRAYs

 This should fix:
 https://bugs.freedesktop.org/show_bug.cgi?id=89759

 v2: just specify arrays so we get input propagation
 Signed-off-by: Dave Airlie airl...@redhat.com
 Cc: mesa-sta...@lists.freedesktop.org

It is unclear to me whether this is the correct fix. However this
seems very clearly to be an *improvement* over the current situation,
so probably makes sense for this to go in.

Reviewed-by: Ilia Mirkin imir...@alum.mit.edu

Would be nice if someone who understands what this algo is actually
doing to take a look. Or add some comments as to how it functions.

 ---
  src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 1 +
  1 file changed, 1 insertion(+)

 diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
 b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
 index db69a08..b5e6c62 100644
 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
 +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
 @@ -3938,6 +3938,7 @@ glsl_to_tgsi_visitor::copy_propagate(void)
   inst-dst[0].index == inst-src[0].index) 
!inst-dst[0].reladdr 
!inst-saturate 
 +  inst-src[0].file != PROGRAM_ARRAY 
!inst-src[0].reladdr 
!inst-src[0].reladdr2 
!inst-src[0].negate) {
 --
 2.1.0

 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] New stable-branch 10.5 candidate pushed

Hello list,

The candidate for the Mesa 10.5.2 is now available. The current patch queue
is as follows:
 - 18 queued
 - 5 nominated (outstanding)
 - and 2 rejected (obsolete) patches

This release addresses a couple of glsl bugs affecting all the drivers,
squashes bugs in the libGL and glapi libraries, plus a mix of driver updates
mostly freedreno. The tarball no longer contains hardlinks, has all the haiku
files, builds against musl and no longer requires python/mako in order to build.

Take a look at section Mesa stable queue for more information.

Testing
---
The following results are against piglit 305ecc3ac89.


Changes - classic i965(snb)
---
Fixes:
 - all
+ shaders
   + glsl-opt-0-cmp-xyfail  pass

 - spec
+ arb_buffer_storage
   + bufferstorage-persistent read coherent client-storagefail  pass
+ arb_sync
   + repeat-wait  fail  pass
+ arb_timer_query
   + query gl_timestamp   fail  pass

Regressions*:
 - glsl-1.50
+ execution
   + fragcoord-layout-qualifiers-conflicting-case-7   pass  fail


Changes - swrast classic

Fixes:
 - all
+ shaders
   + glsl-opt-0-cmp-xyfail  pass


Changes - gallium softpipe, llvmpipe (LLVM 3.5.1)
-
Fixes:
 - all
+ shaders
   + glsl-opt-0-cmp-xyfail  pass

Regressions*:
 - glsl-1.50
+ execution
   + fragcoord-layout-qualifiers-conflicting-case-7   pass  fail


* This is a false report as the mesa 10.5.2 contains the fix for
Khronos Bug#12957 while the piglit update has landed after 305ecc3ac89.


Testing reports/general approval

Any testing reports (or general approval of the state of the branch)
will be greatly appreciated.


Trivial merge conflicts
---
None.


The plan is to have 10.5.2 this Friday(27th March).

If you have any questions or comments that you would like to share
before the release, please go ahead.


Cheers,
Emil


Mesa stable queue
-

Nominated (5)
=

Boyan Ding (1):
  i965: Add XRGB format to intel_screen_make_configs

Brian Paul (1):
  configure: don't try to build gallium DRI drivers if --disable-dri is set

Ilia Mirkin (1):
  freedreno/a3xx: fix 3d texture layout

Tom Stellard (2):
  clover: Return CL_BUILD_ERROR for CL_PROGRAM_BUILD_STATUS when 
compilation fails
  clover: Call clBuildProgram() notification function when build completes


Rejected (2)


Mario Kleiner (1):
  glx: Handle out-of-sequence swap completion events correctly.

Tapani Pälli (1):
  i965/fs: in MAD optimizations, switch last argument to be immediate


Queued (18)
===

Anuj Phogat (1):
  glsl: Generate link error for non-matching gl_FragCoord redeclarations

Emil Velikov (5):
  docs: Add sha256 sums for the 10.5.1 release
  automake: add missing egl files to the tarball
  st/egl: don't ship the dri2.c link at the tarball
  loader: include sys/stat.h for non-sysfs builds
  auxiliary/os: fix the android build - s/drm_munmap/os_munmap/

Felix Janda (1):
  c11/threads: Use PTHREAD_MUTEX_RECURSIVE by default

Francisco Jerez (1):
  i965: Set nr_params to the number of uniform components in the VS/GS path.

Ilia Mirkin (2):
  freedreno/a3xx: use the same layer size for all slices
  freedreno: fix slice pitch calculations

Marek Olšák (1):
  radeonsi: increase coords array size for 
radeon_llvm_emit_prepare_cube_coords

Mario Kleiner (2):
  glx: Handle out-of-sequence swap completion events correctly. (v2)
  mapi: Make private copies of name strings provided by client.

Rob Clark (1):
  freedreno: update generated headers

Samuel Iglesias Gonsalvez (2):
  glsl: optimize (0 cmp x + y) into (-x cmp y).
  configure: Introduce new output variable to ax_check_python_mako_module.m4

Tapani Pälli (1):
  glsl: fix names in lower_constant_arrays_to_uniforms

Tom Stellard (1):
  clover: Return 0 as storage size for local kernel args that are not set v2
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] i965: Add XRGB8888 format to intel_screen_make_configs

2015-03-25 Thread Boyan Ding

Some application, such as drm backend of weston, uses XRGB config as
default. i965 doesn't provide this format, but before commit 65c8965d,
the drm platform of EGL takes ARGB as XRGB. Now that commit
65c8965d makes EGL recognize format correctly so weston won't start
because it can't find XRGB. Add XRGB format to i965 just as
other drivers do.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89689
Signed-off-by: Boyan Ding boyan.j.d...@gmail.com
---
 src/mesa/drivers/dri/i965/intel_screen.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
b/src/mesa/drivers/dri/i965/intel_screen.c
index 3640b67..2b82c33 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -1126,7 +1126,8 @@ intel_screen_make_configs(__DRIscreen *dri_screen)
 {
static const mesa_format formats[] = {
   MESA_FORMAT_B5G6R5_UNORM,
-  MESA_FORMAT_B8G8R8A8_UNORM
+  MESA_FORMAT_B8G8R8A8_UNORM,
+  MESA_FORMAT_B8G8R8X8_UNORM
};
 
/* GLX_SWAP_COPY_OML is not supported due to page flipping. */
-- 
2.3.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] i965/skl: Don't use the PMA depth stall workaround

On Wednesday, March 25, 2015 04:52:46 PM Ben Widawsky wrote:
 The PMA depth stall must be enabled (optimization turned off) under certain
 circumstances on gen8. This was supposedly fixed for Gen9, which means we do 
not
 need to check, or toggle the state. The hardware is supposed to enable the
 hardware optimization by default, unlike BDW, so we also don't need to set 
it at
 init. For whatever reason this improves stability on ETQW with the bug 
mentioned
 below.
 
 TODO: This patch still needs to be tested on a full piglit run.
 
 References: https://bugs.freedesktop.org/show_bug.cgi?id=89039 (doesn't fix)
 Cc: Anuj Phogat anuj.pho...@intel.com
 Cc: Eero Tamminen eero.t.tammi...@intel.com
 Signed-off-by: Ben Widawsky b...@bwidawsk.net
 ---
  src/mesa/drivers/dri/i965/gen8_depth_state.c | 7 ++-
  1 file changed, 6 insertions(+), 1 deletion(-)
 
 diff --git a/src/mesa/drivers/dri/i965/gen8_depth_state.c 
b/src/mesa/drivers/dri/i965/gen8_depth_state.c
 index c6494c9..3d126cf 100644
 --- a/src/mesa/drivers/dri/i965/gen8_depth_state.c
 +++ b/src/mesa/drivers/dri/i965/gen8_depth_state.c
 @@ -368,6 +368,10 @@ static void
  gen8_emit_pma_stall_workaround(struct brw_context *brw)
  {
 uint32_t bits = 0;
 +
 +   if (brw-gen = 9)
 +  return;
 +
 if (pma_fix_enable(brw))
bits |= GEN8_HIZ_NP_PMA_FIX_ENABLE | 
GEN8_HIZ_NP_EARLY_Z_FAILS_DISABLE;
  
 @@ -400,7 +404,8 @@ gen8_hiz_exec(struct brw_context *brw, struct 
intel_mipmap_tree *mt,
return;
  
 /* Disable the PMA stall fix since we're about to do a HiZ operation. */
 -   write_pma_stall_bits(brw, 0);
 +   if (brw-gen == 8)
 +  write_pma_stall_bits(brw, 0);
  
 assert(mt-first_level == 0);
 assert(mt-logical_depth0 = 1);
 

Thanks for fixing this.

Reviewed-by: Kenneth Graunke kenn...@whitecape.org


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2] egl/dri2: Fix GCC maybe-uninitialized warning.

2015-03-25 Thread Jan Vesely

On Fri, 2015-03-06 at 23:54 -0800, Vinson Lee wrote:
 egl_dri2.c: In function ‘dri2_bind_tex_image’:
 egl_dri2.c:1240:4: warning: ‘format’ may be used uninitialized in this 
 function [-Wmaybe-uninitialized]
 (*dri2_dpy-tex_buffer-setTexBuffer2)(dri2_ctx-dri_context,
 ^
 
 Suggested-by: Ilia Mirkin imir...@alum.mit.edu
 Signed-off-by: Vinson Lee v...@freedesktop.org
 ---
  src/egl/drivers/dri2/egl_dri2.c | 6 --
  1 file changed, 4 insertions(+), 2 deletions(-)
 
 diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c
 index d503196..c5c475d 100644
 --- a/src/egl/drivers/dri2/egl_dri2.c
 +++ b/src/egl/drivers/dri2/egl_dri2.c
 @@ -1226,7 +1226,8 @@ dri2_bind_tex_image(_EGLDriver *drv,
format = __DRI_TEXTURE_FORMAT_RGBA;
break;
 default:
 -  assert(0);
 +  _eglError(EGL_BAD_SURFACE, unrecognized format);
 +  return EGL_FALSE;

does using:
unreachable(unrecognized format);
instead of
assert(0);
fix the warning?

 }
  
 switch (dri2_surf-base.TextureTarget) {
 @@ -1234,7 +1235,8 @@ dri2_bind_tex_image(_EGLDriver *drv,
target = GL_TEXTURE_2D;
break;
 default:
 -  assert(0);
 +  _eglError(EGL_BAD_SURFACE, unrecognized target);
 +  return EGL_FALSE;
 }
  
 (*dri2_dpy-tex_buffer-setTexBuffer2)(dri2_ctx-dri_context,

-- 
Jan Vesely jan.ves...@rutgers.edu


signature.asc
Description: This is a digitally signed message part
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 89586] Drivers/DRI/swrast

https://bugs.freedesktop.org/show_bug.cgi?id=89586

--- Comment #38 from Dan Sebald daniel.seb...@ieee.org ---
0.1 tolerance is rather large (I'm guessing that applies to normalized
numbers).  In this case the discrepancy doesn't come close to that big.

I looked up ARB_shader_precision and I'm seeing tolerance on the order of 2 to
3 units of the last place, generally, with a factor larger tolerance (16 ULP)
for the pow() function.  Division is the source of the error in xfactor, which
has 2.5 ULP tolerance.

I can experiment with some of the C next nearest number routines and get a
feeling for the ULP.  Thanks.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2] egl/dri2: Fix GCC maybe-uninitialized warning.

2015-03-25 Thread Jan Vesely

On Wed, 2015-03-25 at 18:55 -0400, Ilia Mirkin wrote:
 On Wed, Mar 25, 2015 at 6:51 PM, Jan Vesely jan.ves...@rutgers.edu wrote:
  On Fri, 2015-03-06 at 23:54 -0800, Vinson Lee wrote:
  egl_dri2.c: In function ‘dri2_bind_tex_image’:
  egl_dri2.c:1240:4: warning: ‘format’ may be used uninitialized in this 
  function [-Wmaybe-uninitialized]
  (*dri2_dpy-tex_buffer-setTexBuffer2)(dri2_ctx-dri_context,
  ^
 
  Suggested-by: Ilia Mirkin imir...@alum.mit.edu
  Signed-off-by: Vinson Lee v...@freedesktop.org
  ---
   src/egl/drivers/dri2/egl_dri2.c | 6 --
   1 file changed, 4 insertions(+), 2 deletions(-)
 
  diff --git a/src/egl/drivers/dri2/egl_dri2.c 
  b/src/egl/drivers/dri2/egl_dri2.c
  index d503196..c5c475d 100644
  --- a/src/egl/drivers/dri2/egl_dri2.c
  +++ b/src/egl/drivers/dri2/egl_dri2.c
  @@ -1226,7 +1226,8 @@ dri2_bind_tex_image(_EGLDriver *drv,
 format = __DRI_TEXTURE_FORMAT_RGBA;
 break;
  default:
  -  assert(0);
  +  _eglError(EGL_BAD_SURFACE, unrecognized format);
  +  return EGL_FALSE;
 
  does using:
  unreachable(unrecognized format);
  instead of
  assert(0);
  fix the warning?
 
 unreachable is for *truly* unreachable code... it sounded like this
 was reachable with bad input.

maybe I misunderstood the situation.
since there is assert(0) I assumed it can never happen.
combination of assert(0) and return is very confusing:
either the code is reachable and should have a correct error path (in
which case there should not be assert(0)),
or the code is not reachable in which case unreachable does just fine
and you should not have the error path.

it looks to me that using assert and return just makes sure that the
error path is never run on debug build.

anyway, it was just a suggestion. I won't argue one way or another,
since I don't work with/understand those parts of the code.

regards,
jan


 
 
  }
 
  switch (dri2_surf-base.TextureTarget) {
  @@ -1234,7 +1235,8 @@ dri2_bind_tex_image(_EGLDriver *drv,
 target = GL_TEXTURE_2D;
 break;
  default:
  -  assert(0);
  +  _eglError(EGL_BAD_SURFACE, unrecognized target);
  +  return EGL_FALSE;
  }
 
  (*dri2_dpy-tex_buffer-setTexBuffer2)(dri2_ctx-dri_context,
 
  --
  Jan Vesely jan.ves...@rutgers.edu
 
  ___
  mesa-dev mailing list
  mesa-dev@lists.freedesktop.org
  http://lists.freedesktop.org/mailman/listinfo/mesa-dev
 

-- 
Jan Vesely jan.ves...@rutgers.edu


signature.asc
Description: This is a digitally signed message part
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 4/6] nir: Lower subtraction to add with negation when !lower_negate.