date:20131122

[Mesa-dev] [PATCH 1/2] i965: Allocate vec4_visitor's uniform_size and uniform_vector_size arrays dynamically.

2013-11-22 Thread Petri Latvala

Signed-off-by: Petri Latvala petri.latv...@intel.com
---
 src/mesa/drivers/dri/i965/brw_vec4.cpp|  5 +++--
 src/mesa/drivers/dri/i965/brw_vec4.h  | 14 +++---
 src/mesa/drivers/dri/i965/brw_vec4_gs.c   |  2 +-
 src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 12 +++-
 src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h   |  6 --
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp|  7 ++-
 src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp |  6 --
 src/mesa/drivers/dri/i965/brw_vs.c|  2 +-
 src/mesa/drivers/dri/i965/brw_vs.h|  6 --
 9 files changed, 41 insertions(+), 19 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 73f91a0..3da1b4d 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -1558,7 +1558,8 @@ brw_vs_emit(struct brw_context *brw,
 struct brw_vs_compile *c,
 struct brw_vs_prog_data *prog_data,
 void *mem_ctx,
-unsigned *final_assembly_size)
+unsigned *final_assembly_size,
+unsigned param_count)
 {
bool start_busy = false;
float start_time = 0;
@@ -1585,7 +1586,7 @@ brw_vs_emit(struct brw_context *brw,
   }
}
 
-   vec4_vs_visitor v(brw, c, prog_data, prog, shader, mem_ctx);
+   vec4_vs_visitor v(brw, c, prog_data, prog, shader, mem_ctx, param_count);
if (!v.run()) {
   if (prog) {
  prog-LinkStatus = false;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
b/src/mesa/drivers/dri/i965/brw_vec4.h
index 5cec9f9..552ca55 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -231,7 +231,8 @@ public:
struct brw_shader *shader,
void *mem_ctx,
 bool debug_flag,
-bool no_spills);
+bool no_spills,
+unsigned param_count);
~vec4_visitor();
 
dst_reg dst_null_f()
@@ -325,8 +326,9 @@ public:
 */
dst_reg output_reg[BRW_VARYING_SLOT_COUNT];
const char *output_reg_annotation[BRW_VARYING_SLOT_COUNT];
-   int uniform_size[MAX_UNIFORMS];
-   int uniform_vector_size[MAX_UNIFORMS];
+   unsigned uniform_param_count;
+   int* uniform_size;
+   int* uniform_vector_size;
int uniforms;
 
src_reg shader_start_time;
@@ -546,6 +548,12 @@ private:
 * If true, then register allocation should fail instead of spilling.
 */
const bool no_spills;
+
+   /*
+* Make noncopyable.
+*/
+   vec4_visitor(const vec4_visitor);
+   vec4_visitor operator=(const vec4_visitor);
 };
 
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs.c 
b/src/mesa/drivers/dri/i965/brw_vec4_gs.c
index b52d646..b0b77d1 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs.c
@@ -213,7 +213,7 @@ do_gs_prog(struct brw_context *brw,
void *mem_ctx = ralloc_context(NULL);
unsigned program_size;
const unsigned *program =
-  brw_gs_emit(brw, prog, c, mem_ctx, program_size);
+  brw_gs_emit(brw, prog, c, mem_ctx, program_size, param_count);
if (program == NULL) {
   ralloc_free(mem_ctx);
   return false;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
index adbb1cf..16c05f5 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
@@ -38,10 +38,11 @@ vec4_gs_visitor::vec4_gs_visitor(struct brw_context *brw,
  struct gl_shader_program *prog,
  struct brw_shader *shader,
  void *mem_ctx,
- bool no_spills)
+ bool no_spills,
+ unsigned param_count)
: vec4_visitor(brw, c-base, c-gp-program.Base, c-key.base,
   c-prog_data.base, prog, shader, mem_ctx,
-  INTEL_DEBUG  DEBUG_GS, no_spills),
+  INTEL_DEBUG  DEBUG_GS, no_spills, param_count),
  c(c)
 {
 }
@@ -539,7 +540,8 @@ brw_gs_emit(struct brw_context *brw,
 struct gl_shader_program *prog,
 struct brw_gs_compile *c,
 void *mem_ctx,
-unsigned *final_assembly_size)
+unsigned *final_assembly_size,
+unsigned param_count)
 {
struct brw_shader *shader =
   (brw_shader *) prog-_LinkedShaders[MESA_SHADER_GEOMETRY];
@@ -556,7 +558,7 @@ brw_gs_emit(struct brw_context *brw,
if (likely(!(INTEL_DEBUG  DEBUG_NO_DUAL_OBJECT_GS))) {
   c-prog_data.dual_instanced_dispatch = false;
 
-  vec4_gs_visitor v(brw, c, prog, shader, mem_ctx, true /* no_spills */);
+  vec4_gs_visitor v(brw, c, prog, shader, mem_ctx, true /* no_spills */, 
param_count);
   if (v.run()) {
  vec4_generator g(brw, prog,

[Mesa-dev] [PATCH 2/2] i965: Assert array index on access to vec4_visitor's arrays.

2013-11-22 Thread Petri Latvala

Signed-off-by: Petri Latvala petri.latv...@intel.com
---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index df38dab..511b080 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -662,6 +662,7 @@ vec4_visitor::setup_uniform_values(ir_variable *ir)
storage-type-matrix_columns);
 
   for (unsigned s = 0; s  vector_count; s++) {
+ assert(uniforms  uniform_param_count);
  uniform_vector_size[uniforms] = storage-type-vector_elements;
 
  int i;
@@ -685,6 +686,7 @@ vec4_visitor::setup_uniform_clipplane_values()
gl_clip_plane *clip_planes = brw_select_clip_planes(ctx);
 
for (int i = 0; i  key-nr_userclip_plane_consts; ++i) {
+  assert(this-uniforms  uniform_param_count);
   this-uniform_vector_size[this-uniforms] = 4;
   this-userplane[i] = dst_reg(UNIFORM, this-uniforms);
   this-userplane[i].type = BRW_REGISTER_TYPE_F;
@@ -715,6 +717,7 @@ vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
(gl_state_index *)slots[i].tokens);
   float *values = this-prog-Parameters-ParameterValues[index][0].f;
 
+  assert(this-uniforms  uniform_param_count);
   this-uniform_vector_size[this-uniforms] = 0;
   /* Add each of the unique swizzled channels of the element.
* This will end up matching the size of the glsl_type of this field.
@@ -725,6 +728,7 @@ vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
 last_swiz = swiz;
 
 prog_data-param[this-uniforms * 4 + j] = values[swiz];
+assert(this-uniforms  uniform_param_count);
 if (swiz = last_swiz)
this-uniform_vector_size[this-uniforms]++;
   }
@@ -983,6 +987,7 @@ vec4_visitor::visit(ir_variable *ir)
   /* Track how big the whole uniform variable is, in case we need to put a
* copy of its data into pull constants for array access.
*/
+  assert(this-uniforms  uniform_param_count);
   this-uniform_size[this-uniforms] = type_size(ir-type);
 
   if (!strncmp(ir-name, gl_, 3)) {
@@ -3197,6 +3202,7 @@ 
vec4_visitor::move_uniform_array_access_to_pull_constants()
 
pull_constant_loc[uniform] = prog_data-nr_pull_params / 4;
 
+   assert(uniform  uniform_param_count);
for (int j = 0; j  uniform_size[uniform] * 4; j++) {
   prog_data-pull_param[prog_data-nr_pull_params++]
   = values[j];
-- 
1.8.4.rc3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 0/2] v2: Fix array overrun with too many uniforms

2013-11-22 Thread Petri Latvala

Second attempt at fixing
https://bugs.freedesktop.org/show_bug.cgi?id=71254

vec4_visitor::uniform_size and vec4_visitor::uniform_vector_size are
now allocated dynamically based on param_count from do_vs_prog.

I also made vec4_visitor noncopyable. It is not copied currently, and
to my understanding copying it would be nonsense anyway.

The passed param_count is saved in vec4_visitor for the asserts in
patch 2. That could be wrapped in #ifndef NDEBUG but I wasn't sure
whether that is desired. It would change the ABI for that class
depending on compile flags, but the only use for it is internal.


Petri Latvala (2):
  i965: Allocate vec4_visitor's uniform_size and uniform_vector_size
arrays dynamically.
  i965: Assert array index on access to vec4_visitor's arrays.

 src/mesa/drivers/dri/i965/brw_vec4.cpp|  5 +++--
 src/mesa/drivers/dri/i965/brw_vec4.h  | 14 +++---
 src/mesa/drivers/dri/i965/brw_vec4_gs.c   |  2 +-
 src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 12 +++-
 src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h   |  6 --
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp| 13 -
 src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp |  6 --
 src/mesa/drivers/dri/i965/brw_vs.c|  2 +-
 src/mesa/drivers/dri/i965/brw_vs.h|  6 --
 9 files changed, 47 insertions(+), 19 deletions(-)

-- 
1.8.4.rc3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] mesa: Remove the ralloc canary on release builds.

2013-11-22 Thread Eric Anholt

The canary is basically just to give a better debugging message when you
ralloc_free() something that wasn't rallocated.  Reduces maximum memory
usage of apitrace replay of the dota2 demo by 60MB on my 64-bit system (so
half that on a real 32-bit dota2 environment).
---
 src/glsl/ralloc.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/src/glsl/ralloc.c b/src/glsl/ralloc.c
index e79dad7..36bc61f 100644
--- a/src/glsl/ralloc.c
+++ b/src/glsl/ralloc.c
@@ -53,8 +53,10 @@ _CRTIMP int _vscprintf(const char *format, va_list argptr);
 
 struct ralloc_header
 {
+#ifdef DEBUG
/* A canary value used to determine whether a pointer is ralloc'd. */
unsigned canary;
+#endif
 
struct ralloc_header *parent;
 
@@ -78,7 +80,9 @@ get_header(const void *ptr)
 {
ralloc_header *info = (ralloc_header *) (((char *) ptr) -
sizeof(ralloc_header));
+#ifdef DEBUG
assert(info-canary == CANARY);
+#endif
return info;
 }
 
@@ -117,7 +121,9 @@ ralloc_size(const void *ctx, size_t size)
 
add_child(parent, info);
 
+#ifdef DEBUG
info-canary = CANARY;
+#endif
 
return PTR_FROM_HEADER(info);
 }
-- 
1.8.4.rc3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] mesa: Remove the ralloc canary on release builds.

2013-11-22 Thread Kenneth Graunke

On 11/22/2013 12:21 AM, Eric Anholt wrote:
 The canary is basically just to give a better debugging message when you
 ralloc_free() something that wasn't rallocated.  Reduces maximum memory
 usage of apitrace replay of the dota2 demo by 60MB on my 64-bit system (so
 half that on a real 32-bit dota2 environment).

Thanks so much for doing the measurement on this.  Obvious improvement,
and frankly I'm embarrassed that I didn't think to do this years ago. :)

Reviewed-by: Kenneth Graunke kenn...@whitecape.org

 ---
  src/glsl/ralloc.c | 6 ++
  1 file changed, 6 insertions(+)
 
 diff --git a/src/glsl/ralloc.c b/src/glsl/ralloc.c
 index e79dad7..36bc61f 100644
 --- a/src/glsl/ralloc.c
 +++ b/src/glsl/ralloc.c
 @@ -53,8 +53,10 @@ _CRTIMP int _vscprintf(const char *format, va_list argptr);
  
  struct ralloc_header
  {
 +#ifdef DEBUG
 /* A canary value used to determine whether a pointer is ralloc'd. */
 unsigned canary;
 +#endif
  
 struct ralloc_header *parent;
  
 @@ -78,7 +80,9 @@ get_header(const void *ptr)
  {
 ralloc_header *info = (ralloc_header *) (((char *) ptr) -
   sizeof(ralloc_header));
 +#ifdef DEBUG
 assert(info-canary == CANARY);
 +#endif
 return info;
  }
  
 @@ -117,7 +121,9 @@ ralloc_size(const void *ctx, size_t size)
  
 add_child(parent, info);
  
 +#ifdef DEBUG
 info-canary = CANARY;
 +#endif
  
 return PTR_FROM_HEADER(info);
  }
 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] mesa: Remove the ralloc canary on release builds.

2013-11-22 Thread Kenneth Graunke

On 11/22/2013 12:21 AM, Eric Anholt wrote:
 The canary is basically just to give a better debugging message when you
 ralloc_free() something that wasn't rallocated.  Reduces maximum memory
 usage of apitrace replay of the dota2 demo by 60MB on my 64-bit system (so
 half that on a real 32-bit dota2 environment).

Really, half?  It's an unsigned...that's 4 bytes regardless of 64-bit
vs. 32-bit.  I think this should be 60MB of savings, end of story.

--Ken
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 50754] Building 32 bit mesa on 64 bit OS fails since change for automake

2013-11-22 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=50754

--- Comment #26 from Tapani Pälli lem...@gmail.com ---
(In reply to comment #25)
 Created attachment 89621 [details] [review]
 Move LT_INIT where it should so it can test correctly (AM_)C(XX)FLAGS and
 others
 
 Updated patch against latest git code.
 Based on previous patch with further explanations.
 
 LT_INIT needs to work with the final content of variables to be able to
 correctly run some tests and determine the host type. So it must be called
 after setting anything we want to set in (AM_)C(XX)FLAGS or (AM_)LDFLAGS.
 
 Fixing LT_INIT must be done independently from modifying environment
 variables or build variables since LT_INIT will check both variables and
 shadow variables if they are set.
 
 Tested on my setup and it works. I'd like someone else to also test it.
 
 Another patch should be made to fix llvm-config, which is another topic.

Yep I verified this on my machine, the patch still works with the comments :)

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 71591] Second Life shaders fail to compile (extension declared in middle of shader)

2013-11-22 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=71591

Eero Tamminen eero.t.tammi...@intel.com changed:

   What|Removed |Added

Summary|Second Life shaders fail to |Second Life shaders fail to
   |compile |compile (extension declared
   ||in middle of shader)

--- Comment #7 from Eero Tamminen eero.t.tammi...@intel.com ---
Got a comment from Ian  Kenneth on what's the Mesa policy for things that are
against GL spec, but accepted by competing driver(s):
---
1. Follow the spec.

2. If nobody follows the spec, get the spec changed.  Goto 1.

With a rare case of:

Add driconf workaround for broken app, but only if:
- People still care about broken app
- App will never get fixed


According to comment 1), ATI driver doesn't follow the spec.
What about Nvidia driver?

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] dri3, i915, i965: Add __DRI_IMAGE_FOURCC_SARGB8888

2013-11-22 Thread Daniel Vetter

On Thu, Nov 21, 2013 at 08:12:04PM -0800, Keith Packard wrote:
 The __DRIimage createImageFromFds function takes a fourcc code, but there was
 no fourcc code that match __DRI_IMAGE_FORMAT_SARGB8. This adds a define for
 that format, adds a translation in DRI3 from __DRI_IMAGE_FORMAT_SARGB8 to
 __DRI_IMAGE_FOURCC_SARGB and then adds translations *back* to
 __IMAGE_FORMAT_SARGB8 in both the i915 and i965 drivers.
 
 I'll refrain from comments on whether I think having two separate sets of
 format defines in dri_interface.h is a good idea or not...
 
 Signed-off-by: Keith Packard kei...@keithp.com

Hm, where do we have the canonical source for all these fourcc codes? I'm
asking since we have our own copy in the kernel as drm_fourcc.h, and that
one is part of the userspace ABI since we use it to pass around
framebuffer formats and format lists.

Just afraid to create long-term maintainance madness here with the
kernel's iron thou-shalt-not-break-userspace-ever rule ... Not likely
we'll ever accept srgb for framebuffers though.
-Daniel

 ---
 
 This gets iceweasel running with the GL compositor enabled.
 
  include/GL/internal/dri_interface.h  | 1 +
  src/glx/dri3_glx.c   | 1 +
  src/mesa/drivers/dri/i915/intel_screen.c | 3 +++
  src/mesa/drivers/dri/i965/intel_screen.c | 3 +++
  4 files changed, 8 insertions(+)
 
 diff --git a/include/GL/internal/dri_interface.h 
 b/include/GL/internal/dri_interface.h
 index b012570..a4387c4 100644
 --- a/include/GL/internal/dri_interface.h
 +++ b/include/GL/internal/dri_interface.h
 @@ -1034,6 +1034,7 @@ struct __DRIdri2ExtensionRec {
  #define __DRI_IMAGE_FOURCC_XRGB  0x34325258
  #define __DRI_IMAGE_FOURCC_ABGR  0x34324241
  #define __DRI_IMAGE_FOURCC_XBGR  0x34324258
 +#define __DRI_IMAGE_FOURCC_SARGB0x83324258
  #define __DRI_IMAGE_FOURCC_YUV4100x39565559
  #define __DRI_IMAGE_FOURCC_YUV4110x31315559
  #define __DRI_IMAGE_FOURCC_YUV4200x32315559
 diff --git a/src/glx/dri3_glx.c b/src/glx/dri3_glx.c
 index b047cc8..5861317 100644
 --- a/src/glx/dri3_glx.c
 +++ b/src/glx/dri3_glx.c
 @@ -890,6 +890,7 @@ image_format_to_fourcc(int format)
  
 /* Convert from __DRI_IMAGE_FORMAT to __DRI_IMAGE_FOURCC (sigh) */
 switch (format) {
 +   case __DRI_IMAGE_FORMAT_SARGB8: return __DRI_IMAGE_FOURCC_SARGB;
 case __DRI_IMAGE_FORMAT_RGB565: return __DRI_IMAGE_FOURCC_RGB565;
 case __DRI_IMAGE_FORMAT_XRGB: return __DRI_IMAGE_FOURCC_XRGB;
 case __DRI_IMAGE_FORMAT_ARGB: return __DRI_IMAGE_FOURCC_ARGB;
 diff --git a/src/mesa/drivers/dri/i915/intel_screen.c 
 b/src/mesa/drivers/dri/i915/intel_screen.c
 index 7f1fc6b..2dd2bc7 100644
 --- a/src/mesa/drivers/dri/i915/intel_screen.c
 +++ b/src/mesa/drivers/dri/i915/intel_screen.c
 @@ -184,6 +184,9 @@ static struct intel_image_format intel_image_formats[] = {
 { __DRI_IMAGE_FOURCC_ARGB, __DRI_IMAGE_COMPONENTS_RGBA, 1,
   { { 0, 0, 0, __DRI_IMAGE_FORMAT_ARGB, 4 } } },
  
 +   { __DRI_IMAGE_FOURCC_SARGB, __DRI_IMAGE_COMPONENTS_RGBA, 1,
 + { { 0, 0, 0, __DRI_IMAGE_FORMAT_SARGB8, 4 } } },
 +
 { __DRI_IMAGE_FOURCC_XRGB, __DRI_IMAGE_COMPONENTS_RGB, 1,
   { { 0, 0, 0, __DRI_IMAGE_FORMAT_XRGB, 4 }, } },
  
 diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
 b/src/mesa/drivers/dri/i965/intel_screen.c
 index e44d0f6..cf8c3e2 100644
 --- a/src/mesa/drivers/dri/i965/intel_screen.c
 +++ b/src/mesa/drivers/dri/i965/intel_screen.c
 @@ -220,6 +220,9 @@ static struct intel_image_format intel_image_formats[] = {
 { __DRI_IMAGE_FOURCC_ARGB, __DRI_IMAGE_COMPONENTS_RGBA, 1,
   { { 0, 0, 0, __DRI_IMAGE_FORMAT_ARGB, 4 } } },
  
 +   { __DRI_IMAGE_FOURCC_SARGB, __DRI_IMAGE_COMPONENTS_RGBA, 1,
 + { { 0, 0, 0, __DRI_IMAGE_FORMAT_SARGB8, 4 } } },
 +
 { __DRI_IMAGE_FOURCC_XRGB, __DRI_IMAGE_COMPONENTS_RGB, 1,
   { { 0, 0, 0, __DRI_IMAGE_FORMAT_XRGB, 4 }, } },
  
 -- 
 1.8.4.2
 
 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 71912] Compounded macros in shaders: huge memory consumption

2013-11-22 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=71912

--- Comment #1 from Kevin Rogovin kevin.rogo...@intel.com ---
Created attachment 89626
  -- https://bugs.freedesktop.org/attachment.cgi?id=89626action=edit
paired fragment shader

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] dri3, i915, i965: Add __DRI_IMAGE_FOURCC_SARGB8888

2013-11-22 Thread Keith Packard

Daniel Vetter dan...@ffwll.ch writes:

 Hm, where do we have the canonical source for all these fourcc codes? I'm
 asking since we have our own copy in the kernel as drm_fourcc.h, and that
 one is part of the userspace ABI since we use it to pass around
 framebuffer formats and format lists.

I think it's the kernel? I really don't know, as the whole notion of
fourcc codes seems crazy to me...

Feel free to steal this code and stick it in the kernel if you like.

 Just afraid to create long-term maintainance madness here with the
 kernel's iron thou-shalt-not-break-userspace-ever rule ... Not likely
 we'll ever accept srgb for framebuffers though.

Would suck to collide with something we do want though.

-- 
keith.pack...@intel.com


pgpf4F9vCQb7I.pgp
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 71912] New: Compounded macros in shaders: huge memory consumption

2013-11-22 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=71912

  Priority: medium
Bug ID: 71912
  Assignee: mesa-dev@lists.freedesktop.org
   Summary: Compounded macros in shaders: huge memory consumption
  Severity: normal
Classification: Unclassified
OS: All
  Reporter: kevin.rogo...@intel.com
  Hardware: Other
Status: NEW
   Version: 9.1
 Component: Mesa core
   Product: Mesa

Created attachment 89625
  -- https://bugs.freedesktop.org/attachment.cgi?id=89625action=edit
Vertex shader that has deep macro

The attached shader causes literally gigabytes upon gigabytes of memory to be
consumed when submitted to Mesa. The shader cause is because it compounds
macros many levels deep. Mesa is at the stage of applying the preprocessor and
does not get to even parsing the shader, much less compiling it.

The main issue is for WebGL enabled browsers. Indeed, a malicious website could
submit such a shader (which is quite small) and bring the user's system to a
standstill halt almost.

A possible fix is to essentially add additional logic to the preprocessor
computing the complexity multiplier of a macro and if that complexity
multiplies is too high to abort with a message that macro expands into a too
large expression.

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 71912] Compounded macros in shaders: huge memory consumption

2013-11-22 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=71912

--- Comment #2 from Kenneth Graunke kenn...@whitecape.org ---
Nice find.  Are you planning to try and fix this bug?

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 71912] Compounded macros in shaders: huge memory consumption

2013-11-22 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=71912

Kenneth Graunke kenn...@whitecape.org changed:

   What|Removed |Added

   Assignee|mesa-dev@lists.freedesktop. |i...@freedesktop.org
   |org |
 QA Contact||intel-3d-bugs@lists.freedes
   ||ktop.org
  Component|Mesa core   |glsl-compiler

--- Comment #3 from Kenneth Graunke kenn...@whitecape.org ---
Since this is a preprocessor issue, changing the component to glsl-compiler and
reassigning to Carl as a default assignee.

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] llvmpipe: support 8bit subpixel precision

2013-11-22 Thread Roland Scheidegger


On 11/22/2013 05:29 AM, Zack Rusin wrote:

For me too, other than the fixed_position members, looks good.  Thanks for
your perseverance on this Zack!


Thanks! ok, attached is a version that makes position and dx/dy 32bit again, it 
seems to work great. I have a question for you guys if you run the piglits:
./bin/triangle-rasterization-overdraw -max_size -seed 0xA8402F24 -count 1 -auto
on master does it fail for you? It fails for me on master, with and without the 
patch. I'm not sure what to make of it, I might have been looking at 
rasterization for too long. Looking at the rendering it looks correct.

z



Looks good to me.
The test you mentioned certainly fails for me too.

Roland
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] intel: Track known prime buffers for re-use

2013-11-22 Thread Keith Packard

If the application sends us a file descriptor pointing at a prime
buffer that we've already got, we have to re-use the same bo_gem
structure or chaos will result.

Track the set of all known prime objects and look to see if the kernel
has returned one of those for a new file descriptor.

Signed-off-by: Keith Packard kei...@keithp.com
---

This one took a while to find -- multiple bo_gem structs pointing at
the same gem handle would either cause the object to be destroyed
before we were done using it, or we'd end up sending the same
gem_handle for multiple buffers.

This looks a lot like the named object handling stuff, as one would
expect.

 intel/intel_bufmgr_gem.c | 22 +-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
index df6fcec..2897bb2 100644
--- a/intel/intel_bufmgr_gem.c
+++ b/intel/intel_bufmgr_gem.c
@@ -112,6 +112,7 @@ typedef struct _drm_intel_bufmgr_gem {
 
drmMMListHead named;
drmMMListHead vma_cache;
+drmMMListHead prime;
int vma_count, vma_open, vma_max;
 
uint64_t gtt_size;
@@ -2451,8 +2452,25 @@ drm_intel_bo_gem_create_from_prime(drm_intel_bufmgr 
*bufmgr, int prime_fd, int s
uint32_t handle;
drm_intel_bo_gem *bo_gem;
struct drm_i915_gem_get_tiling get_tiling;
+   drmMMListHead *list;
 
ret = drmPrimeFDToHandle(bufmgr_gem-fd, prime_fd, handle);
+
+   /*
+* See if the kernel has already returned this buffer to us. Just as
+* for named buffers, we must not create two bo's pointing at the same
+* kernel object
+*/
+   for (list = bufmgr_gem-prime.next;
+list != bufmgr_gem-prime;
+list = list-next) {
+   bo_gem = DRMLISTENTRY(drm_intel_bo_gem, list, name_list);
+   if (bo_gem-gem_handle == handle) {
+   drm_intel_gem_bo_reference(bo_gem-bo);
+   return bo_gem-bo;
+   }
+   }
+
if (ret) {
  fprintf(stderr,ret is %d %d\n, ret, errno);
return NULL;
@@ -2487,8 +2505,8 @@ drm_intel_bo_gem_create_from_prime(drm_intel_bufmgr 
*bufmgr, int prime_fd, int s
bo_gem-has_error = false;
bo_gem-reusable = false;
 
-   DRMINITLISTHEAD(bo_gem-name_list);
DRMINITLISTHEAD(bo_gem-vma_list);
+   DRMLISTADDTAIL(bo_gem-name_list, bufmgr_gem-prime);
 
VG_CLEAR(get_tiling);
get_tiling.handle = bo_gem-gem_handle;
@@ -3301,5 +3319,7 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size)
DRMINITLISTHEAD(bufmgr_gem-vma_cache);
bufmgr_gem-vma_max = -1; /* unlimited by default */
 
+   DRMINITLISTHEAD(bufmgr_gem-prime);
+
return bufmgr_gem-bufmgr;
 }
-- 
1.8.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] gallium: Use base.stamp for all drawable invalidation checks.

2013-11-22 Thread Marek Olšák

Reviewed-by: Marek Olšák marek.ol...@amd.com

Marek

On Fri, Nov 22, 2013 at 5:20 AM, Keith Packard kei...@keithp.com wrote:
 Upper levels of the stack use base.stamp to tell when a drawable needs to be
 revalidated, but the dri state tracker was using dPriv-lastStamp. Those two,
 along with dri2.stamp, all get simultaneously incremented when a dri2
 invalidate event was delivered, and so end up containing precisely the same
 value.

 This patch doesn't change the fact that there are three variables, rather it
 switches all of the tests to use only base.stamp, which is functionally
 equivalent to the previous code.

 Then, it passes base.stamp to the image loader getBuffers function so that the
 one which is checked will get updated by the XCB special event queue used by 
 DRI3.

 Signed-off-by: Keith Packard kei...@keithp.com
 ---

 This patch makes sure that drawables get invalidated when the window
 changes size or when SwapBuffers is called; dri3 has only a single
 location to smite when things change, so we need to make sure the
 upper levels all share that location.

 This should permit the elimination of the dri2.stamp and lastStamp
 variables, which would be a nice further cleanup.

  src/gallium/state_trackers/dri/common/dri_drawable.c | 4 ++--
  src/gallium/state_trackers/dri/drm/dri2.c| 2 +-
  2 files changed, 3 insertions(+), 3 deletions(-)

 diff --git a/src/gallium/state_trackers/dri/common/dri_drawable.c 
 b/src/gallium/state_trackers/dri/common/dri_drawable.c
 index f255108..734bca2 100644
 --- a/src/gallium/state_trackers/dri/common/dri_drawable.c
 +++ b/src/gallium/state_trackers/dri/common/dri_drawable.c
 @@ -73,7 +73,7 @@ dri_st_framebuffer_validate(struct st_context_iface *stctx,
  * checked.
  */
 do {
 -  lastStamp = drawable-dPriv-lastStamp;
 +  lastStamp = drawable-base.stamp;
new_stamp = (drawable-texture_stamp != lastStamp);

if (new_stamp || new_mask || screen-broken_invalidate) {
 @@ -91,7 +91,7 @@ dri_st_framebuffer_validate(struct st_context_iface *stctx,
   drawable-texture_stamp = lastStamp;
   drawable-texture_mask = statt_mask;
}
 -   } while (lastStamp != drawable-dPriv-lastStamp);
 +   } while (lastStamp != drawable-base.stamp);

 if (!out)
return TRUE;
 diff --git a/src/gallium/state_trackers/dri/drm/dri2.c 
 b/src/gallium/state_trackers/dri/drm/dri2.c
 index 6a56cd4..c7e4151 100644
 --- a/src/gallium/state_trackers/dri/drm/dri2.c
 +++ b/src/gallium/state_trackers/dri/drm/dri2.c
 @@ -545,7 +545,7 @@ dri_image_allocate_textures(struct dri_context *ctx,

 (*sPriv-image.loader-getBuffers) (dPriv,
 image_format,
 -   dPriv-dri2.stamp,
 +   (uint32_t *) drawable-base.stamp,
 dPriv-loaderPrivate,
 buffer_mask,
 images);
 --
 1.8.4.2

 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 0/2] Minor __DRIimage fixes for i965

2013-11-22 Thread Keith Packard

While debugging the libdrm duplicate buffer object adventure, I managed to
temporarily understand object lifetimes in the __DRIimage getBuffers path, and
also to compare that to the DRI2 getBuffers path. Here are a couple of small
fixes.

I haven't looked at the i915 code, but I suspect the first one, and parts of
the second one would apply.

 [PATCH 1/2] i965: Correct check for re-bound buffer in

The code was attempting to avoid re-creating the miptree when the loader
handed back the same bufffer we already had, but it was confused about how to
tell -- turns out the object actually shared between the loader and the driver
is the BO, not the region. And so, we compare BO pointers to see if the image
buffer needs to have a new miptree.

 [PATCH 2/2] i965: Set fast color clear mcs_state on newly allocated

Here's a chunk of code that was just missing from the __DRIimage path as a
result of rebasing across some new driver additions.

Both of these fixes are candidates for 10.0 as they affect the __DRIimage
paths now used by Wayland.

-keith
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/2] i965: Correct check for re-bound buffer in intel_update_image_buffer

2013-11-22 Thread Keith Packard

The buffer-object is the persistent thing passed through the loader, so when
updating an image buffer, check to see if it is already bound to the provided
bo. The region, on the other hand, is allocated separately for the miptree,
and so will never be the same as that passed back from the loader.

Signed-off-by: Keith Packard kei...@keithp.com
---
 src/mesa/drivers/dri/i965/brw_context.c | 19 +++
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index bee98e3..64ff855 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -1339,10 +1339,21 @@ intel_update_image_buffer(struct brw_context *intel,
 
unsigned num_samples = rb-Base.Base.NumSamples;
 
-   if (rb-mt 
-   rb-mt-region 
-   rb-mt-region == region)
-  return;
+   /* Check and see if we're already bound to the right
+* buffer object
+*/
+   if (num_samples == 0) {
+   if (rb-mt 
+   rb-mt-region 
+   rb-mt-region-bo == region-bo)
+  return;
+   } else {
+   if (rb-mt 
+   rb-mt-singlesample_mt 
+   rb-mt-singlesample_mt-region 
+   rb-mt-singlesample_mt-region-bo == region-bo)
+  return;
+   }
 
intel_miptree_release(rb-mt);
rb-mt = intel_miptree_create_for_image_buffer(intel,
-- 
1.8.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/2] i965: Set fast color clear mcs_state on newly allocated image miptrees

2013-11-22 Thread Keith Packard

Just copying code from the dri2 path to set up the fast color clear state.

This also removes a couple of bogus intel_region_reference calls.

Signed-off-by: Keith Packard kei...@keithp.com
---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 884ddef..ca78f32 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -762,7 +762,13 @@ intel_miptree_create_for_image_buffer(struct brw_context 
*intel,
if (!singlesample_mt)
   return NULL;
 
-   intel_region_reference(singlesample_mt-region, region);
+   /* If this miptree is capable of supporting fast color clears, set
+* mcs_state appropriately to ensure that fast clears will occur.
+* Allocation of the MCS miptree will be deferred until the first fast
+* clear actually occurs.
+*/
+   if (intel_is_non_msrt_mcs_buffer_supported(intel, singlesample_mt))
+  singlesample_mt-mcs_state = INTEL_MCS_STATE_RESOLVED;
 
if (num_samples == 0)
   return singlesample_mt;
@@ -780,8 +786,6 @@ intel_miptree_create_for_image_buffer(struct brw_context 
*intel,
multisample_mt-singlesample_mt = singlesample_mt;
multisample_mt-need_downsample = false;
 
-   intel_region_reference(multisample_mt-region, region);
-
if (intel-is_front_buffer_rendering  buffer_type == 
__DRI_IMAGE_BUFFER_FRONT) {
   intel_miptree_upsample(intel, multisample_mt);
}
-- 
1.8.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 0/2] Minor __DRIimage fixes for i965

2013-11-22 Thread Paul Berry

On 22 November 2013 05:52, Keith Packard kei...@keithp.com wrote:

 While debugging the libdrm duplicate buffer object adventure, I managed to
 temporarily understand object lifetimes in the __DRIimage getBuffers path,
 and
 also to compare that to the DRI2 getBuffers path. Here are a couple of
 small
 fixes.

 I haven't looked at the i915 code, but I suspect the first one, and parts
 of
 the second one would apply.

  [PATCH 1/2] i965: Correct check for re-bound buffer in

 The code was attempting to avoid re-creating the miptree when the loader
 handed back the same bufffer we already had, but it was confused about how
 to
 tell -- turns out the object actually shared between the loader and the
 driver
 is the BO, not the region. And so, we compare BO pointers to see if the
 image
 buffer needs to have a new miptree.

  [PATCH 2/2] i965: Set fast color clear mcs_state on newly allocated

 Here's a chunk of code that was just missing from the __DRIimage path as a
 result of rebasing across some new driver additions.

 Both of these fixes are candidates for 10.0 as they affect the __DRIimage
 paths now used by Wayland.


For future reference, patches that are candidates for the latest stable
branch should include the line

Cc: mesa-sta...@lists.freedesktop.org

in the patch.  (see
http://lists.freedesktop.org/archives/mesa-dev/2013-August/042667.html)

I've forwarded this email to mesa-sta...@lists.freedesktop.org, which
should also be sufficient.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/2] tgsi: Prevent emission of instructions with empty writemask.

2013-11-22 Thread Jose Fonseca

Thanks for reviewing.

- Original Message -
 On 11/21/2013 02:01 PM, jfons...@vmware.com wrote:
  From: José Fonseca jfons...@vmware.com
 
  These degenerate instructions can often be emitted by state trackers
  when the semantics of instructions don't match precisely.
  ---
src/gallium/auxiliary/tgsi/tgsi_ureg.c |  8 
src/gallium/auxiliary/tgsi/tgsi_ureg.h | 22 ++
2 files changed, 30 insertions(+)
 
  diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c
  b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
  index 432ed00..f06858e 100644
  --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c
  +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
  @@ -1113,6 +1113,10 @@ ureg_insn(struct ureg_program *ureg,
   boolean negate = FALSE;
   unsigned swizzle[4] = { 0 };
 
  +   if (nr_dst  ureg_dst_is_empty(dst[0])) {
  +  return;
  +   }
 
 This is technically not really correct, as the instruction may have
 multiple destinations.
 Not saying we really have any such instructions (or even that we should
 have them), but this helper (and the one below) has code to deal with
 multiple destinations, so that's a bit inconsistent.

The assumption there is only one dst is already there. See the code immediately 
below


   saturate = nr_dst ? dst[0].Saturate : FALSE;
   predicate = nr_dst ? dst[0].Predicate : FALSE;

I rather commit this as is now, as the multiple destination is not a real 
problem ATM, while the empty writemask do seem to arise due to me earlier 
commit.

 Also, there are instructions which have just one dst reg but side
 effects, though it may be restricted to OPCODE_POPA (and I wouldn't mind
 seeing it disappear) (I think the fence/atomic instructions might be
 fine and at first glance I don't see anything else).

POPA is not used anywhere and can be removed.  I'll do that in another commit.

Jose

 
   saturate = nr_dst ? dst[0].Saturate : FALSE;
   predicate = nr_dst ? dst[0].Predicate : FALSE;
   if (predicate) {
  @@ -1162,6 +1166,10 @@ ureg_tex_insn(struct ureg_program *ureg,
   boolean negate = FALSE;
   unsigned swizzle[4] = { 0 };
 
  +   if (nr_dst  ureg_dst_is_empty(dst[0])) {
  +  return;
  +   }
 Same as above.
 
   saturate = nr_dst ? dst[0].Saturate : FALSE;
   predicate = nr_dst ? dst[0].Predicate : FALSE;
   if (predicate) {
  diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h
  b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
  index cf0c75e..d973edb 100644
  --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h
  +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
  @@ -454,6 +454,16 @@ ureg_imm1i( struct ureg_program *ureg,
   return ureg_DECL_immediate_int( ureg, a, 1 );
}
 
  +/* Where the destination register has a valid file, but an empty
  + * writemask.
  + */
  +static INLINE boolean
  +ureg_dst_is_empty( struct ureg_dst dst )
  +{
  +   return dst.File != TGSI_FILE_NULL 
  +  dst.WriteMask == 0;
  +}
  +
/***
 * Functions for patching up labels
 */
  @@ -650,6 +660,7 @@ static INLINE void ureg_##op( struct ureg_program
  *ureg,\
{   \
   unsigned opcode = TGSI_OPCODE_##op;  \
   struct ureg_emit_insn_result insn;   \
  +   if (ureg_dst_is_empty(dst)) return;  \
   insn = ureg_emit_insn(ureg,  \
 opcode,\
 dst.Saturate,  \
  @@ -673,6 +684,7 @@ static INLINE void ureg_##op( struct ureg_program
  *ureg,\
{   \
   unsigned opcode = TGSI_OPCODE_##op;  \
   struct ureg_emit_insn_result insn;   \
  +   if (ureg_dst_is_empty(dst)) return;  \
   insn = ureg_emit_insn(ureg,  \
 opcode,\
 dst.Saturate,  \
  @@ -697,6 +709,7 @@ static INLINE void ureg_##op( struct ureg_program
  *ureg,\
{   \
   unsigned opcode = TGSI_OPCODE_##op;  \
   struct ureg_emit_insn_result insn;   \
  +   if (ureg_dst_is_empty(dst)) return;  \
   insn = ureg_emit_insn(ureg,  \
 opcode,\
 dst.Saturate,

Re: [Mesa-dev] [PATCH] llvmpipe: support 8bit subpixel precision

2013-11-22 Thread Jose Fonseca



- Original Message -
 On 11/22/2013 05:29 AM, Zack Rusin wrote:
  For me too, other than the fixed_position members, looks good.  Thanks for
  your perseverance on this Zack!
 
  Thanks! ok, attached is a version that makes position and dx/dy 32bit
  again, it seems to work great. I have a question for you guys if you run
  the piglits:
  ./bin/triangle-rasterization-overdraw -max_size -seed 0xA8402F24 -count 1
  -auto
  on master does it fail for you? It fails for me on master, with and without
  the patch. I'm not sure what to make of it, I might have been looking at
  rasterization for too long. Looking at the rendering it looks correct.
 
  z
 
 
 Looks good to me.
 The test you mentioned certainly fails for me too.
 
 Roland
 

I get 

  Got spurious window resize in automatic run (8192,8192 to 1000,1000)

So I can't really run the test.  I think we need a fbo mode for this test too.

Jose
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/2] tgsi: Prevent emission of instructions with empty writemask.

2013-11-22 Thread Roland Scheidegger


On 11/22/2013 03:03 PM, Jose Fonseca wrote:

Thanks for reviewing.

- Original Message -

On 11/21/2013 02:01 PM, jfons...@vmware.com wrote:

From: José Fonseca jfons...@vmware.com

These degenerate instructions can often be emitted by state trackers
when the semantics of instructions don't match precisely.
---
   src/gallium/auxiliary/tgsi/tgsi_ureg.c |  8 
   src/gallium/auxiliary/tgsi/tgsi_ureg.h | 22 ++
   2 files changed, 30 insertions(+)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c
b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
index 432ed00..f06858e 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
@@ -1113,6 +1113,10 @@ ureg_insn(struct ureg_program *ureg,
  boolean negate = FALSE;
  unsigned swizzle[4] = { 0 };

+   if (nr_dst  ureg_dst_is_empty(dst[0])) {
+  return;
+   }


This is technically not really correct, as the instruction may have
multiple destinations.
Not saying we really have any such instructions (or even that we should
have them), but this helper (and the one below) has code to deal with
multiple destinations, so that's a bit inconsistent.


The assumption there is only one dst is already there. See the code immediately 
below


Oh right! Makes me wonder if we shouldn't get rid of multi-dst business 
completely.





saturate = nr_dst ? dst[0].Saturate : FALSE;
predicate = nr_dst ? dst[0].Predicate : FALSE;

I rather commit this as is now, as the multiple destination is not a real 
problem ATM, while the empty writemask do seem to arise due to me earlier 
commit.


Also, there are instructions which have just one dst reg but side
effects, though it may be restricted to OPCODE_POPA (and I wouldn't mind
seeing it disappear) (I think the fence/atomic instructions might be
fine and at first glance I don't see anything else).


POPA is not used anywhere and can be removed.  I'll do that in another commit.

Ok thanks.

Roland




Jose




  saturate = nr_dst ? dst[0].Saturate : FALSE;
  predicate = nr_dst ? dst[0].Predicate : FALSE;
  if (predicate) {
@@ -1162,6 +1166,10 @@ ureg_tex_insn(struct ureg_program *ureg,
  boolean negate = FALSE;
  unsigned swizzle[4] = { 0 };

+   if (nr_dst  ureg_dst_is_empty(dst[0])) {
+  return;
+   }

Same as above.


  saturate = nr_dst ? dst[0].Saturate : FALSE;
  predicate = nr_dst ? dst[0].Predicate : FALSE;
  if (predicate) {
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h
b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
index cf0c75e..d973edb 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
@@ -454,6 +454,16 @@ ureg_imm1i( struct ureg_program *ureg,
  return ureg_DECL_immediate_int( ureg, a, 1 );
   }

+/* Where the destination register has a valid file, but an empty
+ * writemask.
+ */
+static INLINE boolean
+ureg_dst_is_empty( struct ureg_dst dst )
+{
+   return dst.File != TGSI_FILE_NULL 
+  dst.WriteMask == 0;
+}
+
   /***
* Functions for patching up labels
*/
@@ -650,6 +660,7 @@ static INLINE void ureg_##op( struct ureg_program
*ureg,\
   {   \
  unsigned opcode = TGSI_OPCODE_##op;  \
  struct ureg_emit_insn_result insn;   \
+   if (ureg_dst_is_empty(dst)) return;  \
  insn = ureg_emit_insn(ureg,  \
opcode,\
dst.Saturate,  \
@@ -673,6 +684,7 @@ static INLINE void ureg_##op( struct ureg_program
*ureg,\
   {   \
  unsigned opcode = TGSI_OPCODE_##op;  \
  struct ureg_emit_insn_result insn;   \
+   if (ureg_dst_is_empty(dst)) return;  \
  insn = ureg_emit_insn(ureg,  \
opcode,\
dst.Saturate,  \
@@ -697,6 +709,7 @@ static INLINE void ureg_##op( struct ureg_program
*ureg,\
   {   \
  unsigned opcode = TGSI_OPCODE_##op;  \
  struct ureg_emit_insn_result insn;   \
+   if (ureg_dst_is_empty(dst)) return;  \
  insn = ureg_emit_insn(ureg,  \
opcode,\

Re: [Mesa-dev] [PATCH] dri3, i915, i965: Add __DRI_IMAGE_FOURCC_SARGB8888

2013-11-22 Thread Daniel Vetter

On Fri, Nov 22, 2013 at 12:01 PM, Keith Packard kei...@keithp.com wrote:
 Daniel Vetter dan...@ffwll.ch writes:

 Hm, where do we have the canonical source for all these fourcc codes? I'm
 asking since we have our own copy in the kernel as drm_fourcc.h, and that
 one is part of the userspace ABI since we use it to pass around
 framebuffer formats and format lists.

 I think it's the kernel? I really don't know, as the whole notion of
 fourcc codes seems crazy to me...

 Feel free to steal this code and stick it in the kernel if you like.

Well, I wasn't ever in favour of using fourcc codes since they're just
not standardized at all, highly redundant in some cases and also miss
lots of stuff we actually need (like all the rgb formats).

Cc'ing the heck out of this to get kernel people to hopefully notice.
Maybe someone takes charge of this ... Otherwise meh.

 Just afraid to create long-term maintainance madness here with the
 kernel's iron thou-shalt-not-break-userspace-ever rule ... Not likely
 we'll ever accept srgb for framebuffers though.

 Would suck to collide with something we do want though.

Yeah, it'd suck. But given how fourcc works we probably have that
already, just haven't noticed yet :(
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/5] meta: fix meta clear of layered framebuffers

2013-11-22 Thread Paul Berry

On 19 November 2013 20:47, Paul Berry stereotype...@gmail.com wrote:

 From section 4.4.7 (Layered Framebuffers) of the GLSL 3.2 spec:

 When the Clear or ClearBuffer* commands are used to clear a
 layered framebuffer attachment, all layers of the attachment are
 cleared.

 This patch fixes meta clears to properly clear all layers of a layered
 framebuffer attachment.  We accomplish this by adding a geometry
 shader to the meta clear program which sets gl_Layer to a uniform
 value.  When clearing a layered framebuffer, we execute in a loop,
 setting the uniform to point to each layer in turn.

 Cc: 10.0 mesa-sta...@lists.freedesktop.org


Note: this patch seems to worsen
https://bugs.freedesktop.org/show_bug.cgi?id=71870 (causes it to crash
instead of producing bad rendering), so we shouldn't cherry-pick it to 10.0
until we've fixed the crash.

Provided that additional problems aren't discovered, the remainder of the
series should still be safe to cherry-pick to 10.0.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [v3 6/8] mesa: Fill out ARB_texture_view entry points

2013-11-22 Thread Marek Olšák

Hi Courtney,

The VIEW_CLASS_S3TC_* classes should be added too. See the section
Dependencies on EXT_texture_compression_s3tc.

Marek

On Wed, Nov 20, 2013 at 12:16 AM, Courtney Goeltzenleuchter
court...@lunarg.com wrote:
 Add Mesa TextureView logic.
 Incorporate feedback on ARB_texture_view

 Signed-off-by: Courtney Goeltzenleuchter court...@lunarg.com
 ---
  src/mesa/main/textureview.c | 562 
 +++-
  1 file changed, 561 insertions(+), 1 deletion(-)

 diff --git a/src/mesa/main/textureview.c b/src/mesa/main/textureview.c
 index 4a6bd62..1858465 100644
 --- a/src/mesa/main/textureview.c
 +++ b/src/mesa/main/textureview.c
 @@ -40,8 +40,346 @@
  #include texobj.h
  #include texstorage.h
  #include textureview.h
 +#include stdbool.h
  #include mtypes.h

 +/* Table 3.X.2 (Compatible internal formats for TextureView)
 +
 ---
 +| Class | Internal formats   
  |
 +
 ---
 +| VIEW_CLASS_128_BITS   | RGBA32F, RGBA32UI, RGBA32I 
  |
 +
 ---
 +| VIEW_CLASS_96_BITS| RGB32F, RGB32UI, RGB32I
  |
 +
 ---
 +| VIEW_CLASS_64_BITS| RGBA16F, RG32F, RGBA16UI, RG32UI, RGBA16I, 
  |
 +|   | RG32I, RGBA16, RGBA16_SNORM
  |
 +
 ---
 +| VIEW_CLASS_48_BITS| RGB16, RGB16_SNORM, RGB16F, RGB16UI, RGB16I
  |
 +
 ---
 +| VIEW_CLASS_32_BITS| RG16F, R11F_G11F_B10F, R32F,   
  |
 +|   | RGB10_A2UI, RGBA8UI, RG16UI, R32UI,
  |
 +|   | RGBA8I, RG16I, R32I, RGB10_A2, RGBA8, RG16,
  |
 +|   | RGBA8_SNORM, RG16_SNORM, SRGB8_ALPHA8, RGB9_E5 
  |
 +
 ---
 +| VIEW_CLASS_24_BITS| RGB8, RGB8_SNORM, SRGB8, RGB8UI, RGB8I 
  |
 +
 ---
 +| VIEW_CLASS_16_BITS| R16F, RG8UI, R16UI, RG8I, R16I, RG8, R16,  
  |
 +|   | RG8_SNORM, R16_SNORM   
  |
 +
 ---
 +| VIEW_CLASS_8_BITS | R8UI, R8I, R8, R8_SNORM
  |
 +
 ---
 +| VIEW_CLASS_RGTC1_RED  | COMPRESSED_RED_RGTC1,  
  |
 +|   | COMPRESSED_SIGNED_RED_RGTC1
  |
 +
 ---
 +| VIEW_CLASS_RGTC2_RG   | COMPRESSED_RG_RGTC2,   
  |
 +|   | COMPRESSED_SIGNED_RG_RGTC2 
  |
 +
 ---
 +| VIEW_CLASS_BPTC_UNORM | COMPRESSED_RGBA_BPTC_UNORM,
  |
 +|   | COMPRESSED_SRGB_ALPHA_BPTC_UNORM   
  |
 +
 ---
 +| VIEW_CLASS_BPTC_FLOAT | COMPRESSED_RGB_BPTC_SIGNED_FLOAT,  
  |
 +|   | COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT 
  |
 +
 ---
 + */
 +struct internal_format_class_info {
 +   GLenum view_class;
 +   GLenum internal_format;
 +};
 +#define INFO(c,f) {GL_##c, GL_##f}
 +static const struct internal_format_class_info 
 _compatible_internal_formats[] = {
 +   INFO(VIEW_CLASS_128_BITS, RGBA32F),
 +   INFO(VIEW_CLASS_128_BITS, RGBA32UI),
 +   INFO(VIEW_CLASS_128_BITS, RGBA32I),
 +   INFO(VIEW_CLASS_96_BITS, RGB32F),
 +   INFO(VIEW_CLASS_96_BITS, RGB32UI),
 +   INFO(VIEW_CLASS_96_BITS, RGB32I),
 +   INFO(VIEW_CLASS_64_BITS, RGBA16F),
 +   INFO(VIEW_CLASS_64_BITS, RG32F),
 +   INFO(VIEW_CLASS_64_BITS, RGBA16UI),
 +   INFO(VIEW_CLASS_64_BITS, RG32UI),
 +   INFO(VIEW_CLASS_64_BITS, RGBA16I),
 +   INFO(VIEW_CLASS_64_BITS, RG32I),
 +   INFO(VIEW_CLASS_64_BITS, RGBA16),
 +   INFO(VIEW_CLASS_64_BITS, RGBA16_SNORM),
 +   INFO(VIEW_CLASS_48_BITS, RGB16),
 +   INFO(VIEW_CLASS_48_BITS, RGB16_SNORM),
 +   INFO(VIEW_CLASS_48_BITS, RGB16F),
 +   INFO(VIEW_CLASS_48_BITS, RGB16UI),
 +   INFO(VIEW_CLASS_48_BITS, RGB16I),
 +   INFO(VIEW_CLASS_32_BITS, RG16F),
 +   INFO(VIEW_CLASS_32_BITS, R11F_G11F_B10F),
 +   INFO(VIEW_CLASS_32_BITS, R32F),

Re: [Mesa-dev] [PATCH] mesa: Remove the ralloc canary on release builds.

2013-11-22 Thread Eric Anholt

Kenneth Graunke kenn...@whitecape.org writes:

 On 11/22/2013 12:21 AM, Eric Anholt wrote:
 The canary is basically just to give a better debugging message when you
 ralloc_free() something that wasn't rallocated.  Reduces maximum memory
 usage of apitrace replay of the dota2 demo by 60MB on my 64-bit system (so
 half that on a real 32-bit dota2 environment).

 Really, half?  It's an unsigned...that's 4 bytes regardless of 64-bit
 vs. 32-bit.  I think this should be 60MB of savings, end of story.

Scalar types get aligned to their size, so since it's followed by a
pointer, there's 4 bytes of pad in between.

For anyone that hasn't seen this tool before, check out pahole from the
dwarves package.  Run it on a .o file you think might be sucking up a
bunch of memory, and see your structs like:

class fs_inst : public backend_instruction {
public:

/* class backend_instruction ancestor; */  /* 032 */

/* XXX last struct has 7 bytes of padding */

class fs_reg  dst;   /*3248 */
/* --- cacheline 1 boundary (64 bytes) was 16 bytes ago --- */
class fs_reg  src[3];/*80   144 */
/* --- cacheline 3 boundary (192 bytes) was 32 bytes ago --- */
bool   saturate; /*   224 1 */

/* XXX 3 bytes hole, try to pack */

intconditional_mod;  /*   228 4 */
uint8_tflag_subreg;  /*   232 1 */

/* XXX 3 bytes hole, try to pack */

intmlen; /*   236 4 */
intregs_written; /*   240 4 */
intbase_mrf; /*   244 4 */
uint32_t   texture_offset;   /*   248 4 */
intsampler;  /*   252 4 */
/* --- cacheline 4 boundary (256 bytes) --- */
inttarget;   /*   256 4 */
bool   eot;  /*   260 1 */
bool   header_present;   /*   261 1 */
bool   shadow_compare;   /*   262 1 */
bool   force_uncompressed;   /*   263 1 */
bool   force_sechalf;/*   264 1 */
bool   force_writemask_all;  /*   265 1 */

...

/* size: 288, cachelines: 5, members: 21 */
/* sum members: 280, holes: 3, sum holes: 8 */
/* paddings: 1, sum paddings: 7 */
/* last cacheline: 32 bytes */
};


pgp2KcnqfB6bf.pgp
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] dri3, i915, i965: Add __DRI_IMAGE_FOURCC_SARGB8888

2013-11-22 Thread Ville Syrjälä

On Fri, Nov 22, 2013 at 05:17:37PM +0100, Daniel Vetter wrote:
 On Fri, Nov 22, 2013 at 12:01 PM, Keith Packard kei...@keithp.com wrote:
  Daniel Vetter dan...@ffwll.ch writes:
 
  Hm, where do we have the canonical source for all these fourcc codes? I'm
  asking since we have our own copy in the kernel as drm_fourcc.h, and that
  one is part of the userspace ABI since we use it to pass around
  framebuffer formats and format lists.
 
  I think it's the kernel? I really don't know, as the whole notion of
  fourcc codes seems crazy to me...
 
  Feel free to steal this code and stick it in the kernel if you like.
 
 Well, I wasn't ever in favour of using fourcc codes since they're just
 not standardized at all, highly redundant in some cases and also miss
 lots of stuff we actually need (like all the rgb formats).

I also argued against them, but some people wanted them for whatever
reason. And since I didn't want to argue for several years about the
subject, I just gave in and made the drm pixel formats fourcss. But
given that I just pulled the fourccs out of my ass, we can't really
cross use them between different subsystems anyway. So if we just
consider all the different fourcc namespaces totally distinct, we're
not going to have any problems.

Personally I can promise that I will _not_ be checking Mesa/whatever
code for conflicting fourccs when I need to add a new one to drm_fourcc.h.
There, now I've given fair warning and if things explode later it won't be
my fault.

However if someone wants to emulate the drm fourcc style for whatever
reason, there is a distinct pattern how I cooked them up. Well, a few
different patterns depending whether it's RGB,YUV,packed,planar etc.

 
 Cc'ing the heck out of this to get kernel people to hopefully notice.
 Maybe someone takes charge of this ... Otherwise meh.
 
  Just afraid to create long-term maintainance madness here with the
  kernel's iron thou-shalt-not-break-userspace-ever rule ... Not likely
  we'll ever accept srgb for framebuffers though.
 
  Would suck to collide with something we do want though.
 
 Yeah, it'd suck. But given how fourcc works we probably have that
 already, just haven't noticed yet :(
 -Daniel
 -- 
 Daniel Vetter
 Software Engineer, Intel Corporation
 +41 (0) 79 365 57 48 - http://blog.ffwll.ch
 ___
 dri-devel mailing list
 dri-de...@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/dri-devel

-- 
Ville Syrjälä
Intel OTC
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/3] Create untiled buffers in get_back_bo when needed.

2013-11-22 Thread Marek Olšák

Commits should be prefixed with the name of the component they're
changing. This one should be egl: 

Marek

On Thu, Nov 7, 2013 at 5:13 PM, Axel Davy axel.d...@ens.fr wrote:
 We must send to the compositor buffer it is able to read.

 Since tiling modes are different on graphic cards, we have
 to disable tiling when creating the buffers if we render
 with a different graphic card than the compositor.

 Signed-off-by: Axel Davy axel.d...@ens.fr
 ---
  src/egl/drivers/dri2/platform_wayland.c | 17 +++--
  1 file changed, 11 insertions(+), 6 deletions(-)

 diff --git a/src/egl/drivers/dri2/platform_wayland.c 
 b/src/egl/drivers/dri2/platform_wayland.c
 index 709df36..b922ff5 100644
 --- a/src/egl/drivers/dri2/platform_wayland.c
 +++ b/src/egl/drivers/dri2/platform_wayland.c
 @@ -283,12 +283,17 @@ get_back_bo(struct dri2_egl_surface *dri2_surf, 
 __DRIbuffer *buffer)
 if (dri2_surf-back == NULL)
return -1;
 if (dri2_surf-back-dri_image == NULL) {
 -  dri2_surf-back-dri_image =
 - dri2_dpy-image-createImage(dri2_dpy-dri_screen,
 -  dri2_surf-base.Width,
 -  dri2_surf-base.Height,
 -  __DRI_IMAGE_FORMAT_ARGB,
 -  __DRI_IMAGE_USE_SHARE,
 +  unsigned int use_flags = __DRI_IMAGE_USE_SHARE;
 +
 +  if (!dri2_dpy-enable_tiling)
 + use_flags |= __DRI_IMAGE_USE_LINEAR;
 +
 +   dri2_surf-back-dri_image =
 +  dri2_dpy-image-createImage(dri2_dpy-dri_screen,
 +   dri2_surf-base.Width,
 +   dri2_surf-base.Height,
 +   __DRI_IMAGE_FORMAT_ARGB,
 +  use_flags,
NULL);
dri2_surf-back-age = 0;
 }
 --
 1.8.1.2

 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/2] i965: Allocate vec4_visitor's uniform_size and uniform_vector_size arrays dynamically.

2013-11-22 Thread Francisco Jerez

Petri Latvala petri.latv...@intel.com writes:

[...]
 @@ -325,8 +326,9 @@ public:
  */
 dst_reg output_reg[BRW_VARYING_SLOT_COUNT];
 const char *output_reg_annotation[BRW_VARYING_SLOT_COUNT];
 -   int uniform_size[MAX_UNIFORMS];
 -   int uniform_vector_size[MAX_UNIFORMS];
 +   unsigned uniform_param_count;
 +   int* uniform_size;
 +   int* uniform_vector_size;
 int uniforms;
  
All the code around you uses the 'type *identifier' convention, please
keep it consistent.

 src_reg shader_start_time;
[...]
 @@ -556,7 +558,7 @@ brw_gs_emit(struct brw_context *brw,
 if (likely(!(INTEL_DEBUG  DEBUG_NO_DUAL_OBJECT_GS))) {
c-prog_data.dual_instanced_dispatch = false;
  
 -  vec4_gs_visitor v(brw, c, prog, shader, mem_ctx, true /* no_spills */);
 +  vec4_gs_visitor v(brw, c, prog, shader, mem_ctx, true /* no_spills */, 
 param_count);

Another possibility would be to set 'c.prog_data.base.nr_params =
param_count' here and in brw_vs_emit(), so you'd have the same value
available from the constructor of vec4_visitor without all the parameter
changes.

It would be possible to do something similar in the fs_visitor code, but
it seems that it would be slightly more difficult because the fs_visitor
(ab-)uses nr_params to keep track of the most recently allocated uniform
index, you'd need to add a private counter variable to the fs_visitor
similar to what vec4_visitor does.

With the upcoming ARB_shader_image_load_store changes we might get more
than 4 * MAX_UNIFORM uniform allocations in some cases because image
uniforms can take up several slots for the image meta-data -- I think it
would make sense to extend this change to the FS back-end too.

Thanks.

if (v.run()) {
   vec4_generator g(brw, prog, c-gp-program.Base, 
 c-prog_data.base,
mem_ctx, INTEL_DEBUG  DEBUG_GS);
[...]


pgpt3d5HdM9K6.pgp
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] More projects for newbies

2013-11-22 Thread Ian Romanick

I've posted a wiki with some project suggestions for people wanting to
get into Mesa development:

http://wiki.freedesktop.org/dri/NewbieProjects/

These projects are a little bit larger than the one I previously posted
to the mailing list
(http://lists.freedesktop.org/archives/mesa-dev/2013-October/046374.html),
but I've tried to be very detailed.

I have two hopes for this wiki:

1. People already experienced with Mesa development will avoid these
projects. :)

2. Other people will add projects with similar scope and level of
detail.  For example, I think someone could probably add a project for
GL_ARB_buffer_storage and GL_ARB_clear_texture.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/2] i965: Allocate vec4_visitor's uniform_size and uniform_vector_size arrays dynamically.

2013-11-22 Thread Ian Romanick

On 11/22/2013 12:09 AM, Petri Latvala wrote:
 Signed-off-by: Petri Latvala petri.latv...@intel.com
 ---
  src/mesa/drivers/dri/i965/brw_vec4.cpp|  5 +++--
  src/mesa/drivers/dri/i965/brw_vec4.h  | 14 +++---
  src/mesa/drivers/dri/i965/brw_vec4_gs.c   |  2 +-
  src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 12 +++-
  src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h   |  6 --
  src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp|  7 ++-
  src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp |  6 --
  src/mesa/drivers/dri/i965/brw_vs.c|  2 +-
  src/mesa/drivers/dri/i965/brw_vs.h|  6 --
  9 files changed, 41 insertions(+), 19 deletions(-)
 
 diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
 b/src/mesa/drivers/dri/i965/brw_vec4.cpp
 index 73f91a0..3da1b4d 100644
 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
 +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
 @@ -1558,7 +1558,8 @@ brw_vs_emit(struct brw_context *brw,
  struct brw_vs_compile *c,
  struct brw_vs_prog_data *prog_data,
  void *mem_ctx,
 -unsigned *final_assembly_size)
 +unsigned *final_assembly_size,
 +unsigned param_count)
  {
 bool start_busy = false;
 float start_time = 0;
 @@ -1585,7 +1586,7 @@ brw_vs_emit(struct brw_context *brw,
}
 }
  
 -   vec4_vs_visitor v(brw, c, prog_data, prog, shader, mem_ctx);
 +   vec4_vs_visitor v(brw, c, prog_data, prog, shader, mem_ctx, param_count);
 if (!v.run()) {
if (prog) {
   prog-LinkStatus = false;
 diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
 b/src/mesa/drivers/dri/i965/brw_vec4.h
 index 5cec9f9..552ca55 100644
 --- a/src/mesa/drivers/dri/i965/brw_vec4.h
 +++ b/src/mesa/drivers/dri/i965/brw_vec4.h
 @@ -231,7 +231,8 @@ public:
   struct brw_shader *shader,
   void *mem_ctx,
  bool debug_flag,
 -bool no_spills);
 +bool no_spills,
 +unsigned param_count);
 ~vec4_visitor();
  
 dst_reg dst_null_f()
 @@ -325,8 +326,9 @@ public:
  */
 dst_reg output_reg[BRW_VARYING_SLOT_COUNT];
 const char *output_reg_annotation[BRW_VARYING_SLOT_COUNT];
 -   int uniform_size[MAX_UNIFORMS];
 -   int uniform_vector_size[MAX_UNIFORMS];
 +   unsigned uniform_param_count;
 +   int* uniform_size;
 +   int* uniform_vector_size;
 int uniforms;
  
 src_reg shader_start_time;
 @@ -546,6 +548,12 @@ private:
  * If true, then register allocation should fail instead of spilling.
  */
 const bool no_spills;
 +
 +   /*
 +* Make noncopyable.
 +*/
 +   vec4_visitor(const vec4_visitor);
 +   vec4_visitor operator=(const vec4_visitor);

I think this should be a separate patch with it's own justification.
I'm also not sure it's strictly necessary.  I'm not a C++ expert (and
most people on the project aren't either), so bear with me a bit.  The
usual reason to make a class non-copyable is because the default
copy-constructor will only make a shallow copy of pointer fields.  This
can lead to either double-frees or dereferencing freed memory.  However,
since we're using ralloc, and the first construction of a vec4_visitor
object will out-live any possible copies, neither of these situations
can occur.  Is that a fair assessment?

Now... we probably should do this anyway... and there are a lot of
classes in the i965 back-end where this should occur.  I don't know if
we want to make a macro to generate this boiler-plate code or if we just
want to manually add it to every class.  Perhaps Ken, Paul, or Curro
will have an opinion...

  };
  
  
 diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs.c 
 b/src/mesa/drivers/dri/i965/brw_vec4_gs.c
 index b52d646..b0b77d1 100644
 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs.c
 +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs.c
 @@ -213,7 +213,7 @@ do_gs_prog(struct brw_context *brw,
 void *mem_ctx = ralloc_context(NULL);
 unsigned program_size;
 const unsigned *program =
 -  brw_gs_emit(brw, prog, c, mem_ctx, program_size);
 +  brw_gs_emit(brw, prog, c, mem_ctx, program_size, param_count);
 if (program == NULL) {
ralloc_free(mem_ctx);
return false;
 diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp 
 b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
 index adbb1cf..16c05f5 100644
 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
 +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
 @@ -38,10 +38,11 @@ vec4_gs_visitor::vec4_gs_visitor(struct brw_context *brw,
   struct gl_shader_program *prog,
   struct brw_shader *shader,
   void *mem_ctx,
 - bool no_spills)
 + bool no_spills,
 + unsigned param_count)
 :

Re: [Mesa-dev] [PATCH 1/2] i965: Allocate vec4_visitor's uniform_size and uniform_vector_size arrays dynamically.

2013-11-22 Thread Ian Romanick

-BEGIN PGP SIGNED MESSAGE-
Hash: SHA1

On 11/22/2013 10:05 AM, Francisco Jerez wrote:
 Petri Latvala petri.latv...@intel.com writes:
 
 [...] @@ -325,8 +326,9 @@ public: */ dst_reg
 output_reg[BRW_VARYING_SLOT_COUNT]; const char
 *output_reg_annotation[BRW_VARYING_SLOT_COUNT]; -   int
 uniform_size[MAX_UNIFORMS]; -   int
 uniform_vector_size[MAX_UNIFORMS]; +   unsigned
 uniform_param_count; +   int* uniform_size; +   int*
 uniform_vector_size; int uniforms;
 
 All the code around you uses the 'type *identifier' convention,
 please keep it consistent.
 
 src_reg shader_start_time; [...] @@ -556,7 +558,7 @@
 brw_gs_emit(struct brw_context *brw, if (likely(!(INTEL_DEBUG 
 DEBUG_NO_DUAL_OBJECT_GS))) { c-prog_data.dual_instanced_dispatch
 = false;
 
 -  vec4_gs_visitor v(brw, c, prog, shader, mem_ctx, true /*
 no_spills */); +  vec4_gs_visitor v(brw, c, prog, shader,
 mem_ctx, true /* no_spills */, param_count);
 
 Another possibility would be to set 'c.prog_data.base.nr_params = 
 param_count' here and in brw_vs_emit(), so you'd have the same
 value available from the constructor of vec4_visitor without all
 the parameter changes.
 
 It would be possible to do something similar in the fs_visitor
 code, but it seems that it would be slightly more difficult because
 the fs_visitor (ab-)uses nr_params to keep track of the most
 recently allocated uniform index, you'd need to add a private
 counter variable to the fs_visitor similar to what vec4_visitor
 does.
 
 With the upcoming ARB_shader_image_load_store changes we might get
 more than 4 * MAX_UNIFORM uniform allocations in some cases because
 image uniforms can take up several slots for the image meta-data --
 I think it would make sense to extend this change to the FS
 back-end too.

Do you think that should get done now or as a follow-on patch?  This
fixes an existing problem, and I think we want a fix that can get
picked to the 10.0 branch and possibly earlier stable branches.

 Thanks.
 
 if (v.run()) { vec4_generator g(brw, prog, c-gp-program.Base,
 c-prog_data.base, mem_ctx, INTEL_DEBUG  DEBUG_GS); [...] 
 ___ mesa-dev mailing
 list mesa-dev@lists.freedesktop.org 
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev

-BEGIN PGP SIGNATURE-
Version: GnuPG v1.4.14 (GNU/Linux)

iEYEARECAAYFAlKPr+0ACgkQX1gOwKyEAw9vzQCdE0DAxuZF85y7wFtw1Ypg+ejo
Ft4An3msa4mkI4HU/39n9vlXoY7LrEJ2
=sUh4
-END PGP SIGNATURE-
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/2] i965: Allocate vec4_visitor's uniform_size and uniform_vector_size arrays dynamically.

2013-11-22 Thread Francisco Jerez

Ian Romanick i...@freedesktop.org writes:

 -BEGIN PGP SIGNED MESSAGE-
 Hash: SHA1

 On 11/22/2013 10:05 AM, Francisco Jerez wrote:
[...]
 With the upcoming ARB_shader_image_load_store changes we might get
 more than 4 * MAX_UNIFORM uniform allocations in some cases because
 image uniforms can take up several slots for the image meta-data --
 I think it would make sense to extend this change to the FS
 back-end too.

 Do you think that should get done now or as a follow-on patch?  This
 fixes an existing problem, and I think we want a fix that can get
 picked to the 10.0 branch and possibly earlier stable branches.


I'm fine either way.  If you'd like to keep the bugfix as simple as
possible for the stable branch I can take care of the fs_visitor change
myself as a separate patch.


pgpzvRoVmbRBv.pgp
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/2] i965: Allocate vec4_visitor's uniform_size and uniform_vector_size arrays dynamically.

2013-11-22 Thread Kenneth Graunke

On 11/22/2013 11:27 AM, Ian Romanick wrote:
 On 11/22/2013 12:09 AM, Petri Latvala wrote:
 Signed-off-by: Petri Latvala petri.latv...@intel.com
 ---
  src/mesa/drivers/dri/i965/brw_vec4.cpp|  5 +++--
  src/mesa/drivers/dri/i965/brw_vec4.h  | 14 +++---
  src/mesa/drivers/dri/i965/brw_vec4_gs.c   |  2 +-
  src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 12 +++-
  src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h   |  6 --
  src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp|  7 ++-
  src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp |  6 --
  src/mesa/drivers/dri/i965/brw_vs.c|  2 +-
  src/mesa/drivers/dri/i965/brw_vs.h|  6 --
  9 files changed, 41 insertions(+), 19 deletions(-)

 diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
 b/src/mesa/drivers/dri/i965/brw_vec4.cpp
 index 73f91a0..3da1b4d 100644
 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
 +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
 @@ -1558,7 +1558,8 @@ brw_vs_emit(struct brw_context *brw,
  struct brw_vs_compile *c,
  struct brw_vs_prog_data *prog_data,
  void *mem_ctx,
 -unsigned *final_assembly_size)
 +unsigned *final_assembly_size,
 +unsigned param_count)
  {
 bool start_busy = false;
 float start_time = 0;
 @@ -1585,7 +1586,7 @@ brw_vs_emit(struct brw_context *brw,
}
 }
  
 -   vec4_vs_visitor v(brw, c, prog_data, prog, shader, mem_ctx);
 +   vec4_vs_visitor v(brw, c, prog_data, prog, shader, mem_ctx, param_count);
 if (!v.run()) {
if (prog) {
   prog-LinkStatus = false;
 diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
 b/src/mesa/drivers/dri/i965/brw_vec4.h
 index 5cec9f9..552ca55 100644
 --- a/src/mesa/drivers/dri/i965/brw_vec4.h
 +++ b/src/mesa/drivers/dri/i965/brw_vec4.h
 @@ -231,7 +231,8 @@ public:
  struct brw_shader *shader,
  void *mem_ctx,
  bool debug_flag,
 -bool no_spills);
 +bool no_spills,
 +unsigned param_count);
 ~vec4_visitor();
  
 dst_reg dst_null_f()
 @@ -325,8 +326,9 @@ public:
  */
 dst_reg output_reg[BRW_VARYING_SLOT_COUNT];
 const char *output_reg_annotation[BRW_VARYING_SLOT_COUNT];
 -   int uniform_size[MAX_UNIFORMS];
 -   int uniform_vector_size[MAX_UNIFORMS];
 +   unsigned uniform_param_count;
 +   int* uniform_size;
 +   int* uniform_vector_size;
 int uniforms;
  
 src_reg shader_start_time;
 @@ -546,6 +548,12 @@ private:
  * If true, then register allocation should fail instead of spilling.
  */
 const bool no_spills;
 +
 +   /*
 +* Make noncopyable.
 +*/
 +   vec4_visitor(const vec4_visitor);
 +   vec4_visitor operator=(const vec4_visitor);
 
 I think this should be a separate patch with it's own justification.
 I'm also not sure it's strictly necessary.  I'm not a C++ expert (and
 most people on the project aren't either), so bear with me a bit.  The
 usual reason to make a class non-copyable is because the default
 copy-constructor will only make a shallow copy of pointer fields.  This
 can lead to either double-frees or dereferencing freed memory.  However,
 since we're using ralloc, and the first construction of a vec4_visitor
 object will out-live any possible copies, neither of these situations
 can occur.  Is that a fair assessment?
 
 Now... we probably should do this anyway... and there are a lot of
 classes in the i965 back-end where this should occur.  I don't know if
 we want to make a macro to generate this boiler-plate code or if we just
 want to manually add it to every class.  Perhaps Ken, Paul, or Curro
 will have an opinion...

Or you could just be sensible and just not make copies of things where
it makes no sense to do so...

I would drop this hunk...it's unrelated and unnecessary.

--Ken
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/2] i965: Allocate vec4_visitor's uniform_size and uniform_vector_size arrays dynamically.

2013-11-22 Thread Francisco Jerez

Ian Romanick i...@freedesktop.org writes:

 On 11/22/2013 12:09 AM, Petri Latvala wrote:
[...]
 @@ -546,6 +548,12 @@ private:
  * If true, then register allocation should fail instead of spilling.
  */
 const bool no_spills;
 +
 +   /*
 +* Make noncopyable.
 +*/
 +   vec4_visitor(const vec4_visitor);
 +   vec4_visitor operator=(const vec4_visitor);

 I think this should be a separate patch with it's own justification.
 I'm also not sure it's strictly necessary.  I'm not a C++ expert (and
 most people on the project aren't either), so bear with me a bit.  The
 usual reason to make a class non-copyable is because the default
 copy-constructor will only make a shallow copy of pointer fields.  This
 can lead to either double-frees or dereferencing freed memory.  However,
 since we're using ralloc, and the first construction of a vec4_visitor
 object will out-live any possible copies, neither of these situations
 can occur.  Is that a fair assessment?

 Now... we probably should do this anyway... and there are a lot of
 classes in the i965 back-end where this should occur.  I don't know if
 we want to make a macro to generate this boiler-plate code or if we just
 want to manually add it to every class.  Perhaps Ken, Paul, or Curro
 will have an opinion...


This hunk looks perfectly fine to me (well, aside from the -placement).
All singleton classes should have its copy-constructor declared private
to avoid the situations you have described, and adding a member variable
that needs special handling on copy construction seems like the right
time to do so.  I'm OK with using the private copy-constructor and
assignment operator idiom explicitly as in Petri's code or with having a
macro for the same purpose to save typing -- A third possibility that I
find slightly more elegant would be to define a noncopyable class all
singleton objects could inherit from, like:

| class noncopyable {
|private:
|   noncopyable(noncopyable );
|   noncopyable operator=(const noncopyable );
| };

Because macros that declare class members and need to use access
specifiers can have unexpected side-effects.

Thanks.


pgpU793roSig1.pgp
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/2] i965: Allocate vec4_visitor's uniform_size and uniform_vector_size arrays dynamically.

2013-11-22 Thread Francisco Jerez

Kenneth Graunke kenn...@whitecape.org writes:

 On 11/22/2013 11:27 AM, Ian Romanick wrote:
 On 11/22/2013 12:09 AM, Petri Latvala wrote:
[...]
 @@ -546,6 +548,12 @@ private:
  * If true, then register allocation should fail instead of spilling.
  */
 const bool no_spills;
 +
 +   /*
 +* Make noncopyable.
 +*/
 +   vec4_visitor(const vec4_visitor);
 +   vec4_visitor operator=(const vec4_visitor);
[...]
 Or you could just be sensible and just not make copies of things where
 it makes no sense to do so...

 I would drop this hunk...it's unrelated and unnecessary.


It makes it less likely to make mistakes, doesn't it?  E.g. in cases
where you pass an argument by value where you really meant to pass it by
reference.  I think our code would benefit from doing this consistently,
not only because it tells the compiler which classes it makes sense to
copy and which ones don't, but also because it makes the semantics of
the class clearer to developers.


pgplF1DNM67jH.pgp
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/2] i965: Allocate vec4_visitor's uniform_size and uniform_vector_size arrays dynamically.

2013-11-22 Thread Kenneth Graunke

On 11/22/2013 12:23 PM, Francisco Jerez wrote:
 Kenneth Graunke kenn...@whitecape.org writes:
 
 On 11/22/2013 11:27 AM, Ian Romanick wrote:
 On 11/22/2013 12:09 AM, Petri Latvala wrote:
 [...]
 @@ -546,6 +548,12 @@ private:
  * If true, then register allocation should fail instead of spilling.
  */
 const bool no_spills;
 +
 +   /*
 +* Make noncopyable.
 +*/
 +   vec4_visitor(const vec4_visitor);
 +   vec4_visitor operator=(const vec4_visitor);
 [...]
 Or you could just be sensible and just not make copies of things where
 it makes no sense to do so...

 I would drop this hunk...it's unrelated and unnecessary.

 
 It makes it less likely to make mistakes, doesn't it?  E.g. in cases
 where you pass an argument by value where you really meant to pass it by
 reference.  I think our code would benefit from doing this consistently,
 not only because it tells the compiler which classes it makes sense to
 copy and which ones don't, but also because it makes the semantics of
 the class clearer to developers.

Ah, I see...the point is that you might accidentally do:

void foo(vec4_visitor v, int bar);

and end up copying the world, when you meant to pass by reference.  I
guess that's useful.  I probably wouldn't NAK that.

I was thinking you were trying to prevent things like:

vec4_visitor v2(v);
vec4_visitor v3 = v;

which suggests a fundamental misunderstanding of the code which a
compiler error is unlikely to help you overcome.

At any rate, it's really a separate change, so it should be a separate
patch (as Ian mentioned).  Each patch should ideally only make one
specific change.  Smaller patches are almost always better.

--Ken
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/2] mesa: Add gl_formats to cover all GLUser provided format/type combinations

2013-11-22 Thread Marek Olšák

On Fri, Nov 15, 2013 at 9:58 PM, Mark Mueller markkmuel...@gmail.com wrote:



 On Fri, Nov 15, 2013 at 9:52 AM, Marek Olšák mar...@gmail.com wrote:

 I don't understand this and I don't think this is the right way to
 implement hw-accelerated TexImage. Some of the formats are unsupported
 by all hardware I know, others just don't make any sense (e.g.
 RGBA5999_REV).


 Please check out st_choose_matching_format. This is what the function
 comment says:
 /**
  * Given an OpenGL user-requested format and type, and swapBytes state,
  * return the format which exactly matches those parameters, so that
  * a memcpy-based transfer can be done.
  *
  * If no format is supported, return PIPE_FORMAT_NONE.
  */

 My patch allows for similar functionality without using Gallium
 PIPE_FORMATs, which aren't supported in the i965 driver. RGPA5999_REV is
 there because it is used by a the glean test case pixelFormats. Having
 hardware support for the added formats is not relevant.

Why is it not relevant? If there is no hardware support, adding those
formats is a waste of time. Will the formats be unpacked by shaders or
what? The MESA_* formats have only one purpose: to be used as OpenGL
textures and renderbuffers.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 15/18] mesa: Add ARB_viewport_array plumbing

2013-11-22 Thread Courtney Goeltzenleuchter

Hi Chris,

I'm using this version of the spec:
http://www.opengl.org/registry/specs/ARB/viewport_array.txt

On Thu, Nov 21, 2013 at 4:41 PM, Chris Forbes chr...@ijw.co.nz wrote:

 I was just comparing to the list in the ARB_viewport_array spec.


 On Fri, Nov 22, 2013 at 11:33 AM, Courtney Goeltzenleuchter 
 court...@lunarg.com wrote:

 Hi Chris,

 Where are you getting your defines?
 I copied them from include/GL/gl.h
 #define GL_VIEWPORT 0x0BA2
 /* Scissor box */
 #define GL_SCISSOR_BOX 0x0C10
 #define GL_SCISSOR_TEST 0x0C11
 #define GL_SCISSOR_TEST 0x0C11
 #define GL_DEPTH_RANGE 0x0B70

 Ah, FIRST_VERTEX looks different.
 #define GL_FIRST_VERTEX_CONVENTION0x8E4D

 I'll add PROVOKING_VERTEX

 Looks like UNDEFINED_VERTEX was wrong as well.
 (include/GL/glext.h) #define GL_UNDEFINED_VERTEX   0x8260

  I was modelling one of the other extension xml files and they had
 similar defines, though I could see no effect including or excluding them.

 Should I just get rid of the definitions for values that already exist in
 gl.h or glext.h?

 Courtney


 On Thu, Nov 21, 2013 at 1:00 PM, Chris Forbes chr...@ijw.co.nz wrote:

 I'm surprised the build system accepts the conflicting second definition
 of SCISSOR_BOX at all, actually -- that's weird.


 On Fri, Nov 22, 2013 at 8:55 AM, Chris Forbes chr...@ijw.co.nz wrote:

 I mean some of the values don't match the spec :)


 On Fri, Nov 22, 2013 at 7:52 AM, Courtney Goeltzenleuchter 
 court...@lunarg.com wrote:



 On Wed, Nov 20, 2013 at 7:28 PM, Chris Forbes chr...@ijw.co.nzwrote:

 Oops -- the 8E4E is obviously correct. Artifact of me switching how I
 was commenting halfway through.

 On Thu, Nov 21, 2013 at 3:25 PM, Chris Forbes chr...@ijw.co.nz
 wrote:
  These are bogus:
 
  +enum name=SCISSOR_BOX value=0x0C10/
  +enum name=VIEWPORT value=0x0BA2/
  +enum name=DEPTH_RANGE value=0x0B70/
  +enum name=SCISSOR_TEST value=0x0C11/
  +enum name=FIRST_VERTEX_CONVENTION value=0x0C10/


In the spec I'm using I see:

New Tokens

Accepted by the pname parameter of GetBooleanv, GetIntegerv, GetFloatv,
GetDoublev and GetInteger64v:

MAX_VIEWPORTS   0x825B
VIEWPORT_SUBPIXEL_BITS  0x825C
VIEWPORT_BOUNDS_RANGE   0x825D
LAYER_PROVOKING_VERTEX  0x825E
VIEWPORT_INDEX_PROVOKING_VERTEX 0x825F

Accepted by the pname parameter of GetIntegeri_v:

*SCISSOR_BOX 0x0C10*

Accepted by the pname parameter of GetFloati_v:

*VIEWPORT0x0BA2*

Accepted by the pname parameter of GetDoublei_v:

*DEPTH_RANGE 0x0B70*

Accepted by the pname parameter of Enablei, Disablei, and IsEnabledi:

*SCISSOR_TEST0x0C11*

Thus my confusion regarding bogus values.

Returned in the data parameter from a Get query with a pname of
LAYER_PROVOKING_VERTEX or VIEWPORT_INDEX_PROVOKING_VERTEX:

FIRST_VERTEX_CONVENTION 0x8E4D
LAST_VERTEX_CONVENTION  0x8E4E
PROVOKING_VERTEX0x8E4F
UNDEFINED_VERTEX0x8260





 What do you mean by bogus?
 I was emulating other extension xml files. Are these not needed
 because they are already defined in gl_ext.h?


 
  0x8E4D
 
  +enum name=LAST_VERTEX_CONVENTION value=0x8E4E/
 
  0x8E4E
 
  add: enum name=PROVOKING_VERTEX value=0x8E4F/
 
  +enum name=UNDEFINED_VERTEX value=0x8E4F/
 
  0x8260




 --
 Courtney Goeltzenleuchter
 LunarG






 --
 Courtney Goeltzenleuchter
 LunarG





-- 
Courtney Goeltzenleuchter
LunarG
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/9] gallium/dri: Support DRI Image extension version 6

2013-11-22 Thread Axel Davy


+ void *loaderPrivate)
+{
+   __DRIimage *img;
+   struct gl_context *ctx = ((struct st_context *)dri_context(context))-ctx;
+   struct gl_texture_object *obj;
+   struct pipe_resource *tex;
+   GLuint face = 0;


This part doesn't work.
A dri_context can be converted to a st_context.

To get the pipe_resource, we should use the st_context_iface in dri_context, 
and the function get_resource_for_egl_image.

Unfortunately, this function isn't implemented in mesa.

I've found this three years old patch, which implements it: 
http://marc.info/?l=mesa3d-devm=128431813411436
The get_resource_for_egl_image we need is at the end of the patch, and the 
implementation look correct to me.

Could a state_tracker expert review it and push the part of the patch we need?


+   img-dri_format = driGLFormatToImageFormat(obj-_BufferObjectFormat);


This part doesn't work either: it doesn't give the correct format.


Getting this function right would really help to write glamor dri3 helpers

Axel Davy


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 8/9] gallium/radeon: Implement hooks for DRI Image 7

2013-11-22 Thread Axel Davy


+size = lseek(whandle-handle, SEEK_END, 0);
+/*
+ * Could check errno to determine whether the kernel is new enough, but
+ * it doesn't really matter why this failed, just that it failed.
+ */
+if (size == (off_t)-1) {
+FREE(bo);
+goto fail;
+}
+lseek(whandle-handle, SEEK_SET, 0);



The lseek arguments are not in the right order



+args.handle = bo-handle;
+r = drmCommandWriteRead(bo-rws-fd, DRM_RADEON_GEM_BUSY, args, 
sizeof(args));
+if (r) {
+fprintf(stderr, radeon: Failed to find initial domain for imported 
bo\n);
+radeon_bo_destroy(bo-base);
+return NULL;
+}


the ioctl should be called again until it doesn't return -EBUSY.


Axel Davy

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/9] gallium/dri: Support DRI Image extension version 6

2013-11-22 Thread Axel Davy


A dri_context can be converted to a st_context.

Sorry for the typo. It should be A dri_context can't be converted to a 
st_context.


Axel Davy
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/2] glsl: Fix lowering of direct assignment in lower_clip_distance.

2013-11-22 Thread Paul Berry

In commit 065da16 (glsl: Convert lower_clip_distance_visitor to be an
ir_rvalue_visitor), we failed to notice that since
lower_clip_distance_visitor overrides visit_leave(ir_assignment *),
ir_rvalue_visitor::visit_leave(ir_assignment *) wasn't getting called.
As a result, clip distance dereferences appearing directly on the
right hand side of an assignment (not in a subexpression) weren't
getting properly lowered.  This caused an ir_dereference_variable node
to be left in the IR that referred to the old gl_ClipDistance
variable.  However, since the lowering pass replaces gl_ClipDistance
with gl_ClipDistanceMESA, this turned into a dangling pointer when the
IR got reparented.

Prior to the introduction of geometry shaders, this bug was unlikely
to arise, because (a) reading from gl_ClipDistance[i] in the fragment
shader was rare, and (b) when it happened, it was likely that it would
either appear in a subexpression, or be hoisted into a subexpression
by tree grafting.

However, in a geometry shader, we're likely to see a statement like
this, which would trigger the bug:

gl_ClipDistance[i] = gl_in[j].gl_ClipDistance[i];

This patch causes
lower_clip_distance_visitor::visit_leave(ir_assignment *) to call the
base class visitor, so that the right hand side of the assignment is
properly lowered.

Fixes piglit test:
- spec/glsl-1.50/execution/geometry/clip-distance-itemized-copy

Cc: Ian Romanick i...@freedesktop.org
Cc: 9.2 mesa-sta...@lists.freedesktop.org
Cc: 10.0 mesa-sta...@lists.freedesktop.org
---
 src/glsl/lower_clip_distance.cpp | 5 +
 1 file changed, 5 insertions(+)

diff --git a/src/glsl/lower_clip_distance.cpp b/src/glsl/lower_clip_distance.cpp
index 682c8fd..04fa6d4 100644
--- a/src/glsl/lower_clip_distance.cpp
+++ b/src/glsl/lower_clip_distance.cpp
@@ -381,6 +381,11 @@ lower_clip_distance_visitor::fix_lhs(ir_assignment *ir)
 ir_visitor_status
 lower_clip_distance_visitor::visit_leave(ir_assignment *ir)
 {
+   /* First invoke the base class visitor.  This causes handle_rvalue() to be
+* called on ir-rhs and ir-condition.
+*/
+   ir_rvalue_visitor::visit_leave(ir);
+
if (this-is_clip_distance_vec8(ir-lhs) ||
this-is_clip_distance_vec8(ir-rhs)) {
   /* LHS or RHS of the assignment is the entire 1D gl_ClipDistance array
-- 
1.8.4.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/2] glsl/linker: Validate IR just before reparenting.

2013-11-22 Thread Paul Berry

If reparent_ir() is called on invalid IR, then there's a danger that
it will fail to reparent all of the necessary nodes.  For example, if
the IR contains an ir_dereference_variable which refers to an
ir_variable that's not in the tree, that ir_variable won't get
reparented, resulting in subtle use-after-free bugs once the
non-reparented nodes are freed.  (This is exactly what happened in the
bug fixed by the previous commit).

This patch makes this kind of bug far easier to track down, by
transforming it from a use-after-free bug into an explicit IR
validation error.
---
 src/glsl/linker.cpp | 5 +
 1 file changed, 5 insertions(+)

diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index fac186a..1366077 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -2396,6 +2396,11 @@ done:
   if (prog-_LinkedShaders[i] == NULL)
 continue;
 
+  /* Do a final validation step to make sure that the IR wasn't
+   * invalidated by any modifications performed after intrastage linking.
+   */
+  validate_ir_tree(prog-_LinkedShaders[i]-ir);
+
   /* Retain any live IR, but trash the rest. */
   reparent_ir(prog-_LinkedShaders[i]-ir, prog-_LinkedShaders[i]-ir);
 
-- 
1.8.4.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/2] mesa: Add gl_formats to cover all GLUser provided format/type combinations

2013-11-22 Thread Mark Mueller

On Fri, Nov 22, 2013 at 12:36 PM, Marek Olšák mar...@gmail.com wrote:

 On Fri, Nov 15, 2013 at 9:58 PM, Mark Mueller markkmuel...@gmail.com
 wrote:
 
 
 
  On Fri, Nov 15, 2013 at 9:52 AM, Marek Olšák mar...@gmail.com wrote:
 
  I don't understand this and I don't think this is the right way to
  implement hw-accelerated TexImage. Some of the formats are unsupported
  by all hardware I know, others just don't make any sense (e.g.
  RGBA5999_REV).
 
 
  Please check out st_choose_matching_format. This is what the function
  comment says:
  /**
   * Given an OpenGL user-requested format and type, and swapBytes state,
   * return the format which exactly matches those parameters, so that
   * a memcpy-based transfer can be done.
   *
   * If no format is supported, return PIPE_FORMAT_NONE.
   */
 
  My patch allows for similar functionality without using Gallium
  PIPE_FORMATs, which aren't supported in the i965 driver. RGPA5999_REV is
  there because it is used by a the glean test case pixelFormats. Having
  hardware support for the added formats is not relevant.

 Why is it not relevant? If there is no hardware support, adding those
 formats is a waste of time. Will the formats be unpacked by shaders or
 what? The MESA_* formats have only one purpose: to be used as OpenGL
 textures and renderbuffers.

 Yes, that's the nature of GPU based texture uploading. The driver
generates a shader and the GPU does the necessary unpacking from a cached
memcpy of the application texture and stuffs it into the final mip tree in
its target tiled format. In order to do that, the driver must know the
original format of the texture, which can't be done from the current
choices of gl_formats, thus I'm adding more. Recently I've made some local
improvements so the i965 driver that can produce more information from
fewer gl_formats, thus I'll be posting an updated patch soon.

If there's any question of the merits, here's an example - let's say the
application uploads a GL_RGB format and asks for a GL_UNSIGNED_SHORT_5_6_5
internal format. Tests on Ivy Bridge show that when i965 uses the CPU to
upload a 256 x 256 texture it does so at ~28 - 77 MB/sec, whereas when i965
uses the GPU, the same texture uploads at ~2400 MB/sec - including memcpy
and all. Even the most basic cases with no repacking still show a 2x - 3x
improvement because the GPU inherently handles tiling much faster. In
general the overhead of the memcpy is easily absorbed by the fact that the
graphics pipe doesn't have to stall while the CPU does the unpacking,
tiling, and repacking in the single driver thread.

Mark
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 15/18] mesa: Add ARB_viewport_array plumbing

2013-11-22 Thread Chris Forbes

It's just that last block that were messed up -- rest was context.

Sorry for any confusion.


On Sat, Nov 23, 2013 at 10:06 AM, Courtney Goeltzenleuchter 
court...@lunarg.com wrote:

 Hi Chris,

 I'm using this version of the spec:
 http://www.opengl.org/registry/specs/ARB/viewport_array.txt

 On Thu, Nov 21, 2013 at 4:41 PM, Chris Forbes chr...@ijw.co.nz wrote:

 I was just comparing to the list in the ARB_viewport_array spec.


 On Fri, Nov 22, 2013 at 11:33 AM, Courtney Goeltzenleuchter 
 court...@lunarg.com wrote:

 Hi Chris,

 Where are you getting your defines?
 I copied them from include/GL/gl.h
 #define GL_VIEWPORT 0x0BA2
 /* Scissor box */
 #define GL_SCISSOR_BOX 0x0C10
 #define GL_SCISSOR_TEST 0x0C11
 #define GL_SCISSOR_TEST 0x0C11
 #define GL_DEPTH_RANGE 0x0B70

 Ah, FIRST_VERTEX looks different.
 #define GL_FIRST_VERTEX_CONVENTION0x8E4D

 I'll add PROVOKING_VERTEX

 Looks like UNDEFINED_VERTEX was wrong as well.
 (include/GL/glext.h) #define GL_UNDEFINED_VERTEX   0x8260

  I was modelling one of the other extension xml files and they had
 similar defines, though I could see no effect including or excluding them.

 Should I just get rid of the definitions for values that already exist
 in gl.h or glext.h?

 Courtney


 On Thu, Nov 21, 2013 at 1:00 PM, Chris Forbes chr...@ijw.co.nz wrote:

 I'm surprised the build system accepts the conflicting second
 definition of SCISSOR_BOX at all, actually -- that's weird.


 On Fri, Nov 22, 2013 at 8:55 AM, Chris Forbes chr...@ijw.co.nz wrote:

 I mean some of the values don't match the spec :)


 On Fri, Nov 22, 2013 at 7:52 AM, Courtney Goeltzenleuchter 
 court...@lunarg.com wrote:



 On Wed, Nov 20, 2013 at 7:28 PM, Chris Forbes chr...@ijw.co.nzwrote:

 Oops -- the 8E4E is obviously correct. Artifact of me switching how I
 was commenting halfway through.

 On Thu, Nov 21, 2013 at 3:25 PM, Chris Forbes chr...@ijw.co.nz
 wrote:
  These are bogus:
 
  +enum name=SCISSOR_BOX value=0x0C10/
  +enum name=VIEWPORT value=0x0BA2/
  +enum name=DEPTH_RANGE value=0x0B70/
  +enum name=SCISSOR_TEST value=0x0C11/
  +enum name=FIRST_VERTEX_CONVENTION value=0x0C10/


 In the spec I'm using I see:

 New Tokens

 Accepted by the pname parameter of GetBooleanv, GetIntegerv, GetFloatv,
 GetDoublev and GetInteger64v:

 MAX_VIEWPORTS   0x825B
 VIEWPORT_SUBPIXEL_BITS  0x825C
 VIEWPORT_BOUNDS_RANGE   0x825D
 LAYER_PROVOKING_VERTEX  0x825E
 VIEWPORT_INDEX_PROVOKING_VERTEX 0x825F

 Accepted by the pname parameter of GetIntegeri_v:

 *SCISSOR_BOX 0x0C10*

 Accepted by the pname parameter of GetFloati_v:

 *VIEWPORT0x0BA2*

 Accepted by the pname parameter of GetDoublei_v:

 *DEPTH_RANGE 0x0B70*

 Accepted by the pname parameter of Enablei, Disablei, and IsEnabledi:

 *SCISSOR_TEST0x0C11*

 Thus my confusion regarding bogus values.

 Returned in the data parameter from a Get query with a pname of
 LAYER_PROVOKING_VERTEX or VIEWPORT_INDEX_PROVOKING_VERTEX:

 FIRST_VERTEX_CONVENTION 0x8E4D
 LAST_VERTEX_CONVENTION  0x8E4E
 PROVOKING_VERTEX0x8E4F
 UNDEFINED_VERTEX0x8260





 What do you mean by bogus?
 I was emulating other extension xml files. Are these not needed
 because they are already defined in gl_ext.h?


 
  0x8E4D
 
  +enum name=LAST_VERTEX_CONVENTION value=0x8E4E/
 
  0x8E4E
 
  add: enum name=PROVOKING_VERTEX value=0x8E4F/
 
  +enum name=UNDEFINED_VERTEX value=0x8E4F/
 
  0x8260




 --
 Courtney Goeltzenleuchter
 LunarG






 --
 Courtney Goeltzenleuchter
 LunarG





 --
 Courtney Goeltzenleuchter
 LunarG


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [Intel-gfx] [PATCH] dri3, i915, i965: Add __DRI_IMAGE_FOURCC_SARGB8888

2013-11-22 Thread Kristian Høgsberg

On Fri, Nov 22, 2013 at 05:17:37PM +0100, Daniel Vetter wrote:
 On Fri, Nov 22, 2013 at 12:01 PM, Keith Packard kei...@keithp.com wrote:
  Daniel Vetter dan...@ffwll.ch writes:
 
  Hm, where do we have the canonical source for all these fourcc codes? I'm
  asking since we have our own copy in the kernel as drm_fourcc.h, and that
  one is part of the userspace ABI since we use it to pass around
  framebuffer formats and format lists.
 
  I think it's the kernel? I really don't know, as the whole notion of
  fourcc codes seems crazy to me...
 
  Feel free to steal this code and stick it in the kernel if you like.
 
 Well, I wasn't ever in favour of using fourcc codes since they're just
 not standardized at all, highly redundant in some cases and also miss
 lots of stuff we actually need (like all the rgb formats).

These drm codes are not fourcc codes in any other way than that
they're defined by creating a 32 bit value by picking four characters.
I don't know what PTSD triggers people have from hearing fourcc, but
it seems severe.  Forget all that, these codes are DRM specific
defines that are not inteded to match anything anybody else does.  It
doesn't matter if these match of conflict with v4l, fourcc.org,
wikipedia.org or what the amiga did.  They're just tokens that let us
define succintly what the pixel format of a kms framebuffer is and
tell the kernel.

I don't know what else you'd propose?  Pass an X visual in the ioctl?
An EGL config?  This is our name space, we can add stuff as we need
(as Keith is doing here). include/uapi/drm/drm_fourcc.h is the
canonical source for these values and we should add
DRM_FORMAT_SARGB there to make sure we don't clash.

Why are these codes in mesa (and gbm and wl_drm protocol) then?
Because it turns out that once you have an stable and established
namespace for pixel formats (and a kernel userspace header is about as
stable and established as it gets) it makes a lot of sense to reuse
those values.

I already explained to Keith why we use different sets of format codes
in the DRI interface, but it's always fun to slam other peoples code.
Anyway, it's pretty simple, the __DRI_IMAGE_FORMAT_* defines predate
the introduction of drm_fourcc.h.  When we later added suport for
planar YUV __DRIimages, the kernel had picked up drm_fourcc.h after a
long sad bikeshedding flamewar, which included the planar formats we
needed.  At this point we could continue using our custom
__DRI_IMAGE_FORMAT_* defines or we could switch to the tokens that we
had finally converged on.  But don't let me ruin a good old snide remark.

 Cc'ing the heck out of this to get kernel people to hopefully notice.
 Maybe someone takes charge of this ... Otherwise meh.

I don't know what you want to change.  These values are already kernel
ABI, we use them in drmAddFB2, and again, I don't understand what
problem you're seeing.

Kristian

  Just afraid to create long-term maintainance madness here with the
  kernel's iron thou-shalt-not-break-userspace-ever rule ... Not likely
  we'll ever accept srgb for framebuffers though.
 
  Would suck to collide with something we do want though.
 
 Yeah, it'd suck. But given how fourcc works we probably have that
 already, just haven't noticed yet :(
 -Daniel
 -- 
 Daniel Vetter
 Software Engineer, Intel Corporation
 +41 (0) 79 365 57 48 - http://blog.ffwll.ch
 ___
 Intel-gfx mailing list
 intel-...@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/intel-gfx
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 15/18] mesa: Add ARB_viewport_array plumbing

2013-11-22 Thread Courtney Goeltzenleuchter

Got it.


On Fri, Nov 22, 2013 at 2:55 PM, Chris Forbes chr...@ijw.co.nz wrote:

 It's just that last block that were messed up -- rest was context.

 Sorry for any confusion.


 On Sat, Nov 23, 2013 at 10:06 AM, Courtney Goeltzenleuchter 
 court...@lunarg.com wrote:

 Hi Chris,

 I'm using this version of the spec:
 http://www.opengl.org/registry/specs/ARB/viewport_array.txt

 On Thu, Nov 21, 2013 at 4:41 PM, Chris Forbes chr...@ijw.co.nz wrote:

 I was just comparing to the list in the ARB_viewport_array spec.


 On Fri, Nov 22, 2013 at 11:33 AM, Courtney Goeltzenleuchter 
 court...@lunarg.com wrote:

 Hi Chris,

 Where are you getting your defines?
 I copied them from include/GL/gl.h
 #define GL_VIEWPORT 0x0BA2
 /* Scissor box */
 #define GL_SCISSOR_BOX 0x0C10
 #define GL_SCISSOR_TEST 0x0C11
 #define GL_SCISSOR_TEST 0x0C11
 #define GL_DEPTH_RANGE 0x0B70

 Ah, FIRST_VERTEX looks different.
 #define GL_FIRST_VERTEX_CONVENTION0x8E4D

 I'll add PROVOKING_VERTEX

 Looks like UNDEFINED_VERTEX was wrong as well.
 (include/GL/glext.h) #define GL_UNDEFINED_VERTEX   0x8260

  I was modelling one of the other extension xml files and they had
 similar defines, though I could see no effect including or excluding them.

 Should I just get rid of the definitions for values that already exist
 in gl.h or glext.h?

 Courtney


 On Thu, Nov 21, 2013 at 1:00 PM, Chris Forbes chr...@ijw.co.nz wrote:

 I'm surprised the build system accepts the conflicting second
 definition of SCISSOR_BOX at all, actually -- that's weird.


 On Fri, Nov 22, 2013 at 8:55 AM, Chris Forbes chr...@ijw.co.nzwrote:

 I mean some of the values don't match the spec :)


 On Fri, Nov 22, 2013 at 7:52 AM, Courtney Goeltzenleuchter 
 court...@lunarg.com wrote:



 On Wed, Nov 20, 2013 at 7:28 PM, Chris Forbes chr...@ijw.co.nzwrote:

 Oops -- the 8E4E is obviously correct. Artifact of me switching how
 I
 was commenting halfway through.

 On Thu, Nov 21, 2013 at 3:25 PM, Chris Forbes chr...@ijw.co.nz
 wrote:
  These are bogus:
 
  +enum name=SCISSOR_BOX value=0x0C10/
  +enum name=VIEWPORT value=0x0BA2/
  +enum name=DEPTH_RANGE value=0x0B70/
  +enum name=SCISSOR_TEST value=0x0C11/
  +enum name=FIRST_VERTEX_CONVENTION value=0x0C10/


 In the spec I'm using I see:

 New Tokens

 Accepted by the pname parameter of GetBooleanv, GetIntegerv, GetFloatv,
 GetDoublev and GetInteger64v:

 MAX_VIEWPORTS   0x825B
 VIEWPORT_SUBPIXEL_BITS  0x825C
 VIEWPORT_BOUNDS_RANGE   0x825D
 LAYER_PROVOKING_VERTEX  0x825E
 VIEWPORT_INDEX_PROVOKING_VERTEX 0x825F

 Accepted by the pname parameter of GetIntegeri_v:

 *SCISSOR_BOX 0x0C10*

 Accepted by the pname parameter of GetFloati_v:

 *VIEWPORT0x0BA2*

 Accepted by the pname parameter of GetDoublei_v:

 *DEPTH_RANGE 0x0B70*

 Accepted by the pname parameter of Enablei, Disablei, and IsEnabledi:

 *SCISSOR_TEST0x0C11*

 Thus my confusion regarding bogus values.

 Returned in the data parameter from a Get query with a pname of
 LAYER_PROVOKING_VERTEX or VIEWPORT_INDEX_PROVOKING_VERTEX:

 FIRST_VERTEX_CONVENTION 0x8E4D
 LAST_VERTEX_CONVENTION  0x8E4E
 PROVOKING_VERTEX0x8E4F
 UNDEFINED_VERTEX0x8260





 What do you mean by bogus?
 I was emulating other extension xml files. Are these not needed
 because they are already defined in gl_ext.h?


 
  0x8E4D
 
  +enum name=LAST_VERTEX_CONVENTION value=0x8E4E/
 
  0x8E4E
 
  add: enum name=PROVOKING_VERTEX value=0x8E4F/
 
  +enum name=UNDEFINED_VERTEX value=0x8E4F/
 
  0x8260




 --
 Courtney Goeltzenleuchter
 LunarG






 --
 Courtney Goeltzenleuchter
 LunarG





 --
 Courtney Goeltzenleuchter
 LunarG





-- 
Courtney Goeltzenleuchter
LunarG
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [Intel-gfx] [PATCH] dri3, i915, i965: Add __DRI_IMAGE_FOURCC_SARGB8888

2013-11-22 Thread Ville Syrjälä

On Fri, Nov 22, 2013 at 02:12:13PM -0800, Kristian Høgsberg wrote:
 On Fri, Nov 22, 2013 at 05:17:37PM +0100, Daniel Vetter wrote:
  On Fri, Nov 22, 2013 at 12:01 PM, Keith Packard kei...@keithp.com wrote:
   Daniel Vetter dan...@ffwll.ch writes:
  
   Hm, where do we have the canonical source for all these fourcc codes? I'm
   asking since we have our own copy in the kernel as drm_fourcc.h, and that
   one is part of the userspace ABI since we use it to pass around
   framebuffer formats and format lists.
  
   I think it's the kernel? I really don't know, as the whole notion of
   fourcc codes seems crazy to me...
  
   Feel free to steal this code and stick it in the kernel if you like.
  
  Well, I wasn't ever in favour of using fourcc codes since they're just
  not standardized at all, highly redundant in some cases and also miss
  lots of stuff we actually need (like all the rgb formats).
 
 These drm codes are not fourcc codes in any other way than that
 they're defined by creating a 32 bit value by picking four characters.
 I don't know what PTSD triggers people have from hearing fourcc, but
 it seems severe.  Forget all that, these codes are DRM specific
 defines that are not inteded to match anything anybody else does.  It
 doesn't matter if these match of conflict with v4l, fourcc.org,
 wikipedia.org or what the amiga did.  They're just tokens that let us
 define succintly what the pixel format of a kms framebuffer is and
 tell the kernel.
 
 I don't know what else you'd propose?  Pass an X visual in the ioctl?
 An EGL config?  This is our name space, we can add stuff as we need
 (as Keith is doing here). include/uapi/drm/drm_fourcc.h is the
 canonical source for these values and we should add
 DRM_FORMAT_SARGB there to make sure we don't clash.

What is this format anyway? -ENODOCS

If its just an srgb version of ARGB, then I wouldn't really want it
in drm_fourcc.h. I expect colorspacy stuff will be handled by various
crtc/plane properties in the kernel so we don't need to encode that
stuff into the fb format.

-- 
Ville Syrjälä
Intel OTC
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/2] mesa: Add gl_formats to cover all GLUser provided format/type combinations

2013-11-22 Thread Marek Olšák

On Fri, Nov 22, 2013 at 10:45 PM, Mark Mueller markkmuel...@gmail.com wrote:



 On Fri, Nov 22, 2013 at 12:36 PM, Marek Olšák mar...@gmail.com wrote:

 On Fri, Nov 15, 2013 at 9:58 PM, Mark Mueller markkmuel...@gmail.com
 wrote:
 
 
 
  On Fri, Nov 15, 2013 at 9:52 AM, Marek Olšák mar...@gmail.com wrote:
 
  I don't understand this and I don't think this is the right way to
  implement hw-accelerated TexImage. Some of the formats are unsupported
  by all hardware I know, others just don't make any sense (e.g.
  RGBA5999_REV).
 
 
  Please check out st_choose_matching_format. This is what the function
  comment says:
  /**
   * Given an OpenGL user-requested format and type, and swapBytes state,
   * return the format which exactly matches those parameters, so that
   * a memcpy-based transfer can be done.
   *
   * If no format is supported, return PIPE_FORMAT_NONE.
   */
 
  My patch allows for similar functionality without using Gallium
  PIPE_FORMATs, which aren't supported in the i965 driver. RGPA5999_REV is
  there because it is used by a the glean test case pixelFormats. Having
  hardware support for the added formats is not relevant.

 Why is it not relevant? If there is no hardware support, adding those
 formats is a waste of time. Will the formats be unpacked by shaders or
 what? The MESA_* formats have only one purpose: to be used as OpenGL
 textures and renderbuffers.

 Yes, that's the nature of GPU based texture uploading. The driver generates
 a shader and the GPU does the necessary unpacking from a cached memcpy of
 the application texture and stuffs it into the final mip tree in its target
 tiled format. In order to do that, the driver must know the original format
 of the texture, which can't be done from the current choices of gl_formats,

This is not true. Drivers have access to the parameters of TexImage
and are allowed to do with them whatever they want, so they do know
the exact format of user data. I don't see why you want new texture
formats while all you need is a shader that can read raw bytes and do
the unpacking. If you used a blit instead of a shader and your
hardware had fixed-function circuitry to read the new formats, that
would be an entirely different story.

If I implemented shader-based unpacking, I would probably use a
texture buffer object and common formats like R8UI, R16UI, R32UI, same
for RG, RGB, and RGBA and also the signed variants, but not much else.

 thus I'm adding more. Recently I've made some local improvements so the i965
 driver that can produce more information from fewer gl_formats, thus I'll be
 posting an updated patch soon.

 If there's any question of the merits, here's an example - let's say the
 application uploads a GL_RGB format and asks for a GL_UNSIGNED_SHORT_5_6_5
 internal format. Tests on Ivy Bridge show that when i965 uses the CPU to
 upload a 256 x 256 texture it does so at ~28 - 77 MB/sec, whereas when i965
 uses the GPU, the same texture uploads at ~2400 MB/sec - including memcpy
 and all. Even the most basic cases with no repacking still show a 2x - 3x
 improvement because the GPU inherently handles tiling much faster. In
 general the overhead of the memcpy is easily absorbed by the fact that the
 graphics pipe doesn't have to stall while the CPU does the unpacking,
 tiling, and repacking in the single driver thread.

I understand this very well. I did the same thing for Gallium. Also,
avoiding CPU-GPU stalls has nothing to do with packing and unpacking
the pixels. It's a completely independent issue.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] Spec interpretation question: layered framebuffers with mismatched layer counts

2013-11-22 Thread Paul Berry

The ARB_geometry_shader4 spec says, in the list of conditions necessary for
framebuffer completeness:

  * If any framebuffer attachment is layered, all attachments must have
the same layer count.  For three-dimensional textures, the layer
count
is the depth of the attached volume.  For cube map textures, the
layer
count is always six.  For one- and two-dimensional array textures,
the
layer count is simply the number of layers in the array texture.
{ FRAMEBUFFER_INCOMPLETE_LAYER_COUNT_ARB }


When geometry shaders were adopted into desktop GL, this condition was
dropped.  The constant FRAMEBUFFER_INCOMPLETE_LAYER_COUNT doesn't appear at
all in the desktop GL spec.  Instead, GL 3.2 says (in section 4.4.7
(Layered Framebuffers)):

When fragments are written to a layered framebuffer, the fragment’s layer
number selects an image from the array of images at each attachment point
to use for the stencil test (see section 4.1.5), depth buffer test (see
section 4.1.6), and for blending and color buffer writes (see section
4.1.8). If the fragment’s layer number is negative, or greater than the
minimum number of layers of any attachment, the effects of the fragment on
the framebuffer contents are undefined.

(note: presumably where the spec says greater, greater than equal is
actually intended).

In other words, a framebuffer is allowed to have layers with mismatched
layer counts, but if so then the client may only reliably render to layer
numbers that are present in all attachments.

Mesa currently implements the rule in ARB_geometry_shader4.  Technically,
this is a bug, since Mesa is trying to implement geometry shaders as
specified in GL 3.2, not as specified in ARB_geometry_shader4.

However, there are two mitigating factors:

1. If we follow the GL 3.2 spec, then it's not clear what should happen if
the client calls glClear() on a framebuffer with mismatched layer counts.
For example, if I have a color attachment with 4 layers and a color
attachment with 2 layers, should all 4 layers of the color attachment with
4 layers be cleared, or just the first 2?  Or is it undefined?  If we're
required to clear all 4 layers, that's going to make the Meta
implementation of glClear() a lot more clumsy.

2. The Nvidia proprietary drivers for Linux (version 313.18) follows the
ARB_geometry_shader4 rule (returning FRAMEBUFFER_INCOMPLETE_LAYER_COUNT_ARB
from glCheckFramebufferStatus()), even when an OpenGL 3.2+ context is used.


Currently, I'm inclined to leave Mesa as is, and file a spec bug with
Khronos encouraging them to adopt the ARB_geometry_shader4 text into
OpenGL.  I'm curious to hear other people's opinions.

Thanks,

Paul
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 0/3] Enable GL 3.2 support for i965, bump Mesa version.

2013-11-22 Thread Clemens Eisserer

Hi,

 I believe geometry shaders are all we would have to implement for Sandy
 Bridge.  Unfortunately, geometry shaders work pretty differently on Sandy
 Bridge, so getting them to work won't be a slam dunk.  Here's roughly what
 would have to be done:

This is too sad - I had the impression mesa-10 would bring opengl-3.3
to my SNB laptop I bought last year.
I usually don't play computer games, but recently bought two games
refusing to start (dear esther and oil rush) because both require ogl
= 3.2.

Regards, Clemens
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] Spec interpretation question: layered framebuffers with mismatched layer counts

2013-11-22 Thread Marek Olšák

The number of layers can be specified for each framebuffer attachment
independently in Gallium, Direct3D, and also our hardware. OpenGL
seems to be the only weirdo here. ;) I think our hardware handles
writes to non-existent layers independently of other attachments. It
either clamps the layer index or throws the fragments going to
non-existent layers away.

I would silently allow mismatched layer counts and pass them to the
hardware. It must be able to deal with it in its own way, otherwise it
wouldn't support it in the first place.

Marek

On Sat, Nov 23, 2013 at 12:08 AM, Paul Berry stereotype...@gmail.com wrote:
 The ARB_geometry_shader4 spec says, in the list of conditions necessary for
 framebuffer completeness:

   * If any framebuffer attachment is layered, all attachments must have
 the same layer count.  For three-dimensional textures, the layer
 count
 is the depth of the attached volume.  For cube map textures, the
 layer
 count is always six.  For one- and two-dimensional array textures,
 the
 layer count is simply the number of layers in the array texture.
 { FRAMEBUFFER_INCOMPLETE_LAYER_COUNT_ARB }


 When geometry shaders were adopted into desktop GL, this condition was
 dropped.  The constant FRAMEBUFFER_INCOMPLETE_LAYER_COUNT doesn't appear at
 all in the desktop GL spec.  Instead, GL 3.2 says (in section 4.4.7 (Layered
 Framebuffers)):

 When fragments are written to a layered framebuffer, the fragment’s layer
 number selects an image from the array of images at each attachment point to
 use for the stencil test (see section 4.1.5), depth buffer test (see section
 4.1.6), and for blending and color buffer writes (see section 4.1.8). If the
 fragment’s layer number is negative, or greater than the minimum number of
 layers of any attachment, the effects of the fragment on the framebuffer
 contents are undefined.

 (note: presumably where the spec says greater, greater than equal is
 actually intended).

 In other words, a framebuffer is allowed to have layers with mismatched
 layer counts, but if so then the client may only reliably render to layer
 numbers that are present in all attachments.

 Mesa currently implements the rule in ARB_geometry_shader4.  Technically,
 this is a bug, since Mesa is trying to implement geometry shaders as
 specified in GL 3.2, not as specified in ARB_geometry_shader4.

 However, there are two mitigating factors:

 1. If we follow the GL 3.2 spec, then it's not clear what should happen if
 the client calls glClear() on a framebuffer with mismatched layer counts.
 For example, if I have a color attachment with 4 layers and a color
 attachment with 2 layers, should all 4 layers of the color attachment with 4
 layers be cleared, or just the first 2?  Or is it undefined?  If we're
 required to clear all 4 layers, that's going to make the Meta implementation
 of glClear() a lot more clumsy.

 2. The Nvidia proprietary drivers for Linux (version 313.18) follows the
 ARB_geometry_shader4 rule (returning FRAMEBUFFER_INCOMPLETE_LAYER_COUNT_ARB
 from glCheckFramebufferStatus()), even when an OpenGL 3.2+ context is used.


 Currently, I'm inclined to leave Mesa as is, and file a spec bug with
 Khronos encouraging them to adopt the ARB_geometry_shader4 text into OpenGL.
 I'm curious to hear other people's opinions.

 Thanks,

 Paul

 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] R600: Expand vector FABS

2013-11-22 Thread Tom Stellard

From: Tom Stellard thomas.stell...@amd.com

NOTE: This is a candidate for the 3.4 branch.
---
 lib/Target/R600/AMDGPUISelLowering.cpp |  1 +
 test/CodeGen/R600/fabs.ll  | 36 --
 2 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp 
b/lib/Target/R600/AMDGPUISelLowering.cpp
index f2a6aab..c4d75ff 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -179,6 +179,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine 
TM) :
 
   for (unsigned int x = 0; x  NumFloatTypes; ++x) {
 MVT::SimpleValueType VT = FloatTypes[x];
+setOperationAction(ISD::FABS, VT, Expand);
 setOperationAction(ISD::FADD, VT, Expand);
 setOperationAction(ISD::FDIV, VT, Expand);
 setOperationAction(ISD::FFLOOR, VT, Expand);
diff --git a/test/CodeGen/R600/fabs.ll b/test/CodeGen/R600/fabs.ll
index 23ab468..346e4e9 100644
--- a/test/CodeGen/R600/fabs.ll
+++ b/test/CodeGen/R600/fabs.ll
@@ -5,10 +5,10 @@
 ; (fabs (f32 bitcast (i32 a))) = (f32 bitcast (and (i32 a), 0x7FFF))
 ; unless isFabsFree returns true
 
-; R600-CHECK: @fabs_free
+; R600-CHECK-LABEL: @fabs_free
 ; R600-CHECK-NOT: AND
 ; R600-CHECK: |PV.{{[XYZW]}}|
-; SI-CHECK: @fabs_free
+; SI-CHECK-LABEL: @fabs_free
 ; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 1, 0, 0, 0
 
 define void @fabs_free(float addrspace(1)* %out, i32 %in) {
@@ -19,4 +19,36 @@ entry:
   ret void
 }
 
+; R600-CHECK-LABEL: @fabs_v2
+; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}|
+; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}|
+; SI-CHECK-LABEL: @fabs_v2
+; SI-CHECK: V_ADD_F32_e64 VGPR{{[0-9]}}, SGPR{{[0-9]}}, 0, 1, 0, 0, 0
+; SI-CHECK: V_ADD_F32_e64 VGPR{{[0-9]}}, SGPR{{[0-9]}}, 0, 1, 0, 0, 0
+define void @fabs_v2(2 x float addrspace(1)* %out, 2 x float %in) {
+entry:
+  %0 = call 2 x float @llvm.fabs.v2f32(2 x float %in)
+  store 2 x float %0, 2 x float addrspace(1)* %out
+  ret void
+}
+
+; R600-CHECK-LABEL: @fabs_v4
+; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}|
+; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}|
+; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}|
+; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}|
+; SI-CHECK-LABEL: @fabs_v4
+; SI-CHECK: V_ADD_F32_e64 VGPR{{[0-9]}}, SGPR{{[0-9]}}, 0, 1, 0, 0, 0
+; SI-CHECK: V_ADD_F32_e64 VGPR{{[0-9]}}, SGPR{{[0-9]}}, 0, 1, 0, 0, 0
+; SI-CHECK: V_ADD_F32_e64 VGPR{{[0-9]}}, SGPR{{[0-9]}}, 0, 1, 0, 0, 0
+; SI-CHECK: V_ADD_F32_e64 VGPR{{[0-9]}}, SGPR{{[0-9]}}, 0, 1, 0, 0, 0
+define void @fabs_v4(4 x float addrspace(1)* %out, 4 x float %in) {
+entry:
+  %0 = call 4 x float @llvm.fabs.v4f32(4 x float %in)
+  store 4 x float %0, 4 x float addrspace(1)* %out
+  ret void
+}
+
 declare float @fabs(float ) readnone
+declare 2 x float @llvm.fabs.v2f32(2 x float ) readnone
+declare 4 x float @llvm.fabs.v4f32(4 x float ) readnone
-- 
1.8.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [Intel-gfx] [PATCH] dri3, i915, i965: Add __DRI_IMAGE_FOURCC_SARGB8888

2013-11-22 Thread Keith Packard

Kristian Høgsberg hoegsb...@gmail.com writes:

 I already explained to Keith why we use different sets of format codes
 in the DRI interface, but it's always fun to slam other peoples code.

As we discussed, my complaint isn't so much about __DRI_IMAGE_FOURCC,
but the fact that the __DRIimage interfaces use *both*
__DRI_IMAGE_FOURCC and __DRI_IMAGE_FORMAT at different times.

Ok, here's a fine thing we can actually fix -- the pattern that mesa
uses all over the place in converting formats looks like this (not to
pick on anyone, it's repeated everywhere, this is just the first one I
found in gbm_dri.c):

static uint32_t
gbm_dri_to_gbm_format(uint32_t dri_format)
{
   uint32_t ret = 0;

   switch (dri_format) {
   case __DRI_IMAGE_FORMAT_RGB565:
  ret = GBM_FORMAT_RGB565;
  break;
   case __DRI_IMAGE_FORMAT_XRGB:
  ret = GBM_FORMAT_XRGB;
  break;
   case __DRI_IMAGE_FORMAT_ARGB:
  ret = GBM_FORMAT_ARGB;
  break;
   case __DRI_IMAGE_FORMAT_ABGR:
  ret = GBM_FORMAT_ABGR;
  break;
   default:
  ret = 0;
  break;
   }

   return ret;
}

The problem here is that any unknown incoming formats get translated to
garbage (0) outgoing. With fourcc codes, there is the slight advantage
that 0 is never a legal value, but it sure would be nice to print a
warning or even abort if you get a format code you don't understand as
there's no way 0 is ever going to do what you want.

Anyone have a preference? Abort? Print an error? Silently continue to do
the wrong thing?

-- 
keith.pack...@intel.com


pgpmBS_oO7m0p.pgp
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [Intel-gfx] [PATCH] dri3, i915, i965: Add __DRI_IMAGE_FOURCC_SARGB8888

2013-11-22 Thread Keith Packard

Ville Syrjälä ville.syrj...@linux.intel.com writes:

 What is this format anyway? -ENODOCS

Same as MESA_FORMAT_SARGB8 and __DRI_IMAGE_FORMAT_SARGB8 :-)

 If its just an srgb version of ARGB, then I wouldn't really want it
 in drm_fourcc.h. I expect colorspacy stuff will be handled by various
 crtc/plane properties in the kernel so we don't need to encode that
 stuff into the fb format.

It's not any different from splitting YUV codes from RGB codes; a
different color encoding with the same bitfields. And, for mesa, it's
necessary to differentiate between ARGB and SARGB within the same format
code given how the API is structured. So, we have choices:

 1) Let Mesa define it's own fourcc codes and risk future accidental
collisions
 
 2) Rewrite piles of mesa code to split out the color encoding from the
bitfield information and pass it separately.

 3) Add reasonable format codes like this to the kernel fourcc list.

-- 
keith.pack...@intel.com


pgpyYJXhOW5b4.pgp
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] Spec interpretation question: layered framebuffers with mismatched layer counts

2013-11-22 Thread Roland Scheidegger

It is however illegal in d3d10 to have multiple render targets with
different dimensions (including array size):
http://msdn.microsoft.com/en-us/library/windows/desktop/bb205131%28v=vs.85%29.aspx
I don't know what happens if you try to bind such rendertargets.
FWIW when I implemented layered rendering in llvmpipe, I simply clamped
rendering to the minimum max layer of all attachments. My reading of the
GL spec was that just about anything is allowed to happen if you write
to such a layer, so clamping to the minimum max layer (and thus write to
the wrong layer even for adjustments which do have that layer) looked
alright to me.
It would be quite unpractical if that weren't allowed (well other
options such as dropping rendering completely on the floor would be
doable too, but something like writing the correct layers for
attachments which have them and drop rendering or clamp layer for the
others would be HIGHLY unpractical).

Roland


Am 23.11.2013 00:24, schrieb Marek Olšák:
 The number of layers can be specified for each framebuffer attachment
 independently in Gallium, Direct3D, and also our hardware. OpenGL
 seems to be the only weirdo here. ;) I think our hardware handles
 writes to non-existent layers independently of other attachments. It
 either clamps the layer index or throws the fragments going to
 non-existent layers away.
 
 I would silently allow mismatched layer counts and pass them to the
 hardware. It must be able to deal with it in its own way, otherwise it
 wouldn't support it in the first place.
 
 Marek
 
 On Sat, Nov 23, 2013 at 12:08 AM, Paul Berry stereotype...@gmail.com wrote:
 The ARB_geometry_shader4 spec says, in the list of conditions necessary for
 framebuffer completeness:

   * If any framebuffer attachment is layered, all attachments must have
 the same layer count.  For three-dimensional textures, the layer
 count
 is the depth of the attached volume.  For cube map textures, the
 layer
 count is always six.  For one- and two-dimensional array textures,
 the
 layer count is simply the number of layers in the array texture.
 { FRAMEBUFFER_INCOMPLETE_LAYER_COUNT_ARB }


 When geometry shaders were adopted into desktop GL, this condition was
 dropped.  The constant FRAMEBUFFER_INCOMPLETE_LAYER_COUNT doesn't appear at
 all in the desktop GL spec.  Instead, GL 3.2 says (in section 4.4.7 (Layered
 Framebuffers)):

 When fragments are written to a layered framebuffer, the fragment’s layer
 number selects an image from the array of images at each attachment point to
 use for the stencil test (see section 4.1.5), depth buffer test (see section
 4.1.6), and for blending and color buffer writes (see section 4.1.8). If the
 fragment’s layer number is negative, or greater than the minimum number of
 layers of any attachment, the effects of the fragment on the framebuffer
 contents are undefined.

 (note: presumably where the spec says greater, greater than equal is
 actually intended).

 In other words, a framebuffer is allowed to have layers with mismatched
 layer counts, but if so then the client may only reliably render to layer
 numbers that are present in all attachments.

 Mesa currently implements the rule in ARB_geometry_shader4.  Technically,
 this is a bug, since Mesa is trying to implement geometry shaders as
 specified in GL 3.2, not as specified in ARB_geometry_shader4.

 However, there are two mitigating factors:

 1. If we follow the GL 3.2 spec, then it's not clear what should happen if
 the client calls glClear() on a framebuffer with mismatched layer counts.
 For example, if I have a color attachment with 4 layers and a color
 attachment with 2 layers, should all 4 layers of the color attachment with 4
 layers be cleared, or just the first 2?  Or is it undefined?  If we're
 required to clear all 4 layers, that's going to make the Meta implementation
 of glClear() a lot more clumsy.

 2. The Nvidia proprietary drivers for Linux (version 313.18) follows the
 ARB_geometry_shader4 rule (returning FRAMEBUFFER_INCOMPLETE_LAYER_COUNT_ARB
 from glCheckFramebufferStatus()), even when an OpenGL 3.2+ context is used.


 Currently, I'm inclined to leave Mesa as is, and file a spec bug with
 Khronos encouraging them to adopt the ARB_geometry_shader4 text into OpenGL.
 I'm curious to hear other people's opinions.

 Thanks,

 Paul

 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev

 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 0/3] Enable GL 3.2 support for i965, bump Mesa version.

2013-11-22 Thread Kenneth Graunke

On 11/22/2013 03:15 PM, Clemens Eisserer wrote:
 Hi,
 
 I believe geometry shaders are all we would have to implement for Sandy
 Bridge.  Unfortunately, geometry shaders work pretty differently on Sandy
 Bridge, so getting them to work won't be a slam dunk.  Here's roughly what
 would have to be done:
 
 This is too sad - I had the impression mesa-10 would bring opengl-3.3
 to my SNB laptop I bought last year.
 I usually don't play computer games, but recently bought two games
 refusing to start (dear esther and oil rush) because both require ogl
 = 3.2.
 
 Regards, Clemens

Many applications require OpenGL 3.2 since it's the lowest 3.x version
supported by Mac OS X.  But they don't actually need the new features.
For such applications, you can try setting some environment variables:

export MESA_GL_VERSION_OVERRIDE=3.2
export MESA_GLSL_VERSION_OVERRIDE=150

These make the driver claim OpenGL 3.2 and GLSL 1.50 support, even
though it's incomplete.

OilRush ought to work fine with those set.

I haven't tried Dear Esther since it doesn't appear that there's a
native Linux client in Steam yet.  But you could give it a try.

--Ken
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] Fix --enable-XX-bit flags by moving LT_INIT where it should

2013-11-22 Thread Alexandre Demers

Moving LT_INIT after setting completely (AM_)C(XX)FLAGS and LDFLAGS.
LT_INIT needs them as they are expected to be used all along 
the compilation when the macro runs its tests to determine among other 
things the host type.

For info, see 
http://www.gnu.org/software/libtool/manual/html_node/LT_005fINIT.html

Fixes https://bugs.freedesktop.org/show_bug.cgi?id=50754

Signed-off-by: Alexandre Demers alexandre.f.dem...@gmail.com
Tested-by: Tapani Palli lem...@gmail.com
---
 configure.ac | 11 ---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/configure.ac b/configure.ac
index fb16338..d41595d 100644
--- a/configure.ac
+++ b/configure.ac
@@ -51,9 +51,6 @@ AX_PYTHON_MODULE([libxml2], [needed])
 AC_PROG_SED
 AC_PROG_MKDIR_P
 
-LT_PREREQ([2.2])
-LT_INIT([disable-static])
-
 AX_PROG_BISON([],
   AS_IF([test ! -f $srcdir/src/glsl/glcpp/glcpp-parse.c],
 [AC_MSG_ERROR([bison not found - unable to compile 
glcpp-parse.y])]))
@@ -1956,6 +1953,14 @@ dnl Add user CFLAGS and CXXFLAGS
 CFLAGS=$CFLAGS $USER_CFLAGS
 CXXFLAGS=$CXXFLAGS $USER_CXXFLAGS
 
+dnl
+dnl LT_INIT adds tests to determine host based on some variables like 
(AM_)C(XX)FLAGS and (AM_)LDFLAGS.
+dnl They need to be set before calling LT_INIT so the macro can configure 
things correctly when cross_compiling.
+dnl This will allow --enable-xx-bit to work as expected.
+dnl
+LT_PREREQ([2.2])
+LT_INIT([disable-static])
+
 dnl Substitute the config
 AC_CONFIG_FILES([Makefile
src/Makefile
-- 
1.8.4.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [Intel-gfx] [PATCH] dri3, i915, i965: Add __DRI_IMAGE_FOURCC_SARGB8888

2013-11-22 Thread Ville Syrjälä

On Fri, Nov 22, 2013 at 03:43:13PM -0800, Keith Packard wrote:
 Ville Syrjälä ville.syrj...@linux.intel.com writes:
 
  What is this format anyway? -ENODOCS
 
 Same as MESA_FORMAT_SARGB8 and __DRI_IMAGE_FORMAT_SARGB8 :-)
 
  If its just an srgb version of ARGB, then I wouldn't really want it
  in drm_fourcc.h. I expect colorspacy stuff will be handled by various
  crtc/plane properties in the kernel so we don't need to encode that
  stuff into the fb format.
 
 It's not any different from splitting YUV codes from RGB codes;

Not really. Saying something is YUV (or rather Y'CbCr) doesn't
actually tell you the color space. It just tells you whether the
information is encoded as R+G+B or Y+Cb+Cr. How you convert between
them is another matter. You need to know the gamma, color primaries,
chroma siting for sub-sampled YCbCr formats, etc.

-- 
Ville Syrjälä
Intel OTC
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/2] i965/gs: Fix gl_PrimitiveIDIn when using SW primitive restart.

2013-11-22 Thread Eric Anholt

Paul Berry stereotype...@gmail.com writes:

 Ivy Bridge hardware doesn't support primitve restart under all
 circumstances--when it doesn't, we emulate it in software by splitting
 up each logical draw operation into multiple 3DPRIMITIVE commands.
 This causes the hardware's primitive ID counter to be reset to 0,
 producing incorrect values for gl_PrimitiveIDIn.

 To work around that problem, we create a hidden uniform which is added
 to the hardware's primitive ID counter at the top of the geometry
 shader; the software primitive restart mechanism ensures that this
 uniform always contains the number of primitives that were sent down
 the pipeline by previous 3DPRIMITIVE commands within the same logical
 draw operation.

 To reduce the performance impact of the workaround, we only compile it
 into the shader when the hardware is Ivy Bridge and the shader
 accesses gl_PrimitiveIDIn.  To reduce unnecessary state updates, we
 only flag _NEW_PROGRAM_CONSTANTS when when the workaround is compiled
 in *and* software primitive restart is active.

 On Ivy Bridge, fixes all variants of
 glsl-1.50-geometry-primitive-id-restart except the GL_LINE_LOOP
 variants, which are broken due to an unrelated hardware limitation.

 diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp 
 b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
 index 96636e8..50feb89 100644
 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
 +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
 @@ -116,9 +116,39 @@ vec4_gs_visitor::setup_payload()
  }
  
  
 +/**
 + * When using software primitive restart, Ivy Bridge resets the primitive ID
 + * on each restart.  Work around this by creating a hidden uniform whose 
 value
 + * will be added to the incoming primitive ID.
 + */
 +void
 +vec4_gs_visitor::primitive_id_workaround()
 +{
 +   /* Set up the uniform */
 +   this-uniform_vector_size[this-uniforms] = 1;
 +   dst_reg primitive_id_offset(UNIFORM, this-uniforms);

Or
src_reg primitive_id_offset(UNIFORM, this-uniforms, glsl_type::uint_type);

That'll get you a reasonable swizzle that won't be pulling in other
weird stuff when the uniform gets repacked.

 +   primitive_id_offset.type = BRW_REGISTER_TYPE_UD;
 +   for (int i = 0; i  4; i++) {
 +  prog_data-param[this-uniforms * 4 + i] =
 + reinterpret_castconst float *(brw-prim_restart.sw_prim_counter);
 +   }

In other cases param components beyond uniform_vector_size have had a
pointer to a static zero, but I don't see a harm from doing this,
either.

(I would have used a normal C cast, but oh well)

 +   this-uniforms++;
 +
 +   /* Emit code to add the uniform to primitive ID */
 +   this-current_annotation = primitive ID workaround;
 +   dst_reg dst(ATTR, VARYING_SLOT_PRIMITIVE_ID);
 +   dst.type = BRW_REGISTER_TYPE_UD;
 +   emit(ADD(dst, src_reg(dst), src_reg(primitive_id_offset)));

Note: This overwrites .yzw of g1, but it looks like those are delivered
as zero, and I suspect they're not used afterwards.  You may want to use
dst(ATTR, VARYING_SLOT_PRIMITIVE_ID, glsl_type::uint_type, WRITEMASK_X)
for sanity.

 +   this-current_annotation = NULL;
 +}

I don't think any of this feedback has an actual runtime effect.  Either
way, these two patches are:

Reviewed-by: Eric Anholt e...@anholt.net



pgpoEszrmNyb8.pgp
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/2] mesa: Add gl_formats to cover all GLUser provided format/type combinations

2013-11-22 Thread Mark Mueller

On Fri, Nov 22, 2013 at 3:06 PM, Marek Olšák mar...@gmail.com wrote:

 On Fri, Nov 22, 2013 at 10:45 PM, Mark Mueller markkmuel...@gmail.com
 wrote:
 
 
 
  On Fri, Nov 22, 2013 at 12:36 PM, Marek Olšák mar...@gmail.com wrote:
 
  On Fri, Nov 15, 2013 at 9:58 PM, Mark Mueller markkmuel...@gmail.com
  wrote:
  
  
  
   On Fri, Nov 15, 2013 at 9:52 AM, Marek Olšák mar...@gmail.com
 wrote:
  
   I don't understand this and I don't think this is the right way to
   implement hw-accelerated TexImage. Some of the formats are
 unsupported
   by all hardware I know, others just don't make any sense (e.g.
   RGBA5999_REV).
  
  
   Please check out st_choose_matching_format. This is what the function
   comment says:
   /**
* Given an OpenGL user-requested format and type, and swapBytes
 state,
* return the format which exactly matches those parameters, so that
* a memcpy-based transfer can be done.
*
* If no format is supported, return PIPE_FORMAT_NONE.
*/
  
   My patch allows for similar functionality without using Gallium
   PIPE_FORMATs, which aren't supported in the i965 driver. RGPA5999_REV
 is
   there because it is used by a the glean test case pixelFormats. Having
   hardware support for the added formats is not relevant.
 
  Why is it not relevant? If there is no hardware support, adding those
  formats is a waste of time. Will the formats be unpacked by shaders or
  what? The MESA_* formats have only one purpose: to be used as OpenGL
  textures and renderbuffers.
 
  Yes, that's the nature of GPU based texture uploading. The driver
 generates
  a shader and the GPU does the necessary unpacking from a cached memcpy of
  the application texture and stuffs it into the final mip tree in its
 target
  tiled format. In order to do that, the driver must know the original
 format
  of the texture, which can't be done from the current choices of
 gl_formats,

 This is not true. Drivers have access to the parameters of TexImage
 and are allowed to do with them whatever they want, so they do know
 the exact format of user data. I don't see why you want new texture
 formats while all you need is a shader that can read raw bytes and do
 the unpacking. If you used a blit instead of a shader and your
 hardware had fixed-function circuitry to read the new formats, that
 would be an entirely different story.

 If I implemented shader-based unpacking, I would probably use a
 texture buffer object and common formats like R8UI, R16UI, R32UI, same
 for RG, RGB, and RGBA and also the signed variants, but not much else.


How about let's forget the whole concept of GPU loading for a moment as
that is only clouding the issue. As you said: the mesa_* formats have one
purpose: to be used as OpenGL textures When I read the OGL spec for
glTexImage2D it lists GL_BGR as a format. Yet looking at mesa_* formats
there is only one BGR format represented: MESA_FORMAT_BGR888. There are 8
other valid OGL BGR textures that don't have MESA_FORMATs while all of the
RGBs ones _are_ all there? ie. these are the OGL BGR formats that are
missing:

   MESA_FORMAT_BGR_INT8,
   MESA_FORMAT_BGR_UINT8,
   MESA_FORMAT_BGR_INT16,
   MESA_FORMAT_BGR_UINT16,
   MESA_FORMAT_BGR_FLOAT16,
   MESA_FORMAT_BGR_INT32,
   MESA_FORMAT_BGR_UINT32,
   MESA_FORMAT_BGR_FLOAT32

There are also RGB, RGBA, and BGRA formats missing. Completeness seems like
justification enough for their inclusion:

   /* Red Green Blue */
   MESA_FORMAT_RGB233_REV,
   MESA_FORMAT_RGB10_REV,

   /* Red Green Blue Alpha */
   MESA_FORMAT_RGBA1010102,
   MESA_FORMAT_RGBA2101010_REV,
   MESA_FORMAT_RGBA5999_REV,
   MESA_FORMAT_RGBA,
   MESA_FORMAT_RGBA_REV,
   MESA_FORMAT_RGBA1555_REV,

   /* Blue Green Red Alpha */
   MESA_FORMAT_BGRA_INT8,
   MESA_FORMAT_BGRA_INT8_REV,
   MESA_FORMAT_BGRA_UINT8,
   MESA_FORMAT_BGRA_INT16,
   MESA_FORMAT_BGRA_UINT16,
   MESA_FORMAT_BGRA_FLOAT16,
   MESA_FORMAT_BGRA_INT32,
   MESA_FORMAT_BGRA_UINT32,
   MESA_FORMAT_BGRA_FLOAT32,
   MESA_FORMAT_BGRA,
   MESA_FORMAT_BGRA_REV,
   MESA_FORMAT_BGRA5551,
   MESA_FORMAT_BGRA1555_REV,
   MESA_FORMAT_BGRA1010102,
   MESA_FORMAT_BGRA2101010_REV,
   MESA_FORMAT_BGRA5999_REV,

Mark
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 0/3] Implement DRI_PRIME support for Wayland

2013-11-22 Thread Axel Davy

On 11/22/2013 01:16 AM, Kristian Høgsberg wrote:
 I'm not sold on the nested compositor for this use case.  It
 introduces another context switch between the game and the main
 compositor and more input and output lag.  Instead I think we need to
 consider whether we want a new __DRIimage entry point like:

   (*blitImage)(__DRIcontext *ctx, __DRIimage *src, __DRIimage *ctx)

 and then do the copy in platform_wayland.c when we have non-matching
 tile-formats.

 Kristian


Thanks for the comments.

There are advantages to both possibilities:
using a nested compositor or doing the copy inside Mesa.

I imagine doing a blit could be the default,
and rendering directly to the linear buffer could be an option
set by an env var, or driconf.

I'm deeply convinced we should allow to render to the linear
buffer without copy, and the nested compositor use-case makes sense.

For the blit, a function

(*blitImage)(__DRIcontext *ctx, __DRIimage *src, __DRIimage *dst)

makes sense, but we would need another function
(not related specifically to Prime):

(*throttle) (__DRIcontext *ctx)

Because rendering something heavy on non-intel card (intel cards to
throttling automatically)
cause input lag (and It is solved by forcing throttling at every swap).

And ideally, we could have more control on tiling,
for example if the computer has two AMD cards of the same
generation with same tiling modes, we could always use tiling.


I would like the compositor to still send the classic drm device in
the wl_drm.device event.  The client can then use stat(2) to stat it
and defer the corresponding render node from that by adding 128 to the
minor.  This way we don't break older mesa versions by sending them a
render node that they'll then fail to authenticate.

I do not agree on this: if the compositor does run under a render-node,
there are high chances it can't authenticate clients wanting to run
on the not-render-node device.
Moreover, I believe clients shouldn't use render-nodes by default if
they can avoid it.

I don't get the point of older mesa versions: why would you use an older
Mesa version inside a more recent Mesa version?


Some arguments in favor of allowing the nested compositor case to render
without copy
on another card:

. XWayland inside would run as if the main device is the device of the
nested compositor. (I can't say how X dri3 will support Prime, so I
can't say yet if this is a big advantage or not). For example if Xrender is
slow on the main card, you can with this system use Xrender on the other
card.

. In case you are under system compositors (like would KWin), you would
like to be able to render your whole desktop on the card you want,
without an additional copy.
. We could imagine having outputs on different card, the compositor
under system compositors would connect to multiple system compositors
running on each card (and giving access to different outputs). The
compositor would use card X: the system compositor on card X would have
tiled buffers without copies, whereas the other system compositors would
have untiled buffers without copies.

. The nested compositor could allow the user to choose between capping
the compositing to 60 fps
or not. When we would cap the compositing to 60 fps, we would avoid some
useless copies (while adding a very small latency between when the frame
is sent and when it is displayed)

(. The nested compositor could have additional features like recording
using the acceleration of the other card )

All these arguments can be put in short in: more flexibility


For an heavy game  60 fps, I agree it makes much more sense to do the
copy inside Mesa, than using a nested compositor.


Axel Davy
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

65 matches

Mail list logo