[Mesa-dev] [PATCH 1/2] i965: Allocate vec4_visitor's uniform_size and uniform_vector_size arrays dynamically.
Signed-off-by: Petri Latvala petri.latv...@intel.com --- src/mesa/drivers/dri/i965/brw_vec4.cpp| 5 +++-- src/mesa/drivers/dri/i965/brw_vec4.h | 14 +++--- src/mesa/drivers/dri/i965/brw_vec4_gs.c | 2 +- src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 12 +++- src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h | 6 -- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp| 7 ++- src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp | 6 -- src/mesa/drivers/dri/i965/brw_vs.c| 2 +- src/mesa/drivers/dri/i965/brw_vs.h| 6 -- 9 files changed, 41 insertions(+), 19 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 73f91a0..3da1b4d 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -1558,7 +1558,8 @@ brw_vs_emit(struct brw_context *brw, struct brw_vs_compile *c, struct brw_vs_prog_data *prog_data, void *mem_ctx, -unsigned *final_assembly_size) +unsigned *final_assembly_size, +unsigned param_count) { bool start_busy = false; float start_time = 0; @@ -1585,7 +1586,7 @@ brw_vs_emit(struct brw_context *brw, } } - vec4_vs_visitor v(brw, c, prog_data, prog, shader, mem_ctx); + vec4_vs_visitor v(brw, c, prog_data, prog, shader, mem_ctx, param_count); if (!v.run()) { if (prog) { prog-LinkStatus = false; diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 5cec9f9..552ca55 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -231,7 +231,8 @@ public: struct brw_shader *shader, void *mem_ctx, bool debug_flag, -bool no_spills); +bool no_spills, +unsigned param_count); ~vec4_visitor(); dst_reg dst_null_f() @@ -325,8 +326,9 @@ public: */ dst_reg output_reg[BRW_VARYING_SLOT_COUNT]; const char *output_reg_annotation[BRW_VARYING_SLOT_COUNT]; - int uniform_size[MAX_UNIFORMS]; - int uniform_vector_size[MAX_UNIFORMS]; + unsigned uniform_param_count; + int* uniform_size; + int* uniform_vector_size; int uniforms; src_reg shader_start_time; @@ -546,6 +548,12 @@ private: * If true, then register allocation should fail instead of spilling. */ const bool no_spills; + + /* +* Make noncopyable. +*/ + vec4_visitor(const vec4_visitor); + vec4_visitor operator=(const vec4_visitor); }; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs.c b/src/mesa/drivers/dri/i965/brw_vec4_gs.c index b52d646..b0b77d1 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs.c +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs.c @@ -213,7 +213,7 @@ do_gs_prog(struct brw_context *brw, void *mem_ctx = ralloc_context(NULL); unsigned program_size; const unsigned *program = - brw_gs_emit(brw, prog, c, mem_ctx, program_size); + brw_gs_emit(brw, prog, c, mem_ctx, program_size, param_count); if (program == NULL) { ralloc_free(mem_ctx); return false; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp index adbb1cf..16c05f5 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp @@ -38,10 +38,11 @@ vec4_gs_visitor::vec4_gs_visitor(struct brw_context *brw, struct gl_shader_program *prog, struct brw_shader *shader, void *mem_ctx, - bool no_spills) + bool no_spills, + unsigned param_count) : vec4_visitor(brw, c-base, c-gp-program.Base, c-key.base, c-prog_data.base, prog, shader, mem_ctx, - INTEL_DEBUG DEBUG_GS, no_spills), + INTEL_DEBUG DEBUG_GS, no_spills, param_count), c(c) { } @@ -539,7 +540,8 @@ brw_gs_emit(struct brw_context *brw, struct gl_shader_program *prog, struct brw_gs_compile *c, void *mem_ctx, -unsigned *final_assembly_size) +unsigned *final_assembly_size, +unsigned param_count) { struct brw_shader *shader = (brw_shader *) prog-_LinkedShaders[MESA_SHADER_GEOMETRY]; @@ -556,7 +558,7 @@ brw_gs_emit(struct brw_context *brw, if (likely(!(INTEL_DEBUG DEBUG_NO_DUAL_OBJECT_GS))) { c-prog_data.dual_instanced_dispatch = false; - vec4_gs_visitor v(brw, c, prog, shader, mem_ctx, true /* no_spills */); + vec4_gs_visitor v(brw, c, prog, shader, mem_ctx, true /* no_spills */, param_count); if (v.run()) { vec4_generator g(brw, prog,
[Mesa-dev] [PATCH 2/2] i965: Assert array index on access to vec4_visitor's arrays.
Signed-off-by: Petri Latvala petri.latv...@intel.com --- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 6 ++ 1 file changed, 6 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index df38dab..511b080 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -662,6 +662,7 @@ vec4_visitor::setup_uniform_values(ir_variable *ir) storage-type-matrix_columns); for (unsigned s = 0; s vector_count; s++) { + assert(uniforms uniform_param_count); uniform_vector_size[uniforms] = storage-type-vector_elements; int i; @@ -685,6 +686,7 @@ vec4_visitor::setup_uniform_clipplane_values() gl_clip_plane *clip_planes = brw_select_clip_planes(ctx); for (int i = 0; i key-nr_userclip_plane_consts; ++i) { + assert(this-uniforms uniform_param_count); this-uniform_vector_size[this-uniforms] = 4; this-userplane[i] = dst_reg(UNIFORM, this-uniforms); this-userplane[i].type = BRW_REGISTER_TYPE_F; @@ -715,6 +717,7 @@ vec4_visitor::setup_builtin_uniform_values(ir_variable *ir) (gl_state_index *)slots[i].tokens); float *values = this-prog-Parameters-ParameterValues[index][0].f; + assert(this-uniforms uniform_param_count); this-uniform_vector_size[this-uniforms] = 0; /* Add each of the unique swizzled channels of the element. * This will end up matching the size of the glsl_type of this field. @@ -725,6 +728,7 @@ vec4_visitor::setup_builtin_uniform_values(ir_variable *ir) last_swiz = swiz; prog_data-param[this-uniforms * 4 + j] = values[swiz]; +assert(this-uniforms uniform_param_count); if (swiz = last_swiz) this-uniform_vector_size[this-uniforms]++; } @@ -983,6 +987,7 @@ vec4_visitor::visit(ir_variable *ir) /* Track how big the whole uniform variable is, in case we need to put a * copy of its data into pull constants for array access. */ + assert(this-uniforms uniform_param_count); this-uniform_size[this-uniforms] = type_size(ir-type); if (!strncmp(ir-name, gl_, 3)) { @@ -3197,6 +3202,7 @@ vec4_visitor::move_uniform_array_access_to_pull_constants() pull_constant_loc[uniform] = prog_data-nr_pull_params / 4; + assert(uniform uniform_param_count); for (int j = 0; j uniform_size[uniform] * 4; j++) { prog_data-pull_param[prog_data-nr_pull_params++] = values[j]; -- 1.8.4.rc3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 0/2] v2: Fix array overrun with too many uniforms
Second attempt at fixing https://bugs.freedesktop.org/show_bug.cgi?id=71254 vec4_visitor::uniform_size and vec4_visitor::uniform_vector_size are now allocated dynamically based on param_count from do_vs_prog. I also made vec4_visitor noncopyable. It is not copied currently, and to my understanding copying it would be nonsense anyway. The passed param_count is saved in vec4_visitor for the asserts in patch 2. That could be wrapped in #ifndef NDEBUG but I wasn't sure whether that is desired. It would change the ABI for that class depending on compile flags, but the only use for it is internal. Petri Latvala (2): i965: Allocate vec4_visitor's uniform_size and uniform_vector_size arrays dynamically. i965: Assert array index on access to vec4_visitor's arrays. src/mesa/drivers/dri/i965/brw_vec4.cpp| 5 +++-- src/mesa/drivers/dri/i965/brw_vec4.h | 14 +++--- src/mesa/drivers/dri/i965/brw_vec4_gs.c | 2 +- src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 12 +++- src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h | 6 -- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp| 13 - src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp | 6 -- src/mesa/drivers/dri/i965/brw_vs.c| 2 +- src/mesa/drivers/dri/i965/brw_vs.h| 6 -- 9 files changed, 47 insertions(+), 19 deletions(-) -- 1.8.4.rc3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] mesa: Remove the ralloc canary on release builds.
The canary is basically just to give a better debugging message when you ralloc_free() something that wasn't rallocated. Reduces maximum memory usage of apitrace replay of the dota2 demo by 60MB on my 64-bit system (so half that on a real 32-bit dota2 environment). --- src/glsl/ralloc.c | 6 ++ 1 file changed, 6 insertions(+) diff --git a/src/glsl/ralloc.c b/src/glsl/ralloc.c index e79dad7..36bc61f 100644 --- a/src/glsl/ralloc.c +++ b/src/glsl/ralloc.c @@ -53,8 +53,10 @@ _CRTIMP int _vscprintf(const char *format, va_list argptr); struct ralloc_header { +#ifdef DEBUG /* A canary value used to determine whether a pointer is ralloc'd. */ unsigned canary; +#endif struct ralloc_header *parent; @@ -78,7 +80,9 @@ get_header(const void *ptr) { ralloc_header *info = (ralloc_header *) (((char *) ptr) - sizeof(ralloc_header)); +#ifdef DEBUG assert(info-canary == CANARY); +#endif return info; } @@ -117,7 +121,9 @@ ralloc_size(const void *ctx, size_t size) add_child(parent, info); +#ifdef DEBUG info-canary = CANARY; +#endif return PTR_FROM_HEADER(info); } -- 1.8.4.rc3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] mesa: Remove the ralloc canary on release builds.
On 11/22/2013 12:21 AM, Eric Anholt wrote: The canary is basically just to give a better debugging message when you ralloc_free() something that wasn't rallocated. Reduces maximum memory usage of apitrace replay of the dota2 demo by 60MB on my 64-bit system (so half that on a real 32-bit dota2 environment). Thanks so much for doing the measurement on this. Obvious improvement, and frankly I'm embarrassed that I didn't think to do this years ago. :) Reviewed-by: Kenneth Graunke kenn...@whitecape.org --- src/glsl/ralloc.c | 6 ++ 1 file changed, 6 insertions(+) diff --git a/src/glsl/ralloc.c b/src/glsl/ralloc.c index e79dad7..36bc61f 100644 --- a/src/glsl/ralloc.c +++ b/src/glsl/ralloc.c @@ -53,8 +53,10 @@ _CRTIMP int _vscprintf(const char *format, va_list argptr); struct ralloc_header { +#ifdef DEBUG /* A canary value used to determine whether a pointer is ralloc'd. */ unsigned canary; +#endif struct ralloc_header *parent; @@ -78,7 +80,9 @@ get_header(const void *ptr) { ralloc_header *info = (ralloc_header *) (((char *) ptr) - sizeof(ralloc_header)); +#ifdef DEBUG assert(info-canary == CANARY); +#endif return info; } @@ -117,7 +121,9 @@ ralloc_size(const void *ctx, size_t size) add_child(parent, info); +#ifdef DEBUG info-canary = CANARY; +#endif return PTR_FROM_HEADER(info); } ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] mesa: Remove the ralloc canary on release builds.
On 11/22/2013 12:21 AM, Eric Anholt wrote: The canary is basically just to give a better debugging message when you ralloc_free() something that wasn't rallocated. Reduces maximum memory usage of apitrace replay of the dota2 demo by 60MB on my 64-bit system (so half that on a real 32-bit dota2 environment). Really, half? It's an unsigned...that's 4 bytes regardless of 64-bit vs. 32-bit. I think this should be 60MB of savings, end of story. --Ken ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 50754] Building 32 bit mesa on 64 bit OS fails since change for automake
https://bugs.freedesktop.org/show_bug.cgi?id=50754 --- Comment #26 from Tapani Pälli lem...@gmail.com --- (In reply to comment #25) Created attachment 89621 [details] [review] Move LT_INIT where it should so it can test correctly (AM_)C(XX)FLAGS and others Updated patch against latest git code. Based on previous patch with further explanations. LT_INIT needs to work with the final content of variables to be able to correctly run some tests and determine the host type. So it must be called after setting anything we want to set in (AM_)C(XX)FLAGS or (AM_)LDFLAGS. Fixing LT_INIT must be done independently from modifying environment variables or build variables since LT_INIT will check both variables and shadow variables if they are set. Tested on my setup and it works. I'd like someone else to also test it. Another patch should be made to fix llvm-config, which is another topic. Yep I verified this on my machine, the patch still works with the comments :) -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 71591] Second Life shaders fail to compile (extension declared in middle of shader)
https://bugs.freedesktop.org/show_bug.cgi?id=71591 Eero Tamminen eero.t.tammi...@intel.com changed: What|Removed |Added Summary|Second Life shaders fail to |Second Life shaders fail to |compile |compile (extension declared ||in middle of shader) --- Comment #7 from Eero Tamminen eero.t.tammi...@intel.com --- Got a comment from Ian Kenneth on what's the Mesa policy for things that are against GL spec, but accepted by competing driver(s): --- 1. Follow the spec. 2. If nobody follows the spec, get the spec changed. Goto 1. With a rare case of: Add driconf workaround for broken app, but only if: - People still care about broken app - App will never get fixed According to comment 1), ATI driver doesn't follow the spec. What about Nvidia driver? -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] dri3, i915, i965: Add __DRI_IMAGE_FOURCC_SARGB8888
On Thu, Nov 21, 2013 at 08:12:04PM -0800, Keith Packard wrote: The __DRIimage createImageFromFds function takes a fourcc code, but there was no fourcc code that match __DRI_IMAGE_FORMAT_SARGB8. This adds a define for that format, adds a translation in DRI3 from __DRI_IMAGE_FORMAT_SARGB8 to __DRI_IMAGE_FOURCC_SARGB and then adds translations *back* to __IMAGE_FORMAT_SARGB8 in both the i915 and i965 drivers. I'll refrain from comments on whether I think having two separate sets of format defines in dri_interface.h is a good idea or not... Signed-off-by: Keith Packard kei...@keithp.com Hm, where do we have the canonical source for all these fourcc codes? I'm asking since we have our own copy in the kernel as drm_fourcc.h, and that one is part of the userspace ABI since we use it to pass around framebuffer formats and format lists. Just afraid to create long-term maintainance madness here with the kernel's iron thou-shalt-not-break-userspace-ever rule ... Not likely we'll ever accept srgb for framebuffers though. -Daniel --- This gets iceweasel running with the GL compositor enabled. include/GL/internal/dri_interface.h | 1 + src/glx/dri3_glx.c | 1 + src/mesa/drivers/dri/i915/intel_screen.c | 3 +++ src/mesa/drivers/dri/i965/intel_screen.c | 3 +++ 4 files changed, 8 insertions(+) diff --git a/include/GL/internal/dri_interface.h b/include/GL/internal/dri_interface.h index b012570..a4387c4 100644 --- a/include/GL/internal/dri_interface.h +++ b/include/GL/internal/dri_interface.h @@ -1034,6 +1034,7 @@ struct __DRIdri2ExtensionRec { #define __DRI_IMAGE_FOURCC_XRGB 0x34325258 #define __DRI_IMAGE_FOURCC_ABGR 0x34324241 #define __DRI_IMAGE_FOURCC_XBGR 0x34324258 +#define __DRI_IMAGE_FOURCC_SARGB0x83324258 #define __DRI_IMAGE_FOURCC_YUV4100x39565559 #define __DRI_IMAGE_FOURCC_YUV4110x31315559 #define __DRI_IMAGE_FOURCC_YUV4200x32315559 diff --git a/src/glx/dri3_glx.c b/src/glx/dri3_glx.c index b047cc8..5861317 100644 --- a/src/glx/dri3_glx.c +++ b/src/glx/dri3_glx.c @@ -890,6 +890,7 @@ image_format_to_fourcc(int format) /* Convert from __DRI_IMAGE_FORMAT to __DRI_IMAGE_FOURCC (sigh) */ switch (format) { + case __DRI_IMAGE_FORMAT_SARGB8: return __DRI_IMAGE_FOURCC_SARGB; case __DRI_IMAGE_FORMAT_RGB565: return __DRI_IMAGE_FOURCC_RGB565; case __DRI_IMAGE_FORMAT_XRGB: return __DRI_IMAGE_FOURCC_XRGB; case __DRI_IMAGE_FORMAT_ARGB: return __DRI_IMAGE_FOURCC_ARGB; diff --git a/src/mesa/drivers/dri/i915/intel_screen.c b/src/mesa/drivers/dri/i915/intel_screen.c index 7f1fc6b..2dd2bc7 100644 --- a/src/mesa/drivers/dri/i915/intel_screen.c +++ b/src/mesa/drivers/dri/i915/intel_screen.c @@ -184,6 +184,9 @@ static struct intel_image_format intel_image_formats[] = { { __DRI_IMAGE_FOURCC_ARGB, __DRI_IMAGE_COMPONENTS_RGBA, 1, { { 0, 0, 0, __DRI_IMAGE_FORMAT_ARGB, 4 } } }, + { __DRI_IMAGE_FOURCC_SARGB, __DRI_IMAGE_COMPONENTS_RGBA, 1, + { { 0, 0, 0, __DRI_IMAGE_FORMAT_SARGB8, 4 } } }, + { __DRI_IMAGE_FOURCC_XRGB, __DRI_IMAGE_COMPONENTS_RGB, 1, { { 0, 0, 0, __DRI_IMAGE_FORMAT_XRGB, 4 }, } }, diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c index e44d0f6..cf8c3e2 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.c +++ b/src/mesa/drivers/dri/i965/intel_screen.c @@ -220,6 +220,9 @@ static struct intel_image_format intel_image_formats[] = { { __DRI_IMAGE_FOURCC_ARGB, __DRI_IMAGE_COMPONENTS_RGBA, 1, { { 0, 0, 0, __DRI_IMAGE_FORMAT_ARGB, 4 } } }, + { __DRI_IMAGE_FOURCC_SARGB, __DRI_IMAGE_COMPONENTS_RGBA, 1, + { { 0, 0, 0, __DRI_IMAGE_FORMAT_SARGB8, 4 } } }, + { __DRI_IMAGE_FOURCC_XRGB, __DRI_IMAGE_COMPONENTS_RGB, 1, { { 0, 0, 0, __DRI_IMAGE_FORMAT_XRGB, 4 }, } }, -- 1.8.4.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev -- Daniel Vetter Software Engineer, Intel Corporation +41 (0) 79 365 57 48 - http://blog.ffwll.ch ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 71912] Compounded macros in shaders: huge memory consumption
https://bugs.freedesktop.org/show_bug.cgi?id=71912 --- Comment #1 from Kevin Rogovin kevin.rogo...@intel.com --- Created attachment 89626 -- https://bugs.freedesktop.org/attachment.cgi?id=89626action=edit paired fragment shader -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] dri3, i915, i965: Add __DRI_IMAGE_FOURCC_SARGB8888
Daniel Vetter dan...@ffwll.ch writes: Hm, where do we have the canonical source for all these fourcc codes? I'm asking since we have our own copy in the kernel as drm_fourcc.h, and that one is part of the userspace ABI since we use it to pass around framebuffer formats and format lists. I think it's the kernel? I really don't know, as the whole notion of fourcc codes seems crazy to me... Feel free to steal this code and stick it in the kernel if you like. Just afraid to create long-term maintainance madness here with the kernel's iron thou-shalt-not-break-userspace-ever rule ... Not likely we'll ever accept srgb for framebuffers though. Would suck to collide with something we do want though. -- keith.pack...@intel.com pgpf4F9vCQb7I.pgp Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 71912] New: Compounded macros in shaders: huge memory consumption
https://bugs.freedesktop.org/show_bug.cgi?id=71912 Priority: medium Bug ID: 71912 Assignee: mesa-dev@lists.freedesktop.org Summary: Compounded macros in shaders: huge memory consumption Severity: normal Classification: Unclassified OS: All Reporter: kevin.rogo...@intel.com Hardware: Other Status: NEW Version: 9.1 Component: Mesa core Product: Mesa Created attachment 89625 -- https://bugs.freedesktop.org/attachment.cgi?id=89625action=edit Vertex shader that has deep macro The attached shader causes literally gigabytes upon gigabytes of memory to be consumed when submitted to Mesa. The shader cause is because it compounds macros many levels deep. Mesa is at the stage of applying the preprocessor and does not get to even parsing the shader, much less compiling it. The main issue is for WebGL enabled browsers. Indeed, a malicious website could submit such a shader (which is quite small) and bring the user's system to a standstill halt almost. A possible fix is to essentially add additional logic to the preprocessor computing the complexity multiplier of a macro and if that complexity multiplies is too high to abort with a message that macro expands into a too large expression. -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 71912] Compounded macros in shaders: huge memory consumption
https://bugs.freedesktop.org/show_bug.cgi?id=71912 --- Comment #2 from Kenneth Graunke kenn...@whitecape.org --- Nice find. Are you planning to try and fix this bug? -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 71912] Compounded macros in shaders: huge memory consumption
https://bugs.freedesktop.org/show_bug.cgi?id=71912 Kenneth Graunke kenn...@whitecape.org changed: What|Removed |Added Assignee|mesa-dev@lists.freedesktop. |i...@freedesktop.org |org | QA Contact||intel-3d-bugs@lists.freedes ||ktop.org Component|Mesa core |glsl-compiler --- Comment #3 from Kenneth Graunke kenn...@whitecape.org --- Since this is a preprocessor issue, changing the component to glsl-compiler and reassigning to Carl as a default assignee. -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] llvmpipe: support 8bit subpixel precision
On 11/22/2013 05:29 AM, Zack Rusin wrote: For me too, other than the fixed_position members, looks good. Thanks for your perseverance on this Zack! Thanks! ok, attached is a version that makes position and dx/dy 32bit again, it seems to work great. I have a question for you guys if you run the piglits: ./bin/triangle-rasterization-overdraw -max_size -seed 0xA8402F24 -count 1 -auto on master does it fail for you? It fails for me on master, with and without the patch. I'm not sure what to make of it, I might have been looking at rasterization for too long. Looking at the rendering it looks correct. z Looks good to me. The test you mentioned certainly fails for me too. Roland ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] intel: Track known prime buffers for re-use
If the application sends us a file descriptor pointing at a prime buffer that we've already got, we have to re-use the same bo_gem structure or chaos will result. Track the set of all known prime objects and look to see if the kernel has returned one of those for a new file descriptor. Signed-off-by: Keith Packard kei...@keithp.com --- This one took a while to find -- multiple bo_gem structs pointing at the same gem handle would either cause the object to be destroyed before we were done using it, or we'd end up sending the same gem_handle for multiple buffers. This looks a lot like the named object handling stuff, as one would expect. intel/intel_bufmgr_gem.c | 22 +- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c index df6fcec..2897bb2 100644 --- a/intel/intel_bufmgr_gem.c +++ b/intel/intel_bufmgr_gem.c @@ -112,6 +112,7 @@ typedef struct _drm_intel_bufmgr_gem { drmMMListHead named; drmMMListHead vma_cache; +drmMMListHead prime; int vma_count, vma_open, vma_max; uint64_t gtt_size; @@ -2451,8 +2452,25 @@ drm_intel_bo_gem_create_from_prime(drm_intel_bufmgr *bufmgr, int prime_fd, int s uint32_t handle; drm_intel_bo_gem *bo_gem; struct drm_i915_gem_get_tiling get_tiling; + drmMMListHead *list; ret = drmPrimeFDToHandle(bufmgr_gem-fd, prime_fd, handle); + + /* +* See if the kernel has already returned this buffer to us. Just as +* for named buffers, we must not create two bo's pointing at the same +* kernel object +*/ + for (list = bufmgr_gem-prime.next; +list != bufmgr_gem-prime; +list = list-next) { + bo_gem = DRMLISTENTRY(drm_intel_bo_gem, list, name_list); + if (bo_gem-gem_handle == handle) { + drm_intel_gem_bo_reference(bo_gem-bo); + return bo_gem-bo; + } + } + if (ret) { fprintf(stderr,ret is %d %d\n, ret, errno); return NULL; @@ -2487,8 +2505,8 @@ drm_intel_bo_gem_create_from_prime(drm_intel_bufmgr *bufmgr, int prime_fd, int s bo_gem-has_error = false; bo_gem-reusable = false; - DRMINITLISTHEAD(bo_gem-name_list); DRMINITLISTHEAD(bo_gem-vma_list); + DRMLISTADDTAIL(bo_gem-name_list, bufmgr_gem-prime); VG_CLEAR(get_tiling); get_tiling.handle = bo_gem-gem_handle; @@ -3301,5 +3319,7 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size) DRMINITLISTHEAD(bufmgr_gem-vma_cache); bufmgr_gem-vma_max = -1; /* unlimited by default */ + DRMINITLISTHEAD(bufmgr_gem-prime); + return bufmgr_gem-bufmgr; } -- 1.8.4.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] gallium: Use base.stamp for all drawable invalidation checks.
Reviewed-by: Marek Olšák marek.ol...@amd.com Marek On Fri, Nov 22, 2013 at 5:20 AM, Keith Packard kei...@keithp.com wrote: Upper levels of the stack use base.stamp to tell when a drawable needs to be revalidated, but the dri state tracker was using dPriv-lastStamp. Those two, along with dri2.stamp, all get simultaneously incremented when a dri2 invalidate event was delivered, and so end up containing precisely the same value. This patch doesn't change the fact that there are three variables, rather it switches all of the tests to use only base.stamp, which is functionally equivalent to the previous code. Then, it passes base.stamp to the image loader getBuffers function so that the one which is checked will get updated by the XCB special event queue used by DRI3. Signed-off-by: Keith Packard kei...@keithp.com --- This patch makes sure that drawables get invalidated when the window changes size or when SwapBuffers is called; dri3 has only a single location to smite when things change, so we need to make sure the upper levels all share that location. This should permit the elimination of the dri2.stamp and lastStamp variables, which would be a nice further cleanup. src/gallium/state_trackers/dri/common/dri_drawable.c | 4 ++-- src/gallium/state_trackers/dri/drm/dri2.c| 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/gallium/state_trackers/dri/common/dri_drawable.c b/src/gallium/state_trackers/dri/common/dri_drawable.c index f255108..734bca2 100644 --- a/src/gallium/state_trackers/dri/common/dri_drawable.c +++ b/src/gallium/state_trackers/dri/common/dri_drawable.c @@ -73,7 +73,7 @@ dri_st_framebuffer_validate(struct st_context_iface *stctx, * checked. */ do { - lastStamp = drawable-dPriv-lastStamp; + lastStamp = drawable-base.stamp; new_stamp = (drawable-texture_stamp != lastStamp); if (new_stamp || new_mask || screen-broken_invalidate) { @@ -91,7 +91,7 @@ dri_st_framebuffer_validate(struct st_context_iface *stctx, drawable-texture_stamp = lastStamp; drawable-texture_mask = statt_mask; } - } while (lastStamp != drawable-dPriv-lastStamp); + } while (lastStamp != drawable-base.stamp); if (!out) return TRUE; diff --git a/src/gallium/state_trackers/dri/drm/dri2.c b/src/gallium/state_trackers/dri/drm/dri2.c index 6a56cd4..c7e4151 100644 --- a/src/gallium/state_trackers/dri/drm/dri2.c +++ b/src/gallium/state_trackers/dri/drm/dri2.c @@ -545,7 +545,7 @@ dri_image_allocate_textures(struct dri_context *ctx, (*sPriv-image.loader-getBuffers) (dPriv, image_format, - dPriv-dri2.stamp, + (uint32_t *) drawable-base.stamp, dPriv-loaderPrivate, buffer_mask, images); -- 1.8.4.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 0/2] Minor __DRIimage fixes for i965
While debugging the libdrm duplicate buffer object adventure, I managed to temporarily understand object lifetimes in the __DRIimage getBuffers path, and also to compare that to the DRI2 getBuffers path. Here are a couple of small fixes. I haven't looked at the i915 code, but I suspect the first one, and parts of the second one would apply. [PATCH 1/2] i965: Correct check for re-bound buffer in The code was attempting to avoid re-creating the miptree when the loader handed back the same bufffer we already had, but it was confused about how to tell -- turns out the object actually shared between the loader and the driver is the BO, not the region. And so, we compare BO pointers to see if the image buffer needs to have a new miptree. [PATCH 2/2] i965: Set fast color clear mcs_state on newly allocated Here's a chunk of code that was just missing from the __DRIimage path as a result of rebasing across some new driver additions. Both of these fixes are candidates for 10.0 as they affect the __DRIimage paths now used by Wayland. -keith ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/2] i965: Correct check for re-bound buffer in intel_update_image_buffer
The buffer-object is the persistent thing passed through the loader, so when updating an image buffer, check to see if it is already bound to the provided bo. The region, on the other hand, is allocated separately for the miptree, and so will never be the same as that passed back from the loader. Signed-off-by: Keith Packard kei...@keithp.com --- src/mesa/drivers/dri/i965/brw_context.c | 19 +++ 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index bee98e3..64ff855 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -1339,10 +1339,21 @@ intel_update_image_buffer(struct brw_context *intel, unsigned num_samples = rb-Base.Base.NumSamples; - if (rb-mt - rb-mt-region - rb-mt-region == region) - return; + /* Check and see if we're already bound to the right +* buffer object +*/ + if (num_samples == 0) { + if (rb-mt + rb-mt-region + rb-mt-region-bo == region-bo) + return; + } else { + if (rb-mt + rb-mt-singlesample_mt + rb-mt-singlesample_mt-region + rb-mt-singlesample_mt-region-bo == region-bo) + return; + } intel_miptree_release(rb-mt); rb-mt = intel_miptree_create_for_image_buffer(intel, -- 1.8.4.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/2] i965: Set fast color clear mcs_state on newly allocated image miptrees
Just copying code from the dri2 path to set up the fast color clear state. This also removes a couple of bogus intel_region_reference calls. Signed-off-by: Keith Packard kei...@keithp.com --- src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 10 +++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index 884ddef..ca78f32 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -762,7 +762,13 @@ intel_miptree_create_for_image_buffer(struct brw_context *intel, if (!singlesample_mt) return NULL; - intel_region_reference(singlesample_mt-region, region); + /* If this miptree is capable of supporting fast color clears, set +* mcs_state appropriately to ensure that fast clears will occur. +* Allocation of the MCS miptree will be deferred until the first fast +* clear actually occurs. +*/ + if (intel_is_non_msrt_mcs_buffer_supported(intel, singlesample_mt)) + singlesample_mt-mcs_state = INTEL_MCS_STATE_RESOLVED; if (num_samples == 0) return singlesample_mt; @@ -780,8 +786,6 @@ intel_miptree_create_for_image_buffer(struct brw_context *intel, multisample_mt-singlesample_mt = singlesample_mt; multisample_mt-need_downsample = false; - intel_region_reference(multisample_mt-region, region); - if (intel-is_front_buffer_rendering buffer_type == __DRI_IMAGE_BUFFER_FRONT) { intel_miptree_upsample(intel, multisample_mt); } -- 1.8.4.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 0/2] Minor __DRIimage fixes for i965
On 22 November 2013 05:52, Keith Packard kei...@keithp.com wrote: While debugging the libdrm duplicate buffer object adventure, I managed to temporarily understand object lifetimes in the __DRIimage getBuffers path, and also to compare that to the DRI2 getBuffers path. Here are a couple of small fixes. I haven't looked at the i915 code, but I suspect the first one, and parts of the second one would apply. [PATCH 1/2] i965: Correct check for re-bound buffer in The code was attempting to avoid re-creating the miptree when the loader handed back the same bufffer we already had, but it was confused about how to tell -- turns out the object actually shared between the loader and the driver is the BO, not the region. And so, we compare BO pointers to see if the image buffer needs to have a new miptree. [PATCH 2/2] i965: Set fast color clear mcs_state on newly allocated Here's a chunk of code that was just missing from the __DRIimage path as a result of rebasing across some new driver additions. Both of these fixes are candidates for 10.0 as they affect the __DRIimage paths now used by Wayland. For future reference, patches that are candidates for the latest stable branch should include the line Cc: mesa-sta...@lists.freedesktop.org in the patch. (see http://lists.freedesktop.org/archives/mesa-dev/2013-August/042667.html) I've forwarded this email to mesa-sta...@lists.freedesktop.org, which should also be sufficient. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] tgsi: Prevent emission of instructions with empty writemask.
Thanks for reviewing. - Original Message - On 11/21/2013 02:01 PM, jfons...@vmware.com wrote: From: José Fonseca jfons...@vmware.com These degenerate instructions can often be emitted by state trackers when the semantics of instructions don't match precisely. --- src/gallium/auxiliary/tgsi/tgsi_ureg.c | 8 src/gallium/auxiliary/tgsi/tgsi_ureg.h | 22 ++ 2 files changed, 30 insertions(+) diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c index 432ed00..f06858e 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -1113,6 +1113,10 @@ ureg_insn(struct ureg_program *ureg, boolean negate = FALSE; unsigned swizzle[4] = { 0 }; + if (nr_dst ureg_dst_is_empty(dst[0])) { + return; + } This is technically not really correct, as the instruction may have multiple destinations. Not saying we really have any such instructions (or even that we should have them), but this helper (and the one below) has code to deal with multiple destinations, so that's a bit inconsistent. The assumption there is only one dst is already there. See the code immediately below saturate = nr_dst ? dst[0].Saturate : FALSE; predicate = nr_dst ? dst[0].Predicate : FALSE; I rather commit this as is now, as the multiple destination is not a real problem ATM, while the empty writemask do seem to arise due to me earlier commit. Also, there are instructions which have just one dst reg but side effects, though it may be restricted to OPCODE_POPA (and I wouldn't mind seeing it disappear) (I think the fence/atomic instructions might be fine and at first glance I don't see anything else). POPA is not used anywhere and can be removed. I'll do that in another commit. Jose saturate = nr_dst ? dst[0].Saturate : FALSE; predicate = nr_dst ? dst[0].Predicate : FALSE; if (predicate) { @@ -1162,6 +1166,10 @@ ureg_tex_insn(struct ureg_program *ureg, boolean negate = FALSE; unsigned swizzle[4] = { 0 }; + if (nr_dst ureg_dst_is_empty(dst[0])) { + return; + } Same as above. saturate = nr_dst ? dst[0].Saturate : FALSE; predicate = nr_dst ? dst[0].Predicate : FALSE; if (predicate) { diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h index cf0c75e..d973edb 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h @@ -454,6 +454,16 @@ ureg_imm1i( struct ureg_program *ureg, return ureg_DECL_immediate_int( ureg, a, 1 ); } +/* Where the destination register has a valid file, but an empty + * writemask. + */ +static INLINE boolean +ureg_dst_is_empty( struct ureg_dst dst ) +{ + return dst.File != TGSI_FILE_NULL + dst.WriteMask == 0; +} + /*** * Functions for patching up labels */ @@ -650,6 +660,7 @@ static INLINE void ureg_##op( struct ureg_program *ureg,\ { \ unsigned opcode = TGSI_OPCODE_##op; \ struct ureg_emit_insn_result insn; \ + if (ureg_dst_is_empty(dst)) return; \ insn = ureg_emit_insn(ureg, \ opcode,\ dst.Saturate, \ @@ -673,6 +684,7 @@ static INLINE void ureg_##op( struct ureg_program *ureg,\ { \ unsigned opcode = TGSI_OPCODE_##op; \ struct ureg_emit_insn_result insn; \ + if (ureg_dst_is_empty(dst)) return; \ insn = ureg_emit_insn(ureg, \ opcode,\ dst.Saturate, \ @@ -697,6 +709,7 @@ static INLINE void ureg_##op( struct ureg_program *ureg,\ { \ unsigned opcode = TGSI_OPCODE_##op; \ struct ureg_emit_insn_result insn; \ + if (ureg_dst_is_empty(dst)) return; \ insn = ureg_emit_insn(ureg, \ opcode,\ dst.Saturate,
Re: [Mesa-dev] [PATCH] llvmpipe: support 8bit subpixel precision
- Original Message - On 11/22/2013 05:29 AM, Zack Rusin wrote: For me too, other than the fixed_position members, looks good. Thanks for your perseverance on this Zack! Thanks! ok, attached is a version that makes position and dx/dy 32bit again, it seems to work great. I have a question for you guys if you run the piglits: ./bin/triangle-rasterization-overdraw -max_size -seed 0xA8402F24 -count 1 -auto on master does it fail for you? It fails for me on master, with and without the patch. I'm not sure what to make of it, I might have been looking at rasterization for too long. Looking at the rendering it looks correct. z Looks good to me. The test you mentioned certainly fails for me too. Roland I get Got spurious window resize in automatic run (8192,8192 to 1000,1000) So I can't really run the test. I think we need a fbo mode for this test too. Jose ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] tgsi: Prevent emission of instructions with empty writemask.
On 11/22/2013 03:03 PM, Jose Fonseca wrote: Thanks for reviewing. - Original Message - On 11/21/2013 02:01 PM, jfons...@vmware.com wrote: From: José Fonseca jfons...@vmware.com These degenerate instructions can often be emitted by state trackers when the semantics of instructions don't match precisely. --- src/gallium/auxiliary/tgsi/tgsi_ureg.c | 8 src/gallium/auxiliary/tgsi/tgsi_ureg.h | 22 ++ 2 files changed, 30 insertions(+) diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c index 432ed00..f06858e 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -1113,6 +1113,10 @@ ureg_insn(struct ureg_program *ureg, boolean negate = FALSE; unsigned swizzle[4] = { 0 }; + if (nr_dst ureg_dst_is_empty(dst[0])) { + return; + } This is technically not really correct, as the instruction may have multiple destinations. Not saying we really have any such instructions (or even that we should have them), but this helper (and the one below) has code to deal with multiple destinations, so that's a bit inconsistent. The assumption there is only one dst is already there. See the code immediately below Oh right! Makes me wonder if we shouldn't get rid of multi-dst business completely. saturate = nr_dst ? dst[0].Saturate : FALSE; predicate = nr_dst ? dst[0].Predicate : FALSE; I rather commit this as is now, as the multiple destination is not a real problem ATM, while the empty writemask do seem to arise due to me earlier commit. Also, there are instructions which have just one dst reg but side effects, though it may be restricted to OPCODE_POPA (and I wouldn't mind seeing it disappear) (I think the fence/atomic instructions might be fine and at first glance I don't see anything else). POPA is not used anywhere and can be removed. I'll do that in another commit. Ok thanks. Roland Jose saturate = nr_dst ? dst[0].Saturate : FALSE; predicate = nr_dst ? dst[0].Predicate : FALSE; if (predicate) { @@ -1162,6 +1166,10 @@ ureg_tex_insn(struct ureg_program *ureg, boolean negate = FALSE; unsigned swizzle[4] = { 0 }; + if (nr_dst ureg_dst_is_empty(dst[0])) { + return; + } Same as above. saturate = nr_dst ? dst[0].Saturate : FALSE; predicate = nr_dst ? dst[0].Predicate : FALSE; if (predicate) { diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h index cf0c75e..d973edb 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h @@ -454,6 +454,16 @@ ureg_imm1i( struct ureg_program *ureg, return ureg_DECL_immediate_int( ureg, a, 1 ); } +/* Where the destination register has a valid file, but an empty + * writemask. + */ +static INLINE boolean +ureg_dst_is_empty( struct ureg_dst dst ) +{ + return dst.File != TGSI_FILE_NULL + dst.WriteMask == 0; +} + /*** * Functions for patching up labels */ @@ -650,6 +660,7 @@ static INLINE void ureg_##op( struct ureg_program *ureg,\ { \ unsigned opcode = TGSI_OPCODE_##op; \ struct ureg_emit_insn_result insn; \ + if (ureg_dst_is_empty(dst)) return; \ insn = ureg_emit_insn(ureg, \ opcode,\ dst.Saturate, \ @@ -673,6 +684,7 @@ static INLINE void ureg_##op( struct ureg_program *ureg,\ { \ unsigned opcode = TGSI_OPCODE_##op; \ struct ureg_emit_insn_result insn; \ + if (ureg_dst_is_empty(dst)) return; \ insn = ureg_emit_insn(ureg, \ opcode,\ dst.Saturate, \ @@ -697,6 +709,7 @@ static INLINE void ureg_##op( struct ureg_program *ureg,\ { \ unsigned opcode = TGSI_OPCODE_##op; \ struct ureg_emit_insn_result insn; \ + if (ureg_dst_is_empty(dst)) return; \ insn = ureg_emit_insn(ureg, \ opcode,\
Re: [Mesa-dev] [PATCH] dri3, i915, i965: Add __DRI_IMAGE_FOURCC_SARGB8888
On Fri, Nov 22, 2013 at 12:01 PM, Keith Packard kei...@keithp.com wrote: Daniel Vetter dan...@ffwll.ch writes: Hm, where do we have the canonical source for all these fourcc codes? I'm asking since we have our own copy in the kernel as drm_fourcc.h, and that one is part of the userspace ABI since we use it to pass around framebuffer formats and format lists. I think it's the kernel? I really don't know, as the whole notion of fourcc codes seems crazy to me... Feel free to steal this code and stick it in the kernel if you like. Well, I wasn't ever in favour of using fourcc codes since they're just not standardized at all, highly redundant in some cases and also miss lots of stuff we actually need (like all the rgb formats). Cc'ing the heck out of this to get kernel people to hopefully notice. Maybe someone takes charge of this ... Otherwise meh. Just afraid to create long-term maintainance madness here with the kernel's iron thou-shalt-not-break-userspace-ever rule ... Not likely we'll ever accept srgb for framebuffers though. Would suck to collide with something we do want though. Yeah, it'd suck. But given how fourcc works we probably have that already, just haven't noticed yet :( -Daniel -- Daniel Vetter Software Engineer, Intel Corporation +41 (0) 79 365 57 48 - http://blog.ffwll.ch ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/5] meta: fix meta clear of layered framebuffers
On 19 November 2013 20:47, Paul Berry stereotype...@gmail.com wrote: From section 4.4.7 (Layered Framebuffers) of the GLSL 3.2 spec: When the Clear or ClearBuffer* commands are used to clear a layered framebuffer attachment, all layers of the attachment are cleared. This patch fixes meta clears to properly clear all layers of a layered framebuffer attachment. We accomplish this by adding a geometry shader to the meta clear program which sets gl_Layer to a uniform value. When clearing a layered framebuffer, we execute in a loop, setting the uniform to point to each layer in turn. Cc: 10.0 mesa-sta...@lists.freedesktop.org Note: this patch seems to worsen https://bugs.freedesktop.org/show_bug.cgi?id=71870 (causes it to crash instead of producing bad rendering), so we shouldn't cherry-pick it to 10.0 until we've fixed the crash. Provided that additional problems aren't discovered, the remainder of the series should still be safe to cherry-pick to 10.0. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [v3 6/8] mesa: Fill out ARB_texture_view entry points
Hi Courtney, The VIEW_CLASS_S3TC_* classes should be added too. See the section Dependencies on EXT_texture_compression_s3tc. Marek On Wed, Nov 20, 2013 at 12:16 AM, Courtney Goeltzenleuchter court...@lunarg.com wrote: Add Mesa TextureView logic. Incorporate feedback on ARB_texture_view Signed-off-by: Courtney Goeltzenleuchter court...@lunarg.com --- src/mesa/main/textureview.c | 562 +++- 1 file changed, 561 insertions(+), 1 deletion(-) diff --git a/src/mesa/main/textureview.c b/src/mesa/main/textureview.c index 4a6bd62..1858465 100644 --- a/src/mesa/main/textureview.c +++ b/src/mesa/main/textureview.c @@ -40,8 +40,346 @@ #include texobj.h #include texstorage.h #include textureview.h +#include stdbool.h #include mtypes.h +/* Table 3.X.2 (Compatible internal formats for TextureView) + --- +| Class | Internal formats | + --- +| VIEW_CLASS_128_BITS | RGBA32F, RGBA32UI, RGBA32I | + --- +| VIEW_CLASS_96_BITS| RGB32F, RGB32UI, RGB32I | + --- +| VIEW_CLASS_64_BITS| RGBA16F, RG32F, RGBA16UI, RG32UI, RGBA16I, | +| | RG32I, RGBA16, RGBA16_SNORM | + --- +| VIEW_CLASS_48_BITS| RGB16, RGB16_SNORM, RGB16F, RGB16UI, RGB16I | + --- +| VIEW_CLASS_32_BITS| RG16F, R11F_G11F_B10F, R32F, | +| | RGB10_A2UI, RGBA8UI, RG16UI, R32UI, | +| | RGBA8I, RG16I, R32I, RGB10_A2, RGBA8, RG16, | +| | RGBA8_SNORM, RG16_SNORM, SRGB8_ALPHA8, RGB9_E5 | + --- +| VIEW_CLASS_24_BITS| RGB8, RGB8_SNORM, SRGB8, RGB8UI, RGB8I | + --- +| VIEW_CLASS_16_BITS| R16F, RG8UI, R16UI, RG8I, R16I, RG8, R16, | +| | RG8_SNORM, R16_SNORM | + --- +| VIEW_CLASS_8_BITS | R8UI, R8I, R8, R8_SNORM | + --- +| VIEW_CLASS_RGTC1_RED | COMPRESSED_RED_RGTC1, | +| | COMPRESSED_SIGNED_RED_RGTC1 | + --- +| VIEW_CLASS_RGTC2_RG | COMPRESSED_RG_RGTC2, | +| | COMPRESSED_SIGNED_RG_RGTC2 | + --- +| VIEW_CLASS_BPTC_UNORM | COMPRESSED_RGBA_BPTC_UNORM, | +| | COMPRESSED_SRGB_ALPHA_BPTC_UNORM | + --- +| VIEW_CLASS_BPTC_FLOAT | COMPRESSED_RGB_BPTC_SIGNED_FLOAT, | +| | COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT | + --- + */ +struct internal_format_class_info { + GLenum view_class; + GLenum internal_format; +}; +#define INFO(c,f) {GL_##c, GL_##f} +static const struct internal_format_class_info _compatible_internal_formats[] = { + INFO(VIEW_CLASS_128_BITS, RGBA32F), + INFO(VIEW_CLASS_128_BITS, RGBA32UI), + INFO(VIEW_CLASS_128_BITS, RGBA32I), + INFO(VIEW_CLASS_96_BITS, RGB32F), + INFO(VIEW_CLASS_96_BITS, RGB32UI), + INFO(VIEW_CLASS_96_BITS, RGB32I), + INFO(VIEW_CLASS_64_BITS, RGBA16F), + INFO(VIEW_CLASS_64_BITS, RG32F), + INFO(VIEW_CLASS_64_BITS, RGBA16UI), + INFO(VIEW_CLASS_64_BITS, RG32UI), + INFO(VIEW_CLASS_64_BITS, RGBA16I), + INFO(VIEW_CLASS_64_BITS, RG32I), + INFO(VIEW_CLASS_64_BITS, RGBA16), + INFO(VIEW_CLASS_64_BITS, RGBA16_SNORM), + INFO(VIEW_CLASS_48_BITS, RGB16), + INFO(VIEW_CLASS_48_BITS, RGB16_SNORM), + INFO(VIEW_CLASS_48_BITS, RGB16F), + INFO(VIEW_CLASS_48_BITS, RGB16UI), + INFO(VIEW_CLASS_48_BITS, RGB16I), + INFO(VIEW_CLASS_32_BITS, RG16F), + INFO(VIEW_CLASS_32_BITS, R11F_G11F_B10F), + INFO(VIEW_CLASS_32_BITS, R32F),
Re: [Mesa-dev] [PATCH] mesa: Remove the ralloc canary on release builds.
Kenneth Graunke kenn...@whitecape.org writes: On 11/22/2013 12:21 AM, Eric Anholt wrote: The canary is basically just to give a better debugging message when you ralloc_free() something that wasn't rallocated. Reduces maximum memory usage of apitrace replay of the dota2 demo by 60MB on my 64-bit system (so half that on a real 32-bit dota2 environment). Really, half? It's an unsigned...that's 4 bytes regardless of 64-bit vs. 32-bit. I think this should be 60MB of savings, end of story. Scalar types get aligned to their size, so since it's followed by a pointer, there's 4 bytes of pad in between. For anyone that hasn't seen this tool before, check out pahole from the dwarves package. Run it on a .o file you think might be sucking up a bunch of memory, and see your structs like: class fs_inst : public backend_instruction { public: /* class backend_instruction ancestor; */ /* 032 */ /* XXX last struct has 7 bytes of padding */ class fs_reg dst; /*3248 */ /* --- cacheline 1 boundary (64 bytes) was 16 bytes ago --- */ class fs_reg src[3];/*80 144 */ /* --- cacheline 3 boundary (192 bytes) was 32 bytes ago --- */ bool saturate; /* 224 1 */ /* XXX 3 bytes hole, try to pack */ intconditional_mod; /* 228 4 */ uint8_tflag_subreg; /* 232 1 */ /* XXX 3 bytes hole, try to pack */ intmlen; /* 236 4 */ intregs_written; /* 240 4 */ intbase_mrf; /* 244 4 */ uint32_t texture_offset; /* 248 4 */ intsampler; /* 252 4 */ /* --- cacheline 4 boundary (256 bytes) --- */ inttarget; /* 256 4 */ bool eot; /* 260 1 */ bool header_present; /* 261 1 */ bool shadow_compare; /* 262 1 */ bool force_uncompressed; /* 263 1 */ bool force_sechalf;/* 264 1 */ bool force_writemask_all; /* 265 1 */ ... /* size: 288, cachelines: 5, members: 21 */ /* sum members: 280, holes: 3, sum holes: 8 */ /* paddings: 1, sum paddings: 7 */ /* last cacheline: 32 bytes */ }; pgp2KcnqfB6bf.pgp Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] dri3, i915, i965: Add __DRI_IMAGE_FOURCC_SARGB8888
On Fri, Nov 22, 2013 at 05:17:37PM +0100, Daniel Vetter wrote: On Fri, Nov 22, 2013 at 12:01 PM, Keith Packard kei...@keithp.com wrote: Daniel Vetter dan...@ffwll.ch writes: Hm, where do we have the canonical source for all these fourcc codes? I'm asking since we have our own copy in the kernel as drm_fourcc.h, and that one is part of the userspace ABI since we use it to pass around framebuffer formats and format lists. I think it's the kernel? I really don't know, as the whole notion of fourcc codes seems crazy to me... Feel free to steal this code and stick it in the kernel if you like. Well, I wasn't ever in favour of using fourcc codes since they're just not standardized at all, highly redundant in some cases and also miss lots of stuff we actually need (like all the rgb formats). I also argued against them, but some people wanted them for whatever reason. And since I didn't want to argue for several years about the subject, I just gave in and made the drm pixel formats fourcss. But given that I just pulled the fourccs out of my ass, we can't really cross use them between different subsystems anyway. So if we just consider all the different fourcc namespaces totally distinct, we're not going to have any problems. Personally I can promise that I will _not_ be checking Mesa/whatever code for conflicting fourccs when I need to add a new one to drm_fourcc.h. There, now I've given fair warning and if things explode later it won't be my fault. However if someone wants to emulate the drm fourcc style for whatever reason, there is a distinct pattern how I cooked them up. Well, a few different patterns depending whether it's RGB,YUV,packed,planar etc. Cc'ing the heck out of this to get kernel people to hopefully notice. Maybe someone takes charge of this ... Otherwise meh. Just afraid to create long-term maintainance madness here with the kernel's iron thou-shalt-not-break-userspace-ever rule ... Not likely we'll ever accept srgb for framebuffers though. Would suck to collide with something we do want though. Yeah, it'd suck. But given how fourcc works we probably have that already, just haven't noticed yet :( -Daniel -- Daniel Vetter Software Engineer, Intel Corporation +41 (0) 79 365 57 48 - http://blog.ffwll.ch ___ dri-devel mailing list dri-de...@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/dri-devel -- Ville Syrjälä Intel OTC ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/3] Create untiled buffers in get_back_bo when needed.
Commits should be prefixed with the name of the component they're changing. This one should be egl: Marek On Thu, Nov 7, 2013 at 5:13 PM, Axel Davy axel.d...@ens.fr wrote: We must send to the compositor buffer it is able to read. Since tiling modes are different on graphic cards, we have to disable tiling when creating the buffers if we render with a different graphic card than the compositor. Signed-off-by: Axel Davy axel.d...@ens.fr --- src/egl/drivers/dri2/platform_wayland.c | 17 +++-- 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/egl/drivers/dri2/platform_wayland.c b/src/egl/drivers/dri2/platform_wayland.c index 709df36..b922ff5 100644 --- a/src/egl/drivers/dri2/platform_wayland.c +++ b/src/egl/drivers/dri2/platform_wayland.c @@ -283,12 +283,17 @@ get_back_bo(struct dri2_egl_surface *dri2_surf, __DRIbuffer *buffer) if (dri2_surf-back == NULL) return -1; if (dri2_surf-back-dri_image == NULL) { - dri2_surf-back-dri_image = - dri2_dpy-image-createImage(dri2_dpy-dri_screen, - dri2_surf-base.Width, - dri2_surf-base.Height, - __DRI_IMAGE_FORMAT_ARGB, - __DRI_IMAGE_USE_SHARE, + unsigned int use_flags = __DRI_IMAGE_USE_SHARE; + + if (!dri2_dpy-enable_tiling) + use_flags |= __DRI_IMAGE_USE_LINEAR; + + dri2_surf-back-dri_image = + dri2_dpy-image-createImage(dri2_dpy-dri_screen, + dri2_surf-base.Width, + dri2_surf-base.Height, + __DRI_IMAGE_FORMAT_ARGB, + use_flags, NULL); dri2_surf-back-age = 0; } -- 1.8.1.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] i965: Allocate vec4_visitor's uniform_size and uniform_vector_size arrays dynamically.
Petri Latvala petri.latv...@intel.com writes: [...] @@ -325,8 +326,9 @@ public: */ dst_reg output_reg[BRW_VARYING_SLOT_COUNT]; const char *output_reg_annotation[BRW_VARYING_SLOT_COUNT]; - int uniform_size[MAX_UNIFORMS]; - int uniform_vector_size[MAX_UNIFORMS]; + unsigned uniform_param_count; + int* uniform_size; + int* uniform_vector_size; int uniforms; All the code around you uses the 'type *identifier' convention, please keep it consistent. src_reg shader_start_time; [...] @@ -556,7 +558,7 @@ brw_gs_emit(struct brw_context *brw, if (likely(!(INTEL_DEBUG DEBUG_NO_DUAL_OBJECT_GS))) { c-prog_data.dual_instanced_dispatch = false; - vec4_gs_visitor v(brw, c, prog, shader, mem_ctx, true /* no_spills */); + vec4_gs_visitor v(brw, c, prog, shader, mem_ctx, true /* no_spills */, param_count); Another possibility would be to set 'c.prog_data.base.nr_params = param_count' here and in brw_vs_emit(), so you'd have the same value available from the constructor of vec4_visitor without all the parameter changes. It would be possible to do something similar in the fs_visitor code, but it seems that it would be slightly more difficult because the fs_visitor (ab-)uses nr_params to keep track of the most recently allocated uniform index, you'd need to add a private counter variable to the fs_visitor similar to what vec4_visitor does. With the upcoming ARB_shader_image_load_store changes we might get more than 4 * MAX_UNIFORM uniform allocations in some cases because image uniforms can take up several slots for the image meta-data -- I think it would make sense to extend this change to the FS back-end too. Thanks. if (v.run()) { vec4_generator g(brw, prog, c-gp-program.Base, c-prog_data.base, mem_ctx, INTEL_DEBUG DEBUG_GS); [...] pgpt3d5HdM9K6.pgp Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] More projects for newbies
I've posted a wiki with some project suggestions for people wanting to get into Mesa development: http://wiki.freedesktop.org/dri/NewbieProjects/ These projects are a little bit larger than the one I previously posted to the mailing list (http://lists.freedesktop.org/archives/mesa-dev/2013-October/046374.html), but I've tried to be very detailed. I have two hopes for this wiki: 1. People already experienced with Mesa development will avoid these projects. :) 2. Other people will add projects with similar scope and level of detail. For example, I think someone could probably add a project for GL_ARB_buffer_storage and GL_ARB_clear_texture. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] i965: Allocate vec4_visitor's uniform_size and uniform_vector_size arrays dynamically.
On 11/22/2013 12:09 AM, Petri Latvala wrote: Signed-off-by: Petri Latvala petri.latv...@intel.com --- src/mesa/drivers/dri/i965/brw_vec4.cpp| 5 +++-- src/mesa/drivers/dri/i965/brw_vec4.h | 14 +++--- src/mesa/drivers/dri/i965/brw_vec4_gs.c | 2 +- src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 12 +++- src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h | 6 -- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp| 7 ++- src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp | 6 -- src/mesa/drivers/dri/i965/brw_vs.c| 2 +- src/mesa/drivers/dri/i965/brw_vs.h| 6 -- 9 files changed, 41 insertions(+), 19 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 73f91a0..3da1b4d 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -1558,7 +1558,8 @@ brw_vs_emit(struct brw_context *brw, struct brw_vs_compile *c, struct brw_vs_prog_data *prog_data, void *mem_ctx, -unsigned *final_assembly_size) +unsigned *final_assembly_size, +unsigned param_count) { bool start_busy = false; float start_time = 0; @@ -1585,7 +1586,7 @@ brw_vs_emit(struct brw_context *brw, } } - vec4_vs_visitor v(brw, c, prog_data, prog, shader, mem_ctx); + vec4_vs_visitor v(brw, c, prog_data, prog, shader, mem_ctx, param_count); if (!v.run()) { if (prog) { prog-LinkStatus = false; diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 5cec9f9..552ca55 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -231,7 +231,8 @@ public: struct brw_shader *shader, void *mem_ctx, bool debug_flag, -bool no_spills); +bool no_spills, +unsigned param_count); ~vec4_visitor(); dst_reg dst_null_f() @@ -325,8 +326,9 @@ public: */ dst_reg output_reg[BRW_VARYING_SLOT_COUNT]; const char *output_reg_annotation[BRW_VARYING_SLOT_COUNT]; - int uniform_size[MAX_UNIFORMS]; - int uniform_vector_size[MAX_UNIFORMS]; + unsigned uniform_param_count; + int* uniform_size; + int* uniform_vector_size; int uniforms; src_reg shader_start_time; @@ -546,6 +548,12 @@ private: * If true, then register allocation should fail instead of spilling. */ const bool no_spills; + + /* +* Make noncopyable. +*/ + vec4_visitor(const vec4_visitor); + vec4_visitor operator=(const vec4_visitor); I think this should be a separate patch with it's own justification. I'm also not sure it's strictly necessary. I'm not a C++ expert (and most people on the project aren't either), so bear with me a bit. The usual reason to make a class non-copyable is because the default copy-constructor will only make a shallow copy of pointer fields. This can lead to either double-frees or dereferencing freed memory. However, since we're using ralloc, and the first construction of a vec4_visitor object will out-live any possible copies, neither of these situations can occur. Is that a fair assessment? Now... we probably should do this anyway... and there are a lot of classes in the i965 back-end where this should occur. I don't know if we want to make a macro to generate this boiler-plate code or if we just want to manually add it to every class. Perhaps Ken, Paul, or Curro will have an opinion... }; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs.c b/src/mesa/drivers/dri/i965/brw_vec4_gs.c index b52d646..b0b77d1 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs.c +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs.c @@ -213,7 +213,7 @@ do_gs_prog(struct brw_context *brw, void *mem_ctx = ralloc_context(NULL); unsigned program_size; const unsigned *program = - brw_gs_emit(brw, prog, c, mem_ctx, program_size); + brw_gs_emit(brw, prog, c, mem_ctx, program_size, param_count); if (program == NULL) { ralloc_free(mem_ctx); return false; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp index adbb1cf..16c05f5 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp @@ -38,10 +38,11 @@ vec4_gs_visitor::vec4_gs_visitor(struct brw_context *brw, struct gl_shader_program *prog, struct brw_shader *shader, void *mem_ctx, - bool no_spills) + bool no_spills, + unsigned param_count) :
Re: [Mesa-dev] [PATCH 1/2] i965: Allocate vec4_visitor's uniform_size and uniform_vector_size arrays dynamically.
-BEGIN PGP SIGNED MESSAGE- Hash: SHA1 On 11/22/2013 10:05 AM, Francisco Jerez wrote: Petri Latvala petri.latv...@intel.com writes: [...] @@ -325,8 +326,9 @@ public: */ dst_reg output_reg[BRW_VARYING_SLOT_COUNT]; const char *output_reg_annotation[BRW_VARYING_SLOT_COUNT]; - int uniform_size[MAX_UNIFORMS]; - int uniform_vector_size[MAX_UNIFORMS]; + unsigned uniform_param_count; + int* uniform_size; + int* uniform_vector_size; int uniforms; All the code around you uses the 'type *identifier' convention, please keep it consistent. src_reg shader_start_time; [...] @@ -556,7 +558,7 @@ brw_gs_emit(struct brw_context *brw, if (likely(!(INTEL_DEBUG DEBUG_NO_DUAL_OBJECT_GS))) { c-prog_data.dual_instanced_dispatch = false; - vec4_gs_visitor v(brw, c, prog, shader, mem_ctx, true /* no_spills */); + vec4_gs_visitor v(brw, c, prog, shader, mem_ctx, true /* no_spills */, param_count); Another possibility would be to set 'c.prog_data.base.nr_params = param_count' here and in brw_vs_emit(), so you'd have the same value available from the constructor of vec4_visitor without all the parameter changes. It would be possible to do something similar in the fs_visitor code, but it seems that it would be slightly more difficult because the fs_visitor (ab-)uses nr_params to keep track of the most recently allocated uniform index, you'd need to add a private counter variable to the fs_visitor similar to what vec4_visitor does. With the upcoming ARB_shader_image_load_store changes we might get more than 4 * MAX_UNIFORM uniform allocations in some cases because image uniforms can take up several slots for the image meta-data -- I think it would make sense to extend this change to the FS back-end too. Do you think that should get done now or as a follow-on patch? This fixes an existing problem, and I think we want a fix that can get picked to the 10.0 branch and possibly earlier stable branches. Thanks. if (v.run()) { vec4_generator g(brw, prog, c-gp-program.Base, c-prog_data.base, mem_ctx, INTEL_DEBUG DEBUG_GS); [...] ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev -BEGIN PGP SIGNATURE- Version: GnuPG v1.4.14 (GNU/Linux) iEYEARECAAYFAlKPr+0ACgkQX1gOwKyEAw9vzQCdE0DAxuZF85y7wFtw1Ypg+ejo Ft4An3msa4mkI4HU/39n9vlXoY7LrEJ2 =sUh4 -END PGP SIGNATURE- ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] i965: Allocate vec4_visitor's uniform_size and uniform_vector_size arrays dynamically.
Ian Romanick i...@freedesktop.org writes: -BEGIN PGP SIGNED MESSAGE- Hash: SHA1 On 11/22/2013 10:05 AM, Francisco Jerez wrote: [...] With the upcoming ARB_shader_image_load_store changes we might get more than 4 * MAX_UNIFORM uniform allocations in some cases because image uniforms can take up several slots for the image meta-data -- I think it would make sense to extend this change to the FS back-end too. Do you think that should get done now or as a follow-on patch? This fixes an existing problem, and I think we want a fix that can get picked to the 10.0 branch and possibly earlier stable branches. I'm fine either way. If you'd like to keep the bugfix as simple as possible for the stable branch I can take care of the fs_visitor change myself as a separate patch. pgpzvRoVmbRBv.pgp Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] i965: Allocate vec4_visitor's uniform_size and uniform_vector_size arrays dynamically.
On 11/22/2013 11:27 AM, Ian Romanick wrote: On 11/22/2013 12:09 AM, Petri Latvala wrote: Signed-off-by: Petri Latvala petri.latv...@intel.com --- src/mesa/drivers/dri/i965/brw_vec4.cpp| 5 +++-- src/mesa/drivers/dri/i965/brw_vec4.h | 14 +++--- src/mesa/drivers/dri/i965/brw_vec4_gs.c | 2 +- src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 12 +++- src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h | 6 -- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp| 7 ++- src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp | 6 -- src/mesa/drivers/dri/i965/brw_vs.c| 2 +- src/mesa/drivers/dri/i965/brw_vs.h| 6 -- 9 files changed, 41 insertions(+), 19 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 73f91a0..3da1b4d 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -1558,7 +1558,8 @@ brw_vs_emit(struct brw_context *brw, struct brw_vs_compile *c, struct brw_vs_prog_data *prog_data, void *mem_ctx, -unsigned *final_assembly_size) +unsigned *final_assembly_size, +unsigned param_count) { bool start_busy = false; float start_time = 0; @@ -1585,7 +1586,7 @@ brw_vs_emit(struct brw_context *brw, } } - vec4_vs_visitor v(brw, c, prog_data, prog, shader, mem_ctx); + vec4_vs_visitor v(brw, c, prog_data, prog, shader, mem_ctx, param_count); if (!v.run()) { if (prog) { prog-LinkStatus = false; diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 5cec9f9..552ca55 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -231,7 +231,8 @@ public: struct brw_shader *shader, void *mem_ctx, bool debug_flag, -bool no_spills); +bool no_spills, +unsigned param_count); ~vec4_visitor(); dst_reg dst_null_f() @@ -325,8 +326,9 @@ public: */ dst_reg output_reg[BRW_VARYING_SLOT_COUNT]; const char *output_reg_annotation[BRW_VARYING_SLOT_COUNT]; - int uniform_size[MAX_UNIFORMS]; - int uniform_vector_size[MAX_UNIFORMS]; + unsigned uniform_param_count; + int* uniform_size; + int* uniform_vector_size; int uniforms; src_reg shader_start_time; @@ -546,6 +548,12 @@ private: * If true, then register allocation should fail instead of spilling. */ const bool no_spills; + + /* +* Make noncopyable. +*/ + vec4_visitor(const vec4_visitor); + vec4_visitor operator=(const vec4_visitor); I think this should be a separate patch with it's own justification. I'm also not sure it's strictly necessary. I'm not a C++ expert (and most people on the project aren't either), so bear with me a bit. The usual reason to make a class non-copyable is because the default copy-constructor will only make a shallow copy of pointer fields. This can lead to either double-frees or dereferencing freed memory. However, since we're using ralloc, and the first construction of a vec4_visitor object will out-live any possible copies, neither of these situations can occur. Is that a fair assessment? Now... we probably should do this anyway... and there are a lot of classes in the i965 back-end where this should occur. I don't know if we want to make a macro to generate this boiler-plate code or if we just want to manually add it to every class. Perhaps Ken, Paul, or Curro will have an opinion... Or you could just be sensible and just not make copies of things where it makes no sense to do so... I would drop this hunk...it's unrelated and unnecessary. --Ken ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] i965: Allocate vec4_visitor's uniform_size and uniform_vector_size arrays dynamically.
Ian Romanick i...@freedesktop.org writes: On 11/22/2013 12:09 AM, Petri Latvala wrote: [...] @@ -546,6 +548,12 @@ private: * If true, then register allocation should fail instead of spilling. */ const bool no_spills; + + /* +* Make noncopyable. +*/ + vec4_visitor(const vec4_visitor); + vec4_visitor operator=(const vec4_visitor); I think this should be a separate patch with it's own justification. I'm also not sure it's strictly necessary. I'm not a C++ expert (and most people on the project aren't either), so bear with me a bit. The usual reason to make a class non-copyable is because the default copy-constructor will only make a shallow copy of pointer fields. This can lead to either double-frees or dereferencing freed memory. However, since we're using ralloc, and the first construction of a vec4_visitor object will out-live any possible copies, neither of these situations can occur. Is that a fair assessment? Now... we probably should do this anyway... and there are a lot of classes in the i965 back-end where this should occur. I don't know if we want to make a macro to generate this boiler-plate code or if we just want to manually add it to every class. Perhaps Ken, Paul, or Curro will have an opinion... This hunk looks perfectly fine to me (well, aside from the -placement). All singleton classes should have its copy-constructor declared private to avoid the situations you have described, and adding a member variable that needs special handling on copy construction seems like the right time to do so. I'm OK with using the private copy-constructor and assignment operator idiom explicitly as in Petri's code or with having a macro for the same purpose to save typing -- A third possibility that I find slightly more elegant would be to define a noncopyable class all singleton objects could inherit from, like: | class noncopyable { |private: | noncopyable(noncopyable ); | noncopyable operator=(const noncopyable ); | }; Because macros that declare class members and need to use access specifiers can have unexpected side-effects. Thanks. pgpU793roSig1.pgp Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] i965: Allocate vec4_visitor's uniform_size and uniform_vector_size arrays dynamically.
Kenneth Graunke kenn...@whitecape.org writes: On 11/22/2013 11:27 AM, Ian Romanick wrote: On 11/22/2013 12:09 AM, Petri Latvala wrote: [...] @@ -546,6 +548,12 @@ private: * If true, then register allocation should fail instead of spilling. */ const bool no_spills; + + /* +* Make noncopyable. +*/ + vec4_visitor(const vec4_visitor); + vec4_visitor operator=(const vec4_visitor); [...] Or you could just be sensible and just not make copies of things where it makes no sense to do so... I would drop this hunk...it's unrelated and unnecessary. It makes it less likely to make mistakes, doesn't it? E.g. in cases where you pass an argument by value where you really meant to pass it by reference. I think our code would benefit from doing this consistently, not only because it tells the compiler which classes it makes sense to copy and which ones don't, but also because it makes the semantics of the class clearer to developers. pgplF1DNM67jH.pgp Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] i965: Allocate vec4_visitor's uniform_size and uniform_vector_size arrays dynamically.
On 11/22/2013 12:23 PM, Francisco Jerez wrote: Kenneth Graunke kenn...@whitecape.org writes: On 11/22/2013 11:27 AM, Ian Romanick wrote: On 11/22/2013 12:09 AM, Petri Latvala wrote: [...] @@ -546,6 +548,12 @@ private: * If true, then register allocation should fail instead of spilling. */ const bool no_spills; + + /* +* Make noncopyable. +*/ + vec4_visitor(const vec4_visitor); + vec4_visitor operator=(const vec4_visitor); [...] Or you could just be sensible and just not make copies of things where it makes no sense to do so... I would drop this hunk...it's unrelated and unnecessary. It makes it less likely to make mistakes, doesn't it? E.g. in cases where you pass an argument by value where you really meant to pass it by reference. I think our code would benefit from doing this consistently, not only because it tells the compiler which classes it makes sense to copy and which ones don't, but also because it makes the semantics of the class clearer to developers. Ah, I see...the point is that you might accidentally do: void foo(vec4_visitor v, int bar); and end up copying the world, when you meant to pass by reference. I guess that's useful. I probably wouldn't NAK that. I was thinking you were trying to prevent things like: vec4_visitor v2(v); vec4_visitor v3 = v; which suggests a fundamental misunderstanding of the code which a compiler error is unlikely to help you overcome. At any rate, it's really a separate change, so it should be a separate patch (as Ian mentioned). Each patch should ideally only make one specific change. Smaller patches are almost always better. --Ken ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] mesa: Add gl_formats to cover all GLUser provided format/type combinations
On Fri, Nov 15, 2013 at 9:58 PM, Mark Mueller markkmuel...@gmail.com wrote: On Fri, Nov 15, 2013 at 9:52 AM, Marek Olšák mar...@gmail.com wrote: I don't understand this and I don't think this is the right way to implement hw-accelerated TexImage. Some of the formats are unsupported by all hardware I know, others just don't make any sense (e.g. RGBA5999_REV). Please check out st_choose_matching_format. This is what the function comment says: /** * Given an OpenGL user-requested format and type, and swapBytes state, * return the format which exactly matches those parameters, so that * a memcpy-based transfer can be done. * * If no format is supported, return PIPE_FORMAT_NONE. */ My patch allows for similar functionality without using Gallium PIPE_FORMATs, which aren't supported in the i965 driver. RGPA5999_REV is there because it is used by a the glean test case pixelFormats. Having hardware support for the added formats is not relevant. Why is it not relevant? If there is no hardware support, adding those formats is a waste of time. Will the formats be unpacked by shaders or what? The MESA_* formats have only one purpose: to be used as OpenGL textures and renderbuffers. Marek ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 15/18] mesa: Add ARB_viewport_array plumbing
Hi Chris, I'm using this version of the spec: http://www.opengl.org/registry/specs/ARB/viewport_array.txt On Thu, Nov 21, 2013 at 4:41 PM, Chris Forbes chr...@ijw.co.nz wrote: I was just comparing to the list in the ARB_viewport_array spec. On Fri, Nov 22, 2013 at 11:33 AM, Courtney Goeltzenleuchter court...@lunarg.com wrote: Hi Chris, Where are you getting your defines? I copied them from include/GL/gl.h #define GL_VIEWPORT 0x0BA2 /* Scissor box */ #define GL_SCISSOR_BOX 0x0C10 #define GL_SCISSOR_TEST 0x0C11 #define GL_SCISSOR_TEST 0x0C11 #define GL_DEPTH_RANGE 0x0B70 Ah, FIRST_VERTEX looks different. #define GL_FIRST_VERTEX_CONVENTION0x8E4D I'll add PROVOKING_VERTEX Looks like UNDEFINED_VERTEX was wrong as well. (include/GL/glext.h) #define GL_UNDEFINED_VERTEX 0x8260 I was modelling one of the other extension xml files and they had similar defines, though I could see no effect including or excluding them. Should I just get rid of the definitions for values that already exist in gl.h or glext.h? Courtney On Thu, Nov 21, 2013 at 1:00 PM, Chris Forbes chr...@ijw.co.nz wrote: I'm surprised the build system accepts the conflicting second definition of SCISSOR_BOX at all, actually -- that's weird. On Fri, Nov 22, 2013 at 8:55 AM, Chris Forbes chr...@ijw.co.nz wrote: I mean some of the values don't match the spec :) On Fri, Nov 22, 2013 at 7:52 AM, Courtney Goeltzenleuchter court...@lunarg.com wrote: On Wed, Nov 20, 2013 at 7:28 PM, Chris Forbes chr...@ijw.co.nzwrote: Oops -- the 8E4E is obviously correct. Artifact of me switching how I was commenting halfway through. On Thu, Nov 21, 2013 at 3:25 PM, Chris Forbes chr...@ijw.co.nz wrote: These are bogus: +enum name=SCISSOR_BOX value=0x0C10/ +enum name=VIEWPORT value=0x0BA2/ +enum name=DEPTH_RANGE value=0x0B70/ +enum name=SCISSOR_TEST value=0x0C11/ +enum name=FIRST_VERTEX_CONVENTION value=0x0C10/ In the spec I'm using I see: New Tokens Accepted by the pname parameter of GetBooleanv, GetIntegerv, GetFloatv, GetDoublev and GetInteger64v: MAX_VIEWPORTS 0x825B VIEWPORT_SUBPIXEL_BITS 0x825C VIEWPORT_BOUNDS_RANGE 0x825D LAYER_PROVOKING_VERTEX 0x825E VIEWPORT_INDEX_PROVOKING_VERTEX 0x825F Accepted by the pname parameter of GetIntegeri_v: *SCISSOR_BOX 0x0C10* Accepted by the pname parameter of GetFloati_v: *VIEWPORT0x0BA2* Accepted by the pname parameter of GetDoublei_v: *DEPTH_RANGE 0x0B70* Accepted by the pname parameter of Enablei, Disablei, and IsEnabledi: *SCISSOR_TEST0x0C11* Thus my confusion regarding bogus values. Returned in the data parameter from a Get query with a pname of LAYER_PROVOKING_VERTEX or VIEWPORT_INDEX_PROVOKING_VERTEX: FIRST_VERTEX_CONVENTION 0x8E4D LAST_VERTEX_CONVENTION 0x8E4E PROVOKING_VERTEX0x8E4F UNDEFINED_VERTEX0x8260 What do you mean by bogus? I was emulating other extension xml files. Are these not needed because they are already defined in gl_ext.h? 0x8E4D +enum name=LAST_VERTEX_CONVENTION value=0x8E4E/ 0x8E4E add: enum name=PROVOKING_VERTEX value=0x8E4F/ +enum name=UNDEFINED_VERTEX value=0x8E4F/ 0x8260 -- Courtney Goeltzenleuchter LunarG -- Courtney Goeltzenleuchter LunarG -- Courtney Goeltzenleuchter LunarG ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/9] gallium/dri: Support DRI Image extension version 6
+ void *loaderPrivate) +{ + __DRIimage *img; + struct gl_context *ctx = ((struct st_context *)dri_context(context))-ctx; + struct gl_texture_object *obj; + struct pipe_resource *tex; + GLuint face = 0; This part doesn't work. A dri_context can be converted to a st_context. To get the pipe_resource, we should use the st_context_iface in dri_context, and the function get_resource_for_egl_image. Unfortunately, this function isn't implemented in mesa. I've found this three years old patch, which implements it: http://marc.info/?l=mesa3d-devm=128431813411436 The get_resource_for_egl_image we need is at the end of the patch, and the implementation look correct to me. Could a state_tracker expert review it and push the part of the patch we need? + img-dri_format = driGLFormatToImageFormat(obj-_BufferObjectFormat); This part doesn't work either: it doesn't give the correct format. Getting this function right would really help to write glamor dri3 helpers Axel Davy ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 8/9] gallium/radeon: Implement hooks for DRI Image 7
+size = lseek(whandle-handle, SEEK_END, 0); +/* + * Could check errno to determine whether the kernel is new enough, but + * it doesn't really matter why this failed, just that it failed. + */ +if (size == (off_t)-1) { +FREE(bo); +goto fail; +} +lseek(whandle-handle, SEEK_SET, 0); The lseek arguments are not in the right order +args.handle = bo-handle; +r = drmCommandWriteRead(bo-rws-fd, DRM_RADEON_GEM_BUSY, args, sizeof(args)); +if (r) { +fprintf(stderr, radeon: Failed to find initial domain for imported bo\n); +radeon_bo_destroy(bo-base); +return NULL; +} the ioctl should be called again until it doesn't return -EBUSY. Axel Davy ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/9] gallium/dri: Support DRI Image extension version 6
A dri_context can be converted to a st_context. Sorry for the typo. It should be A dri_context can't be converted to a st_context. Axel Davy ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/2] glsl: Fix lowering of direct assignment in lower_clip_distance.
In commit 065da16 (glsl: Convert lower_clip_distance_visitor to be an ir_rvalue_visitor), we failed to notice that since lower_clip_distance_visitor overrides visit_leave(ir_assignment *), ir_rvalue_visitor::visit_leave(ir_assignment *) wasn't getting called. As a result, clip distance dereferences appearing directly on the right hand side of an assignment (not in a subexpression) weren't getting properly lowered. This caused an ir_dereference_variable node to be left in the IR that referred to the old gl_ClipDistance variable. However, since the lowering pass replaces gl_ClipDistance with gl_ClipDistanceMESA, this turned into a dangling pointer when the IR got reparented. Prior to the introduction of geometry shaders, this bug was unlikely to arise, because (a) reading from gl_ClipDistance[i] in the fragment shader was rare, and (b) when it happened, it was likely that it would either appear in a subexpression, or be hoisted into a subexpression by tree grafting. However, in a geometry shader, we're likely to see a statement like this, which would trigger the bug: gl_ClipDistance[i] = gl_in[j].gl_ClipDistance[i]; This patch causes lower_clip_distance_visitor::visit_leave(ir_assignment *) to call the base class visitor, so that the right hand side of the assignment is properly lowered. Fixes piglit test: - spec/glsl-1.50/execution/geometry/clip-distance-itemized-copy Cc: Ian Romanick i...@freedesktop.org Cc: 9.2 mesa-sta...@lists.freedesktop.org Cc: 10.0 mesa-sta...@lists.freedesktop.org --- src/glsl/lower_clip_distance.cpp | 5 + 1 file changed, 5 insertions(+) diff --git a/src/glsl/lower_clip_distance.cpp b/src/glsl/lower_clip_distance.cpp index 682c8fd..04fa6d4 100644 --- a/src/glsl/lower_clip_distance.cpp +++ b/src/glsl/lower_clip_distance.cpp @@ -381,6 +381,11 @@ lower_clip_distance_visitor::fix_lhs(ir_assignment *ir) ir_visitor_status lower_clip_distance_visitor::visit_leave(ir_assignment *ir) { + /* First invoke the base class visitor. This causes handle_rvalue() to be +* called on ir-rhs and ir-condition. +*/ + ir_rvalue_visitor::visit_leave(ir); + if (this-is_clip_distance_vec8(ir-lhs) || this-is_clip_distance_vec8(ir-rhs)) { /* LHS or RHS of the assignment is the entire 1D gl_ClipDistance array -- 1.8.4.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/2] glsl/linker: Validate IR just before reparenting.
If reparent_ir() is called on invalid IR, then there's a danger that it will fail to reparent all of the necessary nodes. For example, if the IR contains an ir_dereference_variable which refers to an ir_variable that's not in the tree, that ir_variable won't get reparented, resulting in subtle use-after-free bugs once the non-reparented nodes are freed. (This is exactly what happened in the bug fixed by the previous commit). This patch makes this kind of bug far easier to track down, by transforming it from a use-after-free bug into an explicit IR validation error. --- src/glsl/linker.cpp | 5 + 1 file changed, 5 insertions(+) diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp index fac186a..1366077 100644 --- a/src/glsl/linker.cpp +++ b/src/glsl/linker.cpp @@ -2396,6 +2396,11 @@ done: if (prog-_LinkedShaders[i] == NULL) continue; + /* Do a final validation step to make sure that the IR wasn't + * invalidated by any modifications performed after intrastage linking. + */ + validate_ir_tree(prog-_LinkedShaders[i]-ir); + /* Retain any live IR, but trash the rest. */ reparent_ir(prog-_LinkedShaders[i]-ir, prog-_LinkedShaders[i]-ir); -- 1.8.4.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] mesa: Add gl_formats to cover all GLUser provided format/type combinations
On Fri, Nov 22, 2013 at 12:36 PM, Marek Olšák mar...@gmail.com wrote: On Fri, Nov 15, 2013 at 9:58 PM, Mark Mueller markkmuel...@gmail.com wrote: On Fri, Nov 15, 2013 at 9:52 AM, Marek Olšák mar...@gmail.com wrote: I don't understand this and I don't think this is the right way to implement hw-accelerated TexImage. Some of the formats are unsupported by all hardware I know, others just don't make any sense (e.g. RGBA5999_REV). Please check out st_choose_matching_format. This is what the function comment says: /** * Given an OpenGL user-requested format and type, and swapBytes state, * return the format which exactly matches those parameters, so that * a memcpy-based transfer can be done. * * If no format is supported, return PIPE_FORMAT_NONE. */ My patch allows for similar functionality without using Gallium PIPE_FORMATs, which aren't supported in the i965 driver. RGPA5999_REV is there because it is used by a the glean test case pixelFormats. Having hardware support for the added formats is not relevant. Why is it not relevant? If there is no hardware support, adding those formats is a waste of time. Will the formats be unpacked by shaders or what? The MESA_* formats have only one purpose: to be used as OpenGL textures and renderbuffers. Yes, that's the nature of GPU based texture uploading. The driver generates a shader and the GPU does the necessary unpacking from a cached memcpy of the application texture and stuffs it into the final mip tree in its target tiled format. In order to do that, the driver must know the original format of the texture, which can't be done from the current choices of gl_formats, thus I'm adding more. Recently I've made some local improvements so the i965 driver that can produce more information from fewer gl_formats, thus I'll be posting an updated patch soon. If there's any question of the merits, here's an example - let's say the application uploads a GL_RGB format and asks for a GL_UNSIGNED_SHORT_5_6_5 internal format. Tests on Ivy Bridge show that when i965 uses the CPU to upload a 256 x 256 texture it does so at ~28 - 77 MB/sec, whereas when i965 uses the GPU, the same texture uploads at ~2400 MB/sec - including memcpy and all. Even the most basic cases with no repacking still show a 2x - 3x improvement because the GPU inherently handles tiling much faster. In general the overhead of the memcpy is easily absorbed by the fact that the graphics pipe doesn't have to stall while the CPU does the unpacking, tiling, and repacking in the single driver thread. Mark ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 15/18] mesa: Add ARB_viewport_array plumbing
It's just that last block that were messed up -- rest was context. Sorry for any confusion. On Sat, Nov 23, 2013 at 10:06 AM, Courtney Goeltzenleuchter court...@lunarg.com wrote: Hi Chris, I'm using this version of the spec: http://www.opengl.org/registry/specs/ARB/viewport_array.txt On Thu, Nov 21, 2013 at 4:41 PM, Chris Forbes chr...@ijw.co.nz wrote: I was just comparing to the list in the ARB_viewport_array spec. On Fri, Nov 22, 2013 at 11:33 AM, Courtney Goeltzenleuchter court...@lunarg.com wrote: Hi Chris, Where are you getting your defines? I copied them from include/GL/gl.h #define GL_VIEWPORT 0x0BA2 /* Scissor box */ #define GL_SCISSOR_BOX 0x0C10 #define GL_SCISSOR_TEST 0x0C11 #define GL_SCISSOR_TEST 0x0C11 #define GL_DEPTH_RANGE 0x0B70 Ah, FIRST_VERTEX looks different. #define GL_FIRST_VERTEX_CONVENTION0x8E4D I'll add PROVOKING_VERTEX Looks like UNDEFINED_VERTEX was wrong as well. (include/GL/glext.h) #define GL_UNDEFINED_VERTEX 0x8260 I was modelling one of the other extension xml files and they had similar defines, though I could see no effect including or excluding them. Should I just get rid of the definitions for values that already exist in gl.h or glext.h? Courtney On Thu, Nov 21, 2013 at 1:00 PM, Chris Forbes chr...@ijw.co.nz wrote: I'm surprised the build system accepts the conflicting second definition of SCISSOR_BOX at all, actually -- that's weird. On Fri, Nov 22, 2013 at 8:55 AM, Chris Forbes chr...@ijw.co.nz wrote: I mean some of the values don't match the spec :) On Fri, Nov 22, 2013 at 7:52 AM, Courtney Goeltzenleuchter court...@lunarg.com wrote: On Wed, Nov 20, 2013 at 7:28 PM, Chris Forbes chr...@ijw.co.nzwrote: Oops -- the 8E4E is obviously correct. Artifact of me switching how I was commenting halfway through. On Thu, Nov 21, 2013 at 3:25 PM, Chris Forbes chr...@ijw.co.nz wrote: These are bogus: +enum name=SCISSOR_BOX value=0x0C10/ +enum name=VIEWPORT value=0x0BA2/ +enum name=DEPTH_RANGE value=0x0B70/ +enum name=SCISSOR_TEST value=0x0C11/ +enum name=FIRST_VERTEX_CONVENTION value=0x0C10/ In the spec I'm using I see: New Tokens Accepted by the pname parameter of GetBooleanv, GetIntegerv, GetFloatv, GetDoublev and GetInteger64v: MAX_VIEWPORTS 0x825B VIEWPORT_SUBPIXEL_BITS 0x825C VIEWPORT_BOUNDS_RANGE 0x825D LAYER_PROVOKING_VERTEX 0x825E VIEWPORT_INDEX_PROVOKING_VERTEX 0x825F Accepted by the pname parameter of GetIntegeri_v: *SCISSOR_BOX 0x0C10* Accepted by the pname parameter of GetFloati_v: *VIEWPORT0x0BA2* Accepted by the pname parameter of GetDoublei_v: *DEPTH_RANGE 0x0B70* Accepted by the pname parameter of Enablei, Disablei, and IsEnabledi: *SCISSOR_TEST0x0C11* Thus my confusion regarding bogus values. Returned in the data parameter from a Get query with a pname of LAYER_PROVOKING_VERTEX or VIEWPORT_INDEX_PROVOKING_VERTEX: FIRST_VERTEX_CONVENTION 0x8E4D LAST_VERTEX_CONVENTION 0x8E4E PROVOKING_VERTEX0x8E4F UNDEFINED_VERTEX0x8260 What do you mean by bogus? I was emulating other extension xml files. Are these not needed because they are already defined in gl_ext.h? 0x8E4D +enum name=LAST_VERTEX_CONVENTION value=0x8E4E/ 0x8E4E add: enum name=PROVOKING_VERTEX value=0x8E4F/ +enum name=UNDEFINED_VERTEX value=0x8E4F/ 0x8260 -- Courtney Goeltzenleuchter LunarG -- Courtney Goeltzenleuchter LunarG -- Courtney Goeltzenleuchter LunarG ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [Intel-gfx] [PATCH] dri3, i915, i965: Add __DRI_IMAGE_FOURCC_SARGB8888
On Fri, Nov 22, 2013 at 05:17:37PM +0100, Daniel Vetter wrote: On Fri, Nov 22, 2013 at 12:01 PM, Keith Packard kei...@keithp.com wrote: Daniel Vetter dan...@ffwll.ch writes: Hm, where do we have the canonical source for all these fourcc codes? I'm asking since we have our own copy in the kernel as drm_fourcc.h, and that one is part of the userspace ABI since we use it to pass around framebuffer formats and format lists. I think it's the kernel? I really don't know, as the whole notion of fourcc codes seems crazy to me... Feel free to steal this code and stick it in the kernel if you like. Well, I wasn't ever in favour of using fourcc codes since they're just not standardized at all, highly redundant in some cases and also miss lots of stuff we actually need (like all the rgb formats). These drm codes are not fourcc codes in any other way than that they're defined by creating a 32 bit value by picking four characters. I don't know what PTSD triggers people have from hearing fourcc, but it seems severe. Forget all that, these codes are DRM specific defines that are not inteded to match anything anybody else does. It doesn't matter if these match of conflict with v4l, fourcc.org, wikipedia.org or what the amiga did. They're just tokens that let us define succintly what the pixel format of a kms framebuffer is and tell the kernel. I don't know what else you'd propose? Pass an X visual in the ioctl? An EGL config? This is our name space, we can add stuff as we need (as Keith is doing here). include/uapi/drm/drm_fourcc.h is the canonical source for these values and we should add DRM_FORMAT_SARGB there to make sure we don't clash. Why are these codes in mesa (and gbm and wl_drm protocol) then? Because it turns out that once you have an stable and established namespace for pixel formats (and a kernel userspace header is about as stable and established as it gets) it makes a lot of sense to reuse those values. I already explained to Keith why we use different sets of format codes in the DRI interface, but it's always fun to slam other peoples code. Anyway, it's pretty simple, the __DRI_IMAGE_FORMAT_* defines predate the introduction of drm_fourcc.h. When we later added suport for planar YUV __DRIimages, the kernel had picked up drm_fourcc.h after a long sad bikeshedding flamewar, which included the planar formats we needed. At this point we could continue using our custom __DRI_IMAGE_FORMAT_* defines or we could switch to the tokens that we had finally converged on. But don't let me ruin a good old snide remark. Cc'ing the heck out of this to get kernel people to hopefully notice. Maybe someone takes charge of this ... Otherwise meh. I don't know what you want to change. These values are already kernel ABI, we use them in drmAddFB2, and again, I don't understand what problem you're seeing. Kristian Just afraid to create long-term maintainance madness here with the kernel's iron thou-shalt-not-break-userspace-ever rule ... Not likely we'll ever accept srgb for framebuffers though. Would suck to collide with something we do want though. Yeah, it'd suck. But given how fourcc works we probably have that already, just haven't noticed yet :( -Daniel -- Daniel Vetter Software Engineer, Intel Corporation +41 (0) 79 365 57 48 - http://blog.ffwll.ch ___ Intel-gfx mailing list intel-...@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 15/18] mesa: Add ARB_viewport_array plumbing
Got it. On Fri, Nov 22, 2013 at 2:55 PM, Chris Forbes chr...@ijw.co.nz wrote: It's just that last block that were messed up -- rest was context. Sorry for any confusion. On Sat, Nov 23, 2013 at 10:06 AM, Courtney Goeltzenleuchter court...@lunarg.com wrote: Hi Chris, I'm using this version of the spec: http://www.opengl.org/registry/specs/ARB/viewport_array.txt On Thu, Nov 21, 2013 at 4:41 PM, Chris Forbes chr...@ijw.co.nz wrote: I was just comparing to the list in the ARB_viewport_array spec. On Fri, Nov 22, 2013 at 11:33 AM, Courtney Goeltzenleuchter court...@lunarg.com wrote: Hi Chris, Where are you getting your defines? I copied them from include/GL/gl.h #define GL_VIEWPORT 0x0BA2 /* Scissor box */ #define GL_SCISSOR_BOX 0x0C10 #define GL_SCISSOR_TEST 0x0C11 #define GL_SCISSOR_TEST 0x0C11 #define GL_DEPTH_RANGE 0x0B70 Ah, FIRST_VERTEX looks different. #define GL_FIRST_VERTEX_CONVENTION0x8E4D I'll add PROVOKING_VERTEX Looks like UNDEFINED_VERTEX was wrong as well. (include/GL/glext.h) #define GL_UNDEFINED_VERTEX 0x8260 I was modelling one of the other extension xml files and they had similar defines, though I could see no effect including or excluding them. Should I just get rid of the definitions for values that already exist in gl.h or glext.h? Courtney On Thu, Nov 21, 2013 at 1:00 PM, Chris Forbes chr...@ijw.co.nz wrote: I'm surprised the build system accepts the conflicting second definition of SCISSOR_BOX at all, actually -- that's weird. On Fri, Nov 22, 2013 at 8:55 AM, Chris Forbes chr...@ijw.co.nzwrote: I mean some of the values don't match the spec :) On Fri, Nov 22, 2013 at 7:52 AM, Courtney Goeltzenleuchter court...@lunarg.com wrote: On Wed, Nov 20, 2013 at 7:28 PM, Chris Forbes chr...@ijw.co.nzwrote: Oops -- the 8E4E is obviously correct. Artifact of me switching how I was commenting halfway through. On Thu, Nov 21, 2013 at 3:25 PM, Chris Forbes chr...@ijw.co.nz wrote: These are bogus: +enum name=SCISSOR_BOX value=0x0C10/ +enum name=VIEWPORT value=0x0BA2/ +enum name=DEPTH_RANGE value=0x0B70/ +enum name=SCISSOR_TEST value=0x0C11/ +enum name=FIRST_VERTEX_CONVENTION value=0x0C10/ In the spec I'm using I see: New Tokens Accepted by the pname parameter of GetBooleanv, GetIntegerv, GetFloatv, GetDoublev and GetInteger64v: MAX_VIEWPORTS 0x825B VIEWPORT_SUBPIXEL_BITS 0x825C VIEWPORT_BOUNDS_RANGE 0x825D LAYER_PROVOKING_VERTEX 0x825E VIEWPORT_INDEX_PROVOKING_VERTEX 0x825F Accepted by the pname parameter of GetIntegeri_v: *SCISSOR_BOX 0x0C10* Accepted by the pname parameter of GetFloati_v: *VIEWPORT0x0BA2* Accepted by the pname parameter of GetDoublei_v: *DEPTH_RANGE 0x0B70* Accepted by the pname parameter of Enablei, Disablei, and IsEnabledi: *SCISSOR_TEST0x0C11* Thus my confusion regarding bogus values. Returned in the data parameter from a Get query with a pname of LAYER_PROVOKING_VERTEX or VIEWPORT_INDEX_PROVOKING_VERTEX: FIRST_VERTEX_CONVENTION 0x8E4D LAST_VERTEX_CONVENTION 0x8E4E PROVOKING_VERTEX0x8E4F UNDEFINED_VERTEX0x8260 What do you mean by bogus? I was emulating other extension xml files. Are these not needed because they are already defined in gl_ext.h? 0x8E4D +enum name=LAST_VERTEX_CONVENTION value=0x8E4E/ 0x8E4E add: enum name=PROVOKING_VERTEX value=0x8E4F/ +enum name=UNDEFINED_VERTEX value=0x8E4F/ 0x8260 -- Courtney Goeltzenleuchter LunarG -- Courtney Goeltzenleuchter LunarG -- Courtney Goeltzenleuchter LunarG -- Courtney Goeltzenleuchter LunarG ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [Intel-gfx] [PATCH] dri3, i915, i965: Add __DRI_IMAGE_FOURCC_SARGB8888
On Fri, Nov 22, 2013 at 02:12:13PM -0800, Kristian Høgsberg wrote: On Fri, Nov 22, 2013 at 05:17:37PM +0100, Daniel Vetter wrote: On Fri, Nov 22, 2013 at 12:01 PM, Keith Packard kei...@keithp.com wrote: Daniel Vetter dan...@ffwll.ch writes: Hm, where do we have the canonical source for all these fourcc codes? I'm asking since we have our own copy in the kernel as drm_fourcc.h, and that one is part of the userspace ABI since we use it to pass around framebuffer formats and format lists. I think it's the kernel? I really don't know, as the whole notion of fourcc codes seems crazy to me... Feel free to steal this code and stick it in the kernel if you like. Well, I wasn't ever in favour of using fourcc codes since they're just not standardized at all, highly redundant in some cases and also miss lots of stuff we actually need (like all the rgb formats). These drm codes are not fourcc codes in any other way than that they're defined by creating a 32 bit value by picking four characters. I don't know what PTSD triggers people have from hearing fourcc, but it seems severe. Forget all that, these codes are DRM specific defines that are not inteded to match anything anybody else does. It doesn't matter if these match of conflict with v4l, fourcc.org, wikipedia.org or what the amiga did. They're just tokens that let us define succintly what the pixel format of a kms framebuffer is and tell the kernel. I don't know what else you'd propose? Pass an X visual in the ioctl? An EGL config? This is our name space, we can add stuff as we need (as Keith is doing here). include/uapi/drm/drm_fourcc.h is the canonical source for these values and we should add DRM_FORMAT_SARGB there to make sure we don't clash. What is this format anyway? -ENODOCS If its just an srgb version of ARGB, then I wouldn't really want it in drm_fourcc.h. I expect colorspacy stuff will be handled by various crtc/plane properties in the kernel so we don't need to encode that stuff into the fb format. -- Ville Syrjälä Intel OTC ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] mesa: Add gl_formats to cover all GLUser provided format/type combinations
On Fri, Nov 22, 2013 at 10:45 PM, Mark Mueller markkmuel...@gmail.com wrote: On Fri, Nov 22, 2013 at 12:36 PM, Marek Olšák mar...@gmail.com wrote: On Fri, Nov 15, 2013 at 9:58 PM, Mark Mueller markkmuel...@gmail.com wrote: On Fri, Nov 15, 2013 at 9:52 AM, Marek Olšák mar...@gmail.com wrote: I don't understand this and I don't think this is the right way to implement hw-accelerated TexImage. Some of the formats are unsupported by all hardware I know, others just don't make any sense (e.g. RGBA5999_REV). Please check out st_choose_matching_format. This is what the function comment says: /** * Given an OpenGL user-requested format and type, and swapBytes state, * return the format which exactly matches those parameters, so that * a memcpy-based transfer can be done. * * If no format is supported, return PIPE_FORMAT_NONE. */ My patch allows for similar functionality without using Gallium PIPE_FORMATs, which aren't supported in the i965 driver. RGPA5999_REV is there because it is used by a the glean test case pixelFormats. Having hardware support for the added formats is not relevant. Why is it not relevant? If there is no hardware support, adding those formats is a waste of time. Will the formats be unpacked by shaders or what? The MESA_* formats have only one purpose: to be used as OpenGL textures and renderbuffers. Yes, that's the nature of GPU based texture uploading. The driver generates a shader and the GPU does the necessary unpacking from a cached memcpy of the application texture and stuffs it into the final mip tree in its target tiled format. In order to do that, the driver must know the original format of the texture, which can't be done from the current choices of gl_formats, This is not true. Drivers have access to the parameters of TexImage and are allowed to do with them whatever they want, so they do know the exact format of user data. I don't see why you want new texture formats while all you need is a shader that can read raw bytes and do the unpacking. If you used a blit instead of a shader and your hardware had fixed-function circuitry to read the new formats, that would be an entirely different story. If I implemented shader-based unpacking, I would probably use a texture buffer object and common formats like R8UI, R16UI, R32UI, same for RG, RGB, and RGBA and also the signed variants, but not much else. thus I'm adding more. Recently I've made some local improvements so the i965 driver that can produce more information from fewer gl_formats, thus I'll be posting an updated patch soon. If there's any question of the merits, here's an example - let's say the application uploads a GL_RGB format and asks for a GL_UNSIGNED_SHORT_5_6_5 internal format. Tests on Ivy Bridge show that when i965 uses the CPU to upload a 256 x 256 texture it does so at ~28 - 77 MB/sec, whereas when i965 uses the GPU, the same texture uploads at ~2400 MB/sec - including memcpy and all. Even the most basic cases with no repacking still show a 2x - 3x improvement because the GPU inherently handles tiling much faster. In general the overhead of the memcpy is easily absorbed by the fact that the graphics pipe doesn't have to stall while the CPU does the unpacking, tiling, and repacking in the single driver thread. I understand this very well. I did the same thing for Gallium. Also, avoiding CPU-GPU stalls has nothing to do with packing and unpacking the pixels. It's a completely independent issue. Marek ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] Spec interpretation question: layered framebuffers with mismatched layer counts
The ARB_geometry_shader4 spec says, in the list of conditions necessary for framebuffer completeness: * If any framebuffer attachment is layered, all attachments must have the same layer count. For three-dimensional textures, the layer count is the depth of the attached volume. For cube map textures, the layer count is always six. For one- and two-dimensional array textures, the layer count is simply the number of layers in the array texture. { FRAMEBUFFER_INCOMPLETE_LAYER_COUNT_ARB } When geometry shaders were adopted into desktop GL, this condition was dropped. The constant FRAMEBUFFER_INCOMPLETE_LAYER_COUNT doesn't appear at all in the desktop GL spec. Instead, GL 3.2 says (in section 4.4.7 (Layered Framebuffers)): When fragments are written to a layered framebuffer, the fragment’s layer number selects an image from the array of images at each attachment point to use for the stencil test (see section 4.1.5), depth buffer test (see section 4.1.6), and for blending and color buffer writes (see section 4.1.8). If the fragment’s layer number is negative, or greater than the minimum number of layers of any attachment, the effects of the fragment on the framebuffer contents are undefined. (note: presumably where the spec says greater, greater than equal is actually intended). In other words, a framebuffer is allowed to have layers with mismatched layer counts, but if so then the client may only reliably render to layer numbers that are present in all attachments. Mesa currently implements the rule in ARB_geometry_shader4. Technically, this is a bug, since Mesa is trying to implement geometry shaders as specified in GL 3.2, not as specified in ARB_geometry_shader4. However, there are two mitigating factors: 1. If we follow the GL 3.2 spec, then it's not clear what should happen if the client calls glClear() on a framebuffer with mismatched layer counts. For example, if I have a color attachment with 4 layers and a color attachment with 2 layers, should all 4 layers of the color attachment with 4 layers be cleared, or just the first 2? Or is it undefined? If we're required to clear all 4 layers, that's going to make the Meta implementation of glClear() a lot more clumsy. 2. The Nvidia proprietary drivers for Linux (version 313.18) follows the ARB_geometry_shader4 rule (returning FRAMEBUFFER_INCOMPLETE_LAYER_COUNT_ARB from glCheckFramebufferStatus()), even when an OpenGL 3.2+ context is used. Currently, I'm inclined to leave Mesa as is, and file a spec bug with Khronos encouraging them to adopt the ARB_geometry_shader4 text into OpenGL. I'm curious to hear other people's opinions. Thanks, Paul ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 0/3] Enable GL 3.2 support for i965, bump Mesa version.
Hi, I believe geometry shaders are all we would have to implement for Sandy Bridge. Unfortunately, geometry shaders work pretty differently on Sandy Bridge, so getting them to work won't be a slam dunk. Here's roughly what would have to be done: This is too sad - I had the impression mesa-10 would bring opengl-3.3 to my SNB laptop I bought last year. I usually don't play computer games, but recently bought two games refusing to start (dear esther and oil rush) because both require ogl = 3.2. Regards, Clemens ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Spec interpretation question: layered framebuffers with mismatched layer counts
The number of layers can be specified for each framebuffer attachment independently in Gallium, Direct3D, and also our hardware. OpenGL seems to be the only weirdo here. ;) I think our hardware handles writes to non-existent layers independently of other attachments. It either clamps the layer index or throws the fragments going to non-existent layers away. I would silently allow mismatched layer counts and pass them to the hardware. It must be able to deal with it in its own way, otherwise it wouldn't support it in the first place. Marek On Sat, Nov 23, 2013 at 12:08 AM, Paul Berry stereotype...@gmail.com wrote: The ARB_geometry_shader4 spec says, in the list of conditions necessary for framebuffer completeness: * If any framebuffer attachment is layered, all attachments must have the same layer count. For three-dimensional textures, the layer count is the depth of the attached volume. For cube map textures, the layer count is always six. For one- and two-dimensional array textures, the layer count is simply the number of layers in the array texture. { FRAMEBUFFER_INCOMPLETE_LAYER_COUNT_ARB } When geometry shaders were adopted into desktop GL, this condition was dropped. The constant FRAMEBUFFER_INCOMPLETE_LAYER_COUNT doesn't appear at all in the desktop GL spec. Instead, GL 3.2 says (in section 4.4.7 (Layered Framebuffers)): When fragments are written to a layered framebuffer, the fragment’s layer number selects an image from the array of images at each attachment point to use for the stencil test (see section 4.1.5), depth buffer test (see section 4.1.6), and for blending and color buffer writes (see section 4.1.8). If the fragment’s layer number is negative, or greater than the minimum number of layers of any attachment, the effects of the fragment on the framebuffer contents are undefined. (note: presumably where the spec says greater, greater than equal is actually intended). In other words, a framebuffer is allowed to have layers with mismatched layer counts, but if so then the client may only reliably render to layer numbers that are present in all attachments. Mesa currently implements the rule in ARB_geometry_shader4. Technically, this is a bug, since Mesa is trying to implement geometry shaders as specified in GL 3.2, not as specified in ARB_geometry_shader4. However, there are two mitigating factors: 1. If we follow the GL 3.2 spec, then it's not clear what should happen if the client calls glClear() on a framebuffer with mismatched layer counts. For example, if I have a color attachment with 4 layers and a color attachment with 2 layers, should all 4 layers of the color attachment with 4 layers be cleared, or just the first 2? Or is it undefined? If we're required to clear all 4 layers, that's going to make the Meta implementation of glClear() a lot more clumsy. 2. The Nvidia proprietary drivers for Linux (version 313.18) follows the ARB_geometry_shader4 rule (returning FRAMEBUFFER_INCOMPLETE_LAYER_COUNT_ARB from glCheckFramebufferStatus()), even when an OpenGL 3.2+ context is used. Currently, I'm inclined to leave Mesa as is, and file a spec bug with Khronos encouraging them to adopt the ARB_geometry_shader4 text into OpenGL. I'm curious to hear other people's opinions. Thanks, Paul ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] R600: Expand vector FABS
From: Tom Stellard thomas.stell...@amd.com NOTE: This is a candidate for the 3.4 branch. --- lib/Target/R600/AMDGPUISelLowering.cpp | 1 + test/CodeGen/R600/fabs.ll | 36 -- 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index f2a6aab..c4d75ff 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -179,6 +179,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine TM) : for (unsigned int x = 0; x NumFloatTypes; ++x) { MVT::SimpleValueType VT = FloatTypes[x]; +setOperationAction(ISD::FABS, VT, Expand); setOperationAction(ISD::FADD, VT, Expand); setOperationAction(ISD::FDIV, VT, Expand); setOperationAction(ISD::FFLOOR, VT, Expand); diff --git a/test/CodeGen/R600/fabs.ll b/test/CodeGen/R600/fabs.ll index 23ab468..346e4e9 100644 --- a/test/CodeGen/R600/fabs.ll +++ b/test/CodeGen/R600/fabs.ll @@ -5,10 +5,10 @@ ; (fabs (f32 bitcast (i32 a))) = (f32 bitcast (and (i32 a), 0x7FFF)) ; unless isFabsFree returns true -; R600-CHECK: @fabs_free +; R600-CHECK-LABEL: @fabs_free ; R600-CHECK-NOT: AND ; R600-CHECK: |PV.{{[XYZW]}}| -; SI-CHECK: @fabs_free +; SI-CHECK-LABEL: @fabs_free ; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 1, 0, 0, 0 define void @fabs_free(float addrspace(1)* %out, i32 %in) { @@ -19,4 +19,36 @@ entry: ret void } +; R600-CHECK-LABEL: @fabs_v2 +; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}| +; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}| +; SI-CHECK-LABEL: @fabs_v2 +; SI-CHECK: V_ADD_F32_e64 VGPR{{[0-9]}}, SGPR{{[0-9]}}, 0, 1, 0, 0, 0 +; SI-CHECK: V_ADD_F32_e64 VGPR{{[0-9]}}, SGPR{{[0-9]}}, 0, 1, 0, 0, 0 +define void @fabs_v2(2 x float addrspace(1)* %out, 2 x float %in) { +entry: + %0 = call 2 x float @llvm.fabs.v2f32(2 x float %in) + store 2 x float %0, 2 x float addrspace(1)* %out + ret void +} + +; R600-CHECK-LABEL: @fabs_v4 +; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}| +; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}| +; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}| +; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}| +; SI-CHECK-LABEL: @fabs_v4 +; SI-CHECK: V_ADD_F32_e64 VGPR{{[0-9]}}, SGPR{{[0-9]}}, 0, 1, 0, 0, 0 +; SI-CHECK: V_ADD_F32_e64 VGPR{{[0-9]}}, SGPR{{[0-9]}}, 0, 1, 0, 0, 0 +; SI-CHECK: V_ADD_F32_e64 VGPR{{[0-9]}}, SGPR{{[0-9]}}, 0, 1, 0, 0, 0 +; SI-CHECK: V_ADD_F32_e64 VGPR{{[0-9]}}, SGPR{{[0-9]}}, 0, 1, 0, 0, 0 +define void @fabs_v4(4 x float addrspace(1)* %out, 4 x float %in) { +entry: + %0 = call 4 x float @llvm.fabs.v4f32(4 x float %in) + store 4 x float %0, 4 x float addrspace(1)* %out + ret void +} + declare float @fabs(float ) readnone +declare 2 x float @llvm.fabs.v2f32(2 x float ) readnone +declare 4 x float @llvm.fabs.v4f32(4 x float ) readnone -- 1.8.1.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [Intel-gfx] [PATCH] dri3, i915, i965: Add __DRI_IMAGE_FOURCC_SARGB8888
Kristian Høgsberg hoegsb...@gmail.com writes: I already explained to Keith why we use different sets of format codes in the DRI interface, but it's always fun to slam other peoples code. As we discussed, my complaint isn't so much about __DRI_IMAGE_FOURCC, but the fact that the __DRIimage interfaces use *both* __DRI_IMAGE_FOURCC and __DRI_IMAGE_FORMAT at different times. Ok, here's a fine thing we can actually fix -- the pattern that mesa uses all over the place in converting formats looks like this (not to pick on anyone, it's repeated everywhere, this is just the first one I found in gbm_dri.c): static uint32_t gbm_dri_to_gbm_format(uint32_t dri_format) { uint32_t ret = 0; switch (dri_format) { case __DRI_IMAGE_FORMAT_RGB565: ret = GBM_FORMAT_RGB565; break; case __DRI_IMAGE_FORMAT_XRGB: ret = GBM_FORMAT_XRGB; break; case __DRI_IMAGE_FORMAT_ARGB: ret = GBM_FORMAT_ARGB; break; case __DRI_IMAGE_FORMAT_ABGR: ret = GBM_FORMAT_ABGR; break; default: ret = 0; break; } return ret; } The problem here is that any unknown incoming formats get translated to garbage (0) outgoing. With fourcc codes, there is the slight advantage that 0 is never a legal value, but it sure would be nice to print a warning or even abort if you get a format code you don't understand as there's no way 0 is ever going to do what you want. Anyone have a preference? Abort? Print an error? Silently continue to do the wrong thing? -- keith.pack...@intel.com pgpmBS_oO7m0p.pgp Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [Intel-gfx] [PATCH] dri3, i915, i965: Add __DRI_IMAGE_FOURCC_SARGB8888
Ville Syrjälä ville.syrj...@linux.intel.com writes: What is this format anyway? -ENODOCS Same as MESA_FORMAT_SARGB8 and __DRI_IMAGE_FORMAT_SARGB8 :-) If its just an srgb version of ARGB, then I wouldn't really want it in drm_fourcc.h. I expect colorspacy stuff will be handled by various crtc/plane properties in the kernel so we don't need to encode that stuff into the fb format. It's not any different from splitting YUV codes from RGB codes; a different color encoding with the same bitfields. And, for mesa, it's necessary to differentiate between ARGB and SARGB within the same format code given how the API is structured. So, we have choices: 1) Let Mesa define it's own fourcc codes and risk future accidental collisions 2) Rewrite piles of mesa code to split out the color encoding from the bitfield information and pass it separately. 3) Add reasonable format codes like this to the kernel fourcc list. -- keith.pack...@intel.com pgpyYJXhOW5b4.pgp Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Spec interpretation question: layered framebuffers with mismatched layer counts
It is however illegal in d3d10 to have multiple render targets with different dimensions (including array size): http://msdn.microsoft.com/en-us/library/windows/desktop/bb205131%28v=vs.85%29.aspx I don't know what happens if you try to bind such rendertargets. FWIW when I implemented layered rendering in llvmpipe, I simply clamped rendering to the minimum max layer of all attachments. My reading of the GL spec was that just about anything is allowed to happen if you write to such a layer, so clamping to the minimum max layer (and thus write to the wrong layer even for adjustments which do have that layer) looked alright to me. It would be quite unpractical if that weren't allowed (well other options such as dropping rendering completely on the floor would be doable too, but something like writing the correct layers for attachments which have them and drop rendering or clamp layer for the others would be HIGHLY unpractical). Roland Am 23.11.2013 00:24, schrieb Marek Olšák: The number of layers can be specified for each framebuffer attachment independently in Gallium, Direct3D, and also our hardware. OpenGL seems to be the only weirdo here. ;) I think our hardware handles writes to non-existent layers independently of other attachments. It either clamps the layer index or throws the fragments going to non-existent layers away. I would silently allow mismatched layer counts and pass them to the hardware. It must be able to deal with it in its own way, otherwise it wouldn't support it in the first place. Marek On Sat, Nov 23, 2013 at 12:08 AM, Paul Berry stereotype...@gmail.com wrote: The ARB_geometry_shader4 spec says, in the list of conditions necessary for framebuffer completeness: * If any framebuffer attachment is layered, all attachments must have the same layer count. For three-dimensional textures, the layer count is the depth of the attached volume. For cube map textures, the layer count is always six. For one- and two-dimensional array textures, the layer count is simply the number of layers in the array texture. { FRAMEBUFFER_INCOMPLETE_LAYER_COUNT_ARB } When geometry shaders were adopted into desktop GL, this condition was dropped. The constant FRAMEBUFFER_INCOMPLETE_LAYER_COUNT doesn't appear at all in the desktop GL spec. Instead, GL 3.2 says (in section 4.4.7 (Layered Framebuffers)): When fragments are written to a layered framebuffer, the fragment’s layer number selects an image from the array of images at each attachment point to use for the stencil test (see section 4.1.5), depth buffer test (see section 4.1.6), and for blending and color buffer writes (see section 4.1.8). If the fragment’s layer number is negative, or greater than the minimum number of layers of any attachment, the effects of the fragment on the framebuffer contents are undefined. (note: presumably where the spec says greater, greater than equal is actually intended). In other words, a framebuffer is allowed to have layers with mismatched layer counts, but if so then the client may only reliably render to layer numbers that are present in all attachments. Mesa currently implements the rule in ARB_geometry_shader4. Technically, this is a bug, since Mesa is trying to implement geometry shaders as specified in GL 3.2, not as specified in ARB_geometry_shader4. However, there are two mitigating factors: 1. If we follow the GL 3.2 spec, then it's not clear what should happen if the client calls glClear() on a framebuffer with mismatched layer counts. For example, if I have a color attachment with 4 layers and a color attachment with 2 layers, should all 4 layers of the color attachment with 4 layers be cleared, or just the first 2? Or is it undefined? If we're required to clear all 4 layers, that's going to make the Meta implementation of glClear() a lot more clumsy. 2. The Nvidia proprietary drivers for Linux (version 313.18) follows the ARB_geometry_shader4 rule (returning FRAMEBUFFER_INCOMPLETE_LAYER_COUNT_ARB from glCheckFramebufferStatus()), even when an OpenGL 3.2+ context is used. Currently, I'm inclined to leave Mesa as is, and file a spec bug with Khronos encouraging them to adopt the ARB_geometry_shader4 text into OpenGL. I'm curious to hear other people's opinions. Thanks, Paul ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 0/3] Enable GL 3.2 support for i965, bump Mesa version.
On 11/22/2013 03:15 PM, Clemens Eisserer wrote: Hi, I believe geometry shaders are all we would have to implement for Sandy Bridge. Unfortunately, geometry shaders work pretty differently on Sandy Bridge, so getting them to work won't be a slam dunk. Here's roughly what would have to be done: This is too sad - I had the impression mesa-10 would bring opengl-3.3 to my SNB laptop I bought last year. I usually don't play computer games, but recently bought two games refusing to start (dear esther and oil rush) because both require ogl = 3.2. Regards, Clemens Many applications require OpenGL 3.2 since it's the lowest 3.x version supported by Mac OS X. But they don't actually need the new features. For such applications, you can try setting some environment variables: export MESA_GL_VERSION_OVERRIDE=3.2 export MESA_GLSL_VERSION_OVERRIDE=150 These make the driver claim OpenGL 3.2 and GLSL 1.50 support, even though it's incomplete. OilRush ought to work fine with those set. I haven't tried Dear Esther since it doesn't appear that there's a native Linux client in Steam yet. But you could give it a try. --Ken ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] Fix --enable-XX-bit flags by moving LT_INIT where it should
Moving LT_INIT after setting completely (AM_)C(XX)FLAGS and LDFLAGS. LT_INIT needs them as they are expected to be used all along the compilation when the macro runs its tests to determine among other things the host type. For info, see http://www.gnu.org/software/libtool/manual/html_node/LT_005fINIT.html Fixes https://bugs.freedesktop.org/show_bug.cgi?id=50754 Signed-off-by: Alexandre Demers alexandre.f.dem...@gmail.com Tested-by: Tapani Palli lem...@gmail.com --- configure.ac | 11 --- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/configure.ac b/configure.ac index fb16338..d41595d 100644 --- a/configure.ac +++ b/configure.ac @@ -51,9 +51,6 @@ AX_PYTHON_MODULE([libxml2], [needed]) AC_PROG_SED AC_PROG_MKDIR_P -LT_PREREQ([2.2]) -LT_INIT([disable-static]) - AX_PROG_BISON([], AS_IF([test ! -f $srcdir/src/glsl/glcpp/glcpp-parse.c], [AC_MSG_ERROR([bison not found - unable to compile glcpp-parse.y])])) @@ -1956,6 +1953,14 @@ dnl Add user CFLAGS and CXXFLAGS CFLAGS=$CFLAGS $USER_CFLAGS CXXFLAGS=$CXXFLAGS $USER_CXXFLAGS +dnl +dnl LT_INIT adds tests to determine host based on some variables like (AM_)C(XX)FLAGS and (AM_)LDFLAGS. +dnl They need to be set before calling LT_INIT so the macro can configure things correctly when cross_compiling. +dnl This will allow --enable-xx-bit to work as expected. +dnl +LT_PREREQ([2.2]) +LT_INIT([disable-static]) + dnl Substitute the config AC_CONFIG_FILES([Makefile src/Makefile -- 1.8.4.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [Intel-gfx] [PATCH] dri3, i915, i965: Add __DRI_IMAGE_FOURCC_SARGB8888
On Fri, Nov 22, 2013 at 03:43:13PM -0800, Keith Packard wrote: Ville Syrjälä ville.syrj...@linux.intel.com writes: What is this format anyway? -ENODOCS Same as MESA_FORMAT_SARGB8 and __DRI_IMAGE_FORMAT_SARGB8 :-) If its just an srgb version of ARGB, then I wouldn't really want it in drm_fourcc.h. I expect colorspacy stuff will be handled by various crtc/plane properties in the kernel so we don't need to encode that stuff into the fb format. It's not any different from splitting YUV codes from RGB codes; Not really. Saying something is YUV (or rather Y'CbCr) doesn't actually tell you the color space. It just tells you whether the information is encoded as R+G+B or Y+Cb+Cr. How you convert between them is another matter. You need to know the gamma, color primaries, chroma siting for sub-sampled YCbCr formats, etc. -- Ville Syrjälä Intel OTC ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] i965/gs: Fix gl_PrimitiveIDIn when using SW primitive restart.
Paul Berry stereotype...@gmail.com writes: Ivy Bridge hardware doesn't support primitve restart under all circumstances--when it doesn't, we emulate it in software by splitting up each logical draw operation into multiple 3DPRIMITIVE commands. This causes the hardware's primitive ID counter to be reset to 0, producing incorrect values for gl_PrimitiveIDIn. To work around that problem, we create a hidden uniform which is added to the hardware's primitive ID counter at the top of the geometry shader; the software primitive restart mechanism ensures that this uniform always contains the number of primitives that were sent down the pipeline by previous 3DPRIMITIVE commands within the same logical draw operation. To reduce the performance impact of the workaround, we only compile it into the shader when the hardware is Ivy Bridge and the shader accesses gl_PrimitiveIDIn. To reduce unnecessary state updates, we only flag _NEW_PROGRAM_CONSTANTS when when the workaround is compiled in *and* software primitive restart is active. On Ivy Bridge, fixes all variants of glsl-1.50-geometry-primitive-id-restart except the GL_LINE_LOOP variants, which are broken due to an unrelated hardware limitation. diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp index 96636e8..50feb89 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp @@ -116,9 +116,39 @@ vec4_gs_visitor::setup_payload() } +/** + * When using software primitive restart, Ivy Bridge resets the primitive ID + * on each restart. Work around this by creating a hidden uniform whose value + * will be added to the incoming primitive ID. + */ +void +vec4_gs_visitor::primitive_id_workaround() +{ + /* Set up the uniform */ + this-uniform_vector_size[this-uniforms] = 1; + dst_reg primitive_id_offset(UNIFORM, this-uniforms); Or src_reg primitive_id_offset(UNIFORM, this-uniforms, glsl_type::uint_type); That'll get you a reasonable swizzle that won't be pulling in other weird stuff when the uniform gets repacked. + primitive_id_offset.type = BRW_REGISTER_TYPE_UD; + for (int i = 0; i 4; i++) { + prog_data-param[this-uniforms * 4 + i] = + reinterpret_castconst float *(brw-prim_restart.sw_prim_counter); + } In other cases param components beyond uniform_vector_size have had a pointer to a static zero, but I don't see a harm from doing this, either. (I would have used a normal C cast, but oh well) + this-uniforms++; + + /* Emit code to add the uniform to primitive ID */ + this-current_annotation = primitive ID workaround; + dst_reg dst(ATTR, VARYING_SLOT_PRIMITIVE_ID); + dst.type = BRW_REGISTER_TYPE_UD; + emit(ADD(dst, src_reg(dst), src_reg(primitive_id_offset))); Note: This overwrites .yzw of g1, but it looks like those are delivered as zero, and I suspect they're not used afterwards. You may want to use dst(ATTR, VARYING_SLOT_PRIMITIVE_ID, glsl_type::uint_type, WRITEMASK_X) for sanity. + this-current_annotation = NULL; +} I don't think any of this feedback has an actual runtime effect. Either way, these two patches are: Reviewed-by: Eric Anholt e...@anholt.net pgpoEszrmNyb8.pgp Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] mesa: Add gl_formats to cover all GLUser provided format/type combinations
On Fri, Nov 22, 2013 at 3:06 PM, Marek Olšák mar...@gmail.com wrote: On Fri, Nov 22, 2013 at 10:45 PM, Mark Mueller markkmuel...@gmail.com wrote: On Fri, Nov 22, 2013 at 12:36 PM, Marek Olšák mar...@gmail.com wrote: On Fri, Nov 15, 2013 at 9:58 PM, Mark Mueller markkmuel...@gmail.com wrote: On Fri, Nov 15, 2013 at 9:52 AM, Marek Olšák mar...@gmail.com wrote: I don't understand this and I don't think this is the right way to implement hw-accelerated TexImage. Some of the formats are unsupported by all hardware I know, others just don't make any sense (e.g. RGBA5999_REV). Please check out st_choose_matching_format. This is what the function comment says: /** * Given an OpenGL user-requested format and type, and swapBytes state, * return the format which exactly matches those parameters, so that * a memcpy-based transfer can be done. * * If no format is supported, return PIPE_FORMAT_NONE. */ My patch allows for similar functionality without using Gallium PIPE_FORMATs, which aren't supported in the i965 driver. RGPA5999_REV is there because it is used by a the glean test case pixelFormats. Having hardware support for the added formats is not relevant. Why is it not relevant? If there is no hardware support, adding those formats is a waste of time. Will the formats be unpacked by shaders or what? The MESA_* formats have only one purpose: to be used as OpenGL textures and renderbuffers. Yes, that's the nature of GPU based texture uploading. The driver generates a shader and the GPU does the necessary unpacking from a cached memcpy of the application texture and stuffs it into the final mip tree in its target tiled format. In order to do that, the driver must know the original format of the texture, which can't be done from the current choices of gl_formats, This is not true. Drivers have access to the parameters of TexImage and are allowed to do with them whatever they want, so they do know the exact format of user data. I don't see why you want new texture formats while all you need is a shader that can read raw bytes and do the unpacking. If you used a blit instead of a shader and your hardware had fixed-function circuitry to read the new formats, that would be an entirely different story. If I implemented shader-based unpacking, I would probably use a texture buffer object and common formats like R8UI, R16UI, R32UI, same for RG, RGB, and RGBA and also the signed variants, but not much else. How about let's forget the whole concept of GPU loading for a moment as that is only clouding the issue. As you said: the mesa_* formats have one purpose: to be used as OpenGL textures When I read the OGL spec for glTexImage2D it lists GL_BGR as a format. Yet looking at mesa_* formats there is only one BGR format represented: MESA_FORMAT_BGR888. There are 8 other valid OGL BGR textures that don't have MESA_FORMATs while all of the RGBs ones _are_ all there? ie. these are the OGL BGR formats that are missing: MESA_FORMAT_BGR_INT8, MESA_FORMAT_BGR_UINT8, MESA_FORMAT_BGR_INT16, MESA_FORMAT_BGR_UINT16, MESA_FORMAT_BGR_FLOAT16, MESA_FORMAT_BGR_INT32, MESA_FORMAT_BGR_UINT32, MESA_FORMAT_BGR_FLOAT32 There are also RGB, RGBA, and BGRA formats missing. Completeness seems like justification enough for their inclusion: /* Red Green Blue */ MESA_FORMAT_RGB233_REV, MESA_FORMAT_RGB10_REV, /* Red Green Blue Alpha */ MESA_FORMAT_RGBA1010102, MESA_FORMAT_RGBA2101010_REV, MESA_FORMAT_RGBA5999_REV, MESA_FORMAT_RGBA, MESA_FORMAT_RGBA_REV, MESA_FORMAT_RGBA1555_REV, /* Blue Green Red Alpha */ MESA_FORMAT_BGRA_INT8, MESA_FORMAT_BGRA_INT8_REV, MESA_FORMAT_BGRA_UINT8, MESA_FORMAT_BGRA_INT16, MESA_FORMAT_BGRA_UINT16, MESA_FORMAT_BGRA_FLOAT16, MESA_FORMAT_BGRA_INT32, MESA_FORMAT_BGRA_UINT32, MESA_FORMAT_BGRA_FLOAT32, MESA_FORMAT_BGRA, MESA_FORMAT_BGRA_REV, MESA_FORMAT_BGRA5551, MESA_FORMAT_BGRA1555_REV, MESA_FORMAT_BGRA1010102, MESA_FORMAT_BGRA2101010_REV, MESA_FORMAT_BGRA5999_REV, Mark ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 0/3] Implement DRI_PRIME support for Wayland
On 11/22/2013 01:16 AM, Kristian Høgsberg wrote: I'm not sold on the nested compositor for this use case. It introduces another context switch between the game and the main compositor and more input and output lag. Instead I think we need to consider whether we want a new __DRIimage entry point like: (*blitImage)(__DRIcontext *ctx, __DRIimage *src, __DRIimage *ctx) and then do the copy in platform_wayland.c when we have non-matching tile-formats. Kristian Thanks for the comments. There are advantages to both possibilities: using a nested compositor or doing the copy inside Mesa. I imagine doing a blit could be the default, and rendering directly to the linear buffer could be an option set by an env var, or driconf. I'm deeply convinced we should allow to render to the linear buffer without copy, and the nested compositor use-case makes sense. For the blit, a function (*blitImage)(__DRIcontext *ctx, __DRIimage *src, __DRIimage *dst) makes sense, but we would need another function (not related specifically to Prime): (*throttle) (__DRIcontext *ctx) Because rendering something heavy on non-intel card (intel cards to throttling automatically) cause input lag (and It is solved by forcing throttling at every swap). And ideally, we could have more control on tiling, for example if the computer has two AMD cards of the same generation with same tiling modes, we could always use tiling. I would like the compositor to still send the classic drm device in the wl_drm.device event. The client can then use stat(2) to stat it and defer the corresponding render node from that by adding 128 to the minor. This way we don't break older mesa versions by sending them a render node that they'll then fail to authenticate. I do not agree on this: if the compositor does run under a render-node, there are high chances it can't authenticate clients wanting to run on the not-render-node device. Moreover, I believe clients shouldn't use render-nodes by default if they can avoid it. I don't get the point of older mesa versions: why would you use an older Mesa version inside a more recent Mesa version? Some arguments in favor of allowing the nested compositor case to render without copy on another card: . XWayland inside would run as if the main device is the device of the nested compositor. (I can't say how X dri3 will support Prime, so I can't say yet if this is a big advantage or not). For example if Xrender is slow on the main card, you can with this system use Xrender on the other card. . In case you are under system compositors (like would KWin), you would like to be able to render your whole desktop on the card you want, without an additional copy. . We could imagine having outputs on different card, the compositor under system compositors would connect to multiple system compositors running on each card (and giving access to different outputs). The compositor would use card X: the system compositor on card X would have tiled buffers without copies, whereas the other system compositors would have untiled buffers without copies. . The nested compositor could allow the user to choose between capping the compositing to 60 fps or not. When we would cap the compositing to 60 fps, we would avoid some useless copies (while adding a very small latency between when the frame is sent and when it is displayed) (. The nested compositor could have additional features like recording using the acceleration of the other card ) All these arguments can be put in short in: more flexibility For an heavy game 60 fps, I agree it makes much more sense to do the copy inside Mesa, than using a nested compositor. Axel Davy ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev