[Mesa-dev] [PATCH v3] glsl: Expand matrix flip optimization pass to cover more cases.
Also, as suggested by Ian Romanick, make it so we don't need a bunch of individual handles to flippable matrices, instead we register matrix/transpose_matrix pairs in a hash table for all built-in matrices using the non-transpose matrix name as key. --- I was wondering, is it really safe to only dup the variable name in the case that transpose_ptr != NULL? What if the variable gets killed in other optimization passes? we would have garbage keys in the hash table... wouldn't that cause problems? src/glsl/opt_flip_matrices.cpp | 153 +++-- 1 file changed, 117 insertions(+), 36 deletions(-) diff --git a/src/glsl/opt_flip_matrices.cpp b/src/glsl/opt_flip_matrices.cpp index 9044fd6..80ecb0d 100644 --- a/src/glsl/opt_flip_matrices.cpp +++ b/src/glsl/opt_flip_matrices.cpp @@ -29,44 +29,142 @@ * On some hardware, this is more efficient. * * This currently only does the conversion for built-in matrices which - * already have transposed equivalents. Namely, gl_ModelViewProjectionMatrix - * and gl_TextureMatrix. + * already have transposed equivalents. */ #include "ir.h" #include "ir_optimization.h" #include "main/macros.h" +#include "program/hash_table.h" namespace { + class matrix_flipper : public ir_hierarchical_visitor { public: + struct matrix_and_transpose { + ir_variable *matrix; + ir_variable *transpose_matrix; + }; + matrix_flipper(exec_list *instructions) { progress = false; - mvp_transpose = NULL; - texmat_transpose = NULL; + + /* Build a hash table of built-in matrices and their transposes. + * + * The key for the entries in the hash table is the non-transpose matrix + * name. This assumes that all built-in transpose matrices have the + * "Transpose" suffix. 
+ */ + ht = hash_table_ctor(0, hash_table_string_hash, + hash_table_string_compare); foreach_list(n, instructions) { ir_instruction *ir = (ir_instruction *) n; ir_variable *var = ir->as_variable(); + if (!var) continue; - if (strcmp(var->name, "gl_ModelViewProjectionMatrixTranspose") == 0) -mvp_transpose = var; - if (strcmp(var->name, "gl_TextureMatrixTranspose") == 0) -texmat_transpose = var; + + /* Must be a matrix or array of matrices. */ + if (!var->type->is_matrix() && + !(var->type->is_array() && var->type->fields.array->is_matrix())) +continue; + + /* Must be a built-in */ + if (is_gl_identifier(var->name)) +continue; + + /* Create a new entry for this matrix if we don't have one yet */ + bool new_entry = false; + struct matrix_and_transpose *entry = +(struct matrix_and_transpose *) hash_table_find(ht, var->name); + if (!entry) { +new_entry = true; +entry = new struct matrix_and_transpose(); +entry->matrix = NULL; +entry->transpose_matrix = NULL; + } + + const char *transpose_ptr = strstr(var->name, "Transpose"); + if (transpose_ptr == NULL) { +entry->matrix = var; + } else { +/* We should not be adding transpose built-in matrices that do + * not end in 'Transpose'. 
+ */ +assert(transpose_ptr[9] == 0); +entry->transpose_matrix = var; + } + + if (new_entry) { +char *entry_key; +if (transpose_ptr == NULL) { + entry_key = (char *) var->name; +} else { + entry_key = + ralloc_strndup(this, var->name, transpose_ptr - var->name); +} +hash_table_insert(ht, entry, entry_key); + } } } + ~matrix_flipper() + { + hash_table_dtor(ht); + } + ir_visitor_status visit_enter(ir_expression *ir); bool progress; private: - ir_variable *mvp_transpose; - ir_variable *texmat_transpose; + void transform_operands(ir_expression *ir, + ir_variable *mat_var, + ir_variable *mat_transpose); + void transform_operands_array_of_matrix(ir_expression *ir, + ir_variable *mat_var, + ir_variable *mat_transpose); + struct hash_table *ht; }; } +void +matrix_flipper::transform_operands(ir_expression *ir, + ir_variable *mat_var, + ir_variable *mat_transpose) +{ +#ifndef NDEBUG + ir_dereference_variable *deref = ir->operands[0]->as_dereference_variable(); + assert(deref && deref->var == mat_var); +#endif + + void *mem_ctx = ralloc_parent(ir); + ir->operands[0] = ir->operands[1]; + ir->operands[1] = new(mem_ctx) ir_dereference_variable(mat_transpose); +} + +void +matrix_flipper::transform_operan
[Mesa-dev] [PATCH] radeon/llvm: Adapt to AMDGPU.rsq intrinsic change in LLVM 3.5
From: Michel Dänzer Signed-off-by: Michel Dänzer --- src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c | 4 1 file changed, 4 insertions(+) diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c index f8be0df..217fa32 100644 --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c @@ -1384,7 +1384,11 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx) bld_base->op_actions[TGSI_OPCODE_UCMP].emit = emit_ucmp; bld_base->rsq_action.emit = build_tgsi_intrinsic_nomem; +#if HAVE_LLVM >= 0x0305 + bld_base->rsq_action.intr_name = "llvm.AMDGPU.rsq."; +#else bld_base->rsq_action.intr_name = "llvm.AMDGPU.rsq"; +#endif } void radeon_llvm_create_func(struct radeon_llvm_context * ctx, -- 2.0.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] GLX: Add an env var to enable the support of GLX extensions needing both client and server support, in case of client support and direct rendering.
On Wed, 18 Jun 2014 22:55:16 -0700 Kenneth Graunke wrote: > On Wednesday, June 18, 2014 11:32:45 PM Axel Davy wrote: > > In the case of XWayland, there's no accelerated indirect rendering. > > For example GLX_ARB_create_context is not advertised by the server, > > and according to the spec, we are not allowed to advertise it > > to the application because of that. > > > > This env var makes Mesa ignore this restriction, and > > a GLX extension is advertised whenever the client supports it. > > > > Signed-off-by: Axel Davy > > --- > > src/glx/glxextensions.c | 9 - > > 1 file changed, 8 insertions(+), 1 deletion(-) > > In this specific case, I think it might make sense to just advertise the > extension and return a GLX error if asked for indirect rendering. Or, just > lie and return a direct rendering context. > > That would make the common case that most people want (GLX_ARB_create_context > for direct rendered 3.2+ core profile stuff) work out of the box, without the > need for environment variables. It's technically out of spec, but for X on > Wayland, I think it's not unreasonable. > > On the other hand, supporting AIGLX of sorts might be possible... > > With XWayland, there are really a couple layers to "indirect" rendering... > 1. Doing it X client side (direct rendering) > 2. Doing it in the XWayland X11 server/Wayland client (semi-indirect). > 3. Doing it wherever Weston/etc are running (total indirect). > > It seems like XWayland could support AIGLX with model #2 - X clients would > speak GLX protocol to XWayland, which could then do the GL. Model #3 seems > like something we should avoid at all costs. Not only avoid, but model #3 is practically impossible (well, not sensible) anyway. There is no rendering protocol in Wayland, you'd have to invent that first, and then it would probably just get rejected. IMO we can just keep talking about direct (model #1) and indirect (model #2) rendering as usual. 
To me those are basically GLX concepts and have nothing to do with Wayland. Thanks, pq ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 22/23] mesa: Init Geom.UsesEndPrimitive in shader programs.
On Wed, 2014-06-18 at 13:48 -0700, Ian Romanick wrote: > I think this patch and 23 should be moved first in the series... they can > certainly land before the other patches in the series. Right. I'll push these two tomorrow if nobody says otherwise today. Since you marked patch 23 for stable: do I need to do something else other than adding the CC tag in the commit message? Should I send the patch also to mesa-sta...@lists.freedesktop.org? Iago > This patch is > > Reviewed-by: Ian Romanick > > On 06/18/2014 02:51 AM, Iago Toral Quiroga wrote: > > --- > > src/mesa/main/shaderobj.c | 1 + > > 1 file changed, 1 insertion(+) > > > > diff --git a/src/mesa/main/shaderobj.c b/src/mesa/main/shaderobj.c > > index 03db862..b3d428c 100644 > > --- a/src/mesa/main/shaderobj.c > > +++ b/src/mesa/main/shaderobj.c > > @@ -248,6 +248,7 @@ _mesa_init_shader_program(struct gl_context *ctx, > > struct gl_shader_program *prog > > prog->Geom.VerticesOut = 0; > > prog->Geom.InputType = GL_TRIANGLES; > > prog->Geom.OutputType = GL_TRIANGLE_STRIP; > > + prog->Geom.UsesEndPrimitive = GL_FALSE; > > prog->Geom.UsesStreams = GL_FALSE; > > > > prog->TransformFeedback.BufferMode = GL_INTERLEAVED_ATTRIBS; > > > > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 12/23] glsl: Validate vertex emission in geometry shaders.
On Wed, 2014-06-18 at 13:38 -0700, Ian Romanick wrote: > On 06/18/2014 02:51 AM, Iago Toral Quiroga wrote: > > Check if non-zero streams are used. Fail to link if emitting to unsupported > > streams or emitting to non-zero streams with output type other than > > GL_POINTS. > > --- > > src/glsl/linker.cpp | 148 > > +++- > > 1 file changed, 134 insertions(+), 14 deletions(-) > > > > diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp > > index 0b6a716..f8ff138 100644 > > --- a/src/glsl/linker.cpp > > +++ b/src/glsl/linker.cpp > > @@ -250,31 +250,100 @@ public: > > } > > }; > > > > - > > /** > > - * Visitor that determines whether or not a shader uses ir_end_primitive. > > + * Visitor that determines the highest stream id to which a (geometry) > > shader > > + * emits vertices. It also checks whether End{Stream}Primitive is ever > > called. > > */ > > -class find_end_primitive_visitor : public ir_hierarchical_visitor { > > +class find_emit_vertex_visitor : public ir_hierarchical_visitor { > > public: > > - find_end_primitive_visitor() > > - : found(false) > > + find_emit_vertex_visitor(int max_allowed) > > + : max_stream_allowed(max_allowed), > > +invalid_stream_id(0), > > +invalid_stream_id_from_emit_vertex(false), > > +end_primitive_found(false), > > +uses_non_zero_stream(false) > > { > >/* empty */ > > } > > > > - virtual ir_visitor_status visit(ir_end_primitive *) > > + virtual ir_visitor_status visit_leave(ir_emit_vertex *ir) > > { > > - found = true; > > - return visit_stop; > > + int stream_id = ir->stream_id(); > > + > > + if (stream_id < 0) { > > + invalid_stream_id = stream_id; > > + invalid_stream_id_from_emit_vertex = true; > > + return visit_stop; > > + } > > + > > + if (stream_id > max_stream_allowed) { > > + invalid_stream_id = stream_id; > > + invalid_stream_id_from_emit_vertex = true; > > + return visit_stop; > > + } > > + > > + if (stream_id != 0) > > + uses_non_zero_stream = true; > > + > > + return visit_continue; > > } > > > > - bool 
end_primitive_found() > > + virtual ir_visitor_status visit_leave(ir_end_primitive *ir) > > { > > - return found; > > + end_primitive_found = true; > > + > > + int stream_id = ir->stream_id(); > > + > > + if (stream_id < 0) { > > + invalid_stream_id = stream_id; > > + invalid_stream_id_from_emit_vertex = false; > > + return visit_stop; > > + } > > + > > + if (stream_id > max_stream_allowed) { > > + invalid_stream_id = stream_id; > > + invalid_stream_id_from_emit_vertex = false; > > + return visit_stop; > > + } > > + > > + if (stream_id != 0) > > + uses_non_zero_stream = true; > > + > > + return visit_continue; > > + } > > + > > + bool error() > > + { > > + return invalid_stream_id != 0; > > + } > > + > > + const char *error_func() > > + { > > + return invalid_stream_id_from_emit_vertex ? > > + "EmitStreamVertex" : "EndStreamPrimitive"; > > + } > > + > > + int error_stream() > > + { > > + return invalid_stream_id; > > + } > > + > > + bool uses_streams() > > + { > > + return uses_non_zero_stream; > > + } > > + > > + bool uses_end_primitive() > > + { > > + return end_primitive_found; > > } > > > > private: > > - bool found; > > + int max_stream_allowed; > > + int invalid_stream_id; > > + bool invalid_stream_id_from_emit_vertex; > > + bool end_primitive_found; > > + bool uses_non_zero_stream; > > }; > > > > } /* anonymous namespace */ > > @@ -551,10 +620,58 @@ validate_geometry_shader_executable(struct > > gl_shader_program *prog, > > > > analyze_clip_usage(prog, shader, &prog->Geom.UsesClipDistance, > >&prog->Geom.ClipDistanceArraySize); > > +} > > + > > +/** > > + * Check if geometry shaders emit to non-zero streams and do corresponding > > + * validations. 
> > + */ > > +static void > > +validate_geometry_shader_emissions(struct gl_context *ctx, > > + struct gl_shader_program *prog) > > +{ > > + if (prog->_LinkedShaders[MESA_SHADER_GEOMETRY] != NULL) { > > + find_emit_vertex_visitor emit_vertex(ctx->Const.MaxVertexStreams - > > 1); > > + emit_vertex.run(prog->_LinkedShaders[MESA_SHADER_GEOMETRY]->ir); > > + if (emit_vertex.error()) { > > + linker_error(prog, "Invalid call %s(%d). Accepted values for the " > > + "stream parameter are in the range [0, %d].", > > + emit_vertex.error_func(), > > + emit_vertex.error_stream(), > > + ctx->Const.MaxVertexStreams - 1); > > + } > > + prog->Geom.UsesStreams = emit_vertex.uses_streams(); > > + prog->Ge
Re: [Mesa-dev] [PATCH v2 01/23] glsl: Add parsing support for multi-stream output in geometry shaders.
On Wed, 2014-06-18 at 11:16 -0700, Ian Romanick wrote: > On 06/18/2014 02:51 AM, Iago Toral Quiroga wrote: > > From: Samuel Iglesias Gonsalvez > > > > This implements parsing requirements for multi-stream support in > > geometry shaders as defined in ARB_gpu_shader5. > > > > Signed-off-by: Samuel Iglesias Gonsalvez > > A few minor nits below. With those fixed, this patch is > > Reviewed-by: Ian Romanick Thanks for your review! I will work on it. Sam signature.asc Description: This is a digitally signed message part ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 04/23] glsl: Assign GLSL StreamIds to transform feedback outputs.
On Wed, 2014-06-18 at 12:56 -0700, Ian Romanick wrote: > On 06/18/2014 02:51 AM, Iago Toral Quiroga wrote: > > Inter-shader outputs must be on stream 0, which is the default. > > --- > > src/glsl/link_varyings.cpp | 12 +--- > > src/glsl/link_varyings.h | 7 +++ > > 2 files changed, 16 insertions(+), 3 deletions(-) > > > > diff --git a/src/glsl/link_varyings.cpp b/src/glsl/link_varyings.cpp > > index f765d37..9725a43 100644 > > --- a/src/glsl/link_varyings.cpp > > +++ b/src/glsl/link_varyings.cpp > > @@ -291,6 +291,7 @@ tfeedback_decl::init(struct gl_context *ctx, const void > > *mem_ctx, > > this->skip_components = 0; > > this->next_buffer_separator = false; > > this->matched_candidate = NULL; > > + this->stream_id = 0; > > > > if (ctx->Extensions.ARB_transform_feedback3) { > >/* Parse gl_NextBuffer. */ > > @@ -355,8 +356,8 @@ tfeedback_decl::is_same(const tfeedback_decl &x, const > > tfeedback_decl &y) > > > > > > /** > > - * Assign a location for this tfeedback_decl object based on the transform > > - * feedback candidate found by find_candidate. > > + * Assign a location and stream ID for this tfeedback_decl object based on > > the > > + * transform feedback candidate found by find_candidate. > > * > > * If an error occurs, the error is reported through linker_error() and > > false > > * is returned. > > @@ -437,6 +438,11 @@ tfeedback_decl::assign_location(struct gl_context *ctx, > >return false; > > } > > > > + /* Only transform feedback varyings can be assigned to non-zero streams, > > +* so assign the stream id here. 
> > +*/ > > + this->stream_id = this->matched_candidate->toplevel_var->data.stream; > > + > > return true; > > } > > > > @@ -495,7 +501,7 @@ tfeedback_decl::store(struct gl_context *ctx, struct > > gl_shader_program *prog, > >info->Outputs[info->NumOutputs].ComponentOffset = location_frac; > >info->Outputs[info->NumOutputs].OutputRegister = location; > >info->Outputs[info->NumOutputs].NumComponents = output_size; > > - info->Outputs[info->NumOutputs].StreamId = 0; > > + info->Outputs[info->NumOutputs].StreamId = stream_id; > >info->Outputs[info->NumOutputs].OutputBuffer = buffer; > >info->Outputs[info->NumOutputs].DstOffset = > > info->BufferStride[buffer]; > >++info->NumOutputs; > > diff --git a/src/glsl/link_varyings.h b/src/glsl/link_varyings.h > > index 6fa2681..4e485c3 100644 > > --- a/src/glsl/link_varyings.h > > +++ b/src/glsl/link_varyings.h > > @@ -210,6 +210,13 @@ private: > > * data structure that was found. Otherwise NULL. > > */ > > const tfeedback_candidate *matched_candidate; > > + > > + /** > > +* StreamId assigned to this varying (defaults to 0). Can only be set to > > +* values other than 0 in geometry shaders that use the stream layout > > +* modifier. Accepted values must be in the range [0, > > MAX_VERTEX_STREAMS]. > > MAX_VERTEX_STREAMS-1, right? Ooops, right. I'll fix that. > > +*/ > > + unsigned stream_id; > > }; > > > > > > > > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2] glsl: Expand matrix flip optimization pass to cover more cases.
On Wed, 2014-06-18 at 14:43 -0700, Ian Romanick wrote: > On 06/18/2014 05:03 AM, Iago Toral Quiroga wrote: > > Also, as suggested by Ian Romanick, make it so we don't need a bunch of > > individual handles to flippable matrices, instead we register > > matrix/transpose_matrix pairs in a hash table for all built-in matrices > > using the non-transpose matrix name as key. > > --- > > src/glsl/opt_flip_matrices.cpp | 145 > > ++--- > > 1 file changed, 108 insertions(+), 37 deletions(-) > > > > diff --git a/src/glsl/opt_flip_matrices.cpp b/src/glsl/opt_flip_matrices.cpp > > index 9044fd6..d0b8125 100644 > > --- a/src/glsl/opt_flip_matrices.cpp > > +++ b/src/glsl/opt_flip_matrices.cpp > > @@ -29,44 +29,132 @@ > > * On some hardware, this is more efficient. > > * > > * This currently only does the conversion for built-in matrices which > > - * already have transposed equivalents. Namely, > > gl_ModelViewProjectionMatrix > > - * and gl_TextureMatrix. > > + * already have transposed equivalents. > > */ > > #include "ir.h" > > #include "ir_optimization.h" > > #include "main/macros.h" > > +#include "program/hash_table.h" > > > > namespace { > > + > > class matrix_flipper : public ir_hierarchical_visitor { > > public: > > + struct matrix_and_transpose { > > + ir_variable *matrix; > > + ir_variable *transpose_matrix; > > + }; > > + > > matrix_flipper(exec_list *instructions) > > { > >progress = false; > > - mvp_transpose = NULL; > > - texmat_transpose = NULL; > > + > > + /* Build a hash table of built-in matrices and their transposes. > > + * > > + * The key for the entries in the hash table is the non-transpose > > matrix > > + * name. This assumes that all built-in transpose matrices have the > > + * "Transpose" suffix. 
> > + */ > > + ht = hash_table_ctor(0, hash_table_string_hash, > > + hash_table_string_compare); > > > >foreach_list(n, instructions) { > > ir_instruction *ir = (ir_instruction *) n; > > ir_variable *var = ir->as_variable(); > > - if (!var) > > + > > + /* Must be a matrix */ > > + if (!var || !var->type->is_matrix()) > > continue; > > gl_TextureMatrix is an array of matrices, so var->type->is_matrix() > will fail. I think you want: > > if (!var) > continue; > > /* Must be a matrix or array of matrices. */ > if (!var->type->is_matrix() && > !(var->type->is_array() && var->type->fields.array->is_matrix())) > continue; Oh, right. > > - if (strcmp(var->name, "gl_ModelViewProjectionMatrixTranspose") == > > 0) > > -mvp_transpose = var; > > - if (strcmp(var->name, "gl_TextureMatrixTranspose") == 0) > > -texmat_transpose = var; > > + /* Must be a built-in */ > > + if (strstr(var->name, "gl_") != var->name) > > +continue; > > The name has to start with gl_, not just contain it. Use > is_gl_identifier(var->name) instead. Actually, this checks that it starts with it (see != var->name), but I'll use is_gl_identifier. > > + > > + /* Create a new entry for this matrix if we don't have one yet */ > > + bool new_entry = false; > > + struct matrix_and_transpose *entry = > > +(struct matrix_and_transpose *) hash_table_find(ht, var->name); > > + if (!entry) { > > +new_entry = true; > > +entry = new struct matrix_and_transpose(); > > +entry->matrix = NULL; > > +entry->transpose_matrix = NULL; > > + } > > + > > + const char *transpose_ptr = strstr(var->name, "Transpose"); > > + if (transpose_ptr == NULL) { > > +entry->matrix = var; > > + } else { > > It's probably worth adding an assertion in case a built-in is ever > added with something after Transpose. The probability is very, very > low, but I'd rather be safe. Sure, I will add that. 
>assert(transpose_ptr[9] == 0); > > > +entry->transpose_matrix = var; > > + } > > + > > + if (new_entry) { > > +char *entry_key; > > +if (transpose_ptr == NULL) { > > + entry_key = strdup(var->name); > > +} else { > > + entry_key = strndup(var->name, transpose_ptr - var->name); > > +} > > hash_table_dtor doesn't free the keys, so all of this memory leaks. > Use ralloc_strndup, and only copy the name in the transpose_ptr != NULL > case. Ok. > > +hash_table_insert(ht, entry, entry_key); > > + } > >} > > } > > > > + ~matrix_flipper() > > + { > > + hash_table_dtor(ht); > > + } > > + > > ir_visitor_status visit_enter(ir_expression *ir); > > > > bool progress; > > > > private: > > - ir_variable *mvp_transpos
Re: [Mesa-dev] [PATCH] GLX: Add an env var to enable the support of GLX extensions needing both client and server support, in case of client support and direct rendering.
On Wednesday, June 18, 2014 11:32:45 PM Axel Davy wrote: > In the case of XWayland, there's no accelerated indirect rendering. > For example GLX_ARB_create_context is not advertised by the server, > and according to the spec, we are not allowed to advertise it > to the application because of that. > > This env var makes Mesa ignore this restriction, and > a GLX extension is advertised whenever the client supports it. > > Signed-off-by: Axel Davy > --- > src/glx/glxextensions.c | 9 - > 1 file changed, 8 insertions(+), 1 deletion(-) In this specific case, I think it might make sense to just advertise the extension and return a GLX error if asked for indirect rendering. Or, just lie and return a direct rendering context. That would make the common case that most people want (GLX_ARB_create_context for direct rendered 3.2+ core profile stuff) work out of the box, without the need for environment variables. It's technically out of spec, but for X on Wayland, I think it's not unreasonable. On the other hand, supporting AIGLX of sorts might be possible... With XWayland, there are really a couple layers to "indirect" rendering... 1. Doing it X client side (direct rendering) 2. Doing it in the XWayland X11 server/Wayland client (semi-indirect). 3. Doing it wherever Weston/etc are running (total indirect). It seems like XWayland could support AIGLX with model #2 - X clients would speak GLX protocol to XWayland, which could then do the GL. Model #3 seems like something we should avoid at all costs. Of course, I don't know that there's any *benefit* to supporting AIGLX in XWayland, so...my real suggestion is to just raise a GLX error or lie if asked to create an indirect context. --Ken signature.asc Description: This is a digitally signed message part. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 01/11] gallium: Add __DRIimageDriverExtension support to gallium
On Wed, Jun 18, 2014 at 11:27 PM, Axel Davy wrote: > __DRIimageDriverExtension is used by GLX DRI3 and Wayland. > > This patch is a rewrite of > http://lists.freedesktop.org/archives/mesa-dev/2014-May/060318.html > and > http://lists.freedesktop.org/archives/mesa-dev/2014-May/060317.html > > Signed-off-by: Axel Davy > Reviewed-by: Marek Olšák > > Previous patches were: > Signed-off-by: Ben Skeggs > Signed-off-by: Keith Packard > --- > src/gallium/state_trackers/dri/drm/dri2.c | 469 > ++ > 1 file changed, 286 insertions(+), 183 deletions(-) > > diff --git a/src/gallium/state_trackers/dri/drm/dri2.c > b/src/gallium/state_trackers/dri/drm/dri2.c > index 7dccc5e..124d91b 100644 > --- a/src/gallium/state_trackers/dri/drm/dri2.c > +++ b/src/gallium/state_trackers/dri/drm/dri2.c > @@ -201,32 +201,192 @@ dri2_drawable_get_buffers(struct dri_drawable > *drawable, > return buffers; > } > > -/** > - * Process __DRIbuffer and convert them into pipe_resources. > +static bool > +dri_image_drawable_get_buffers(struct dri_drawable *drawable, > + struct __DRIimageList *images, > + const enum st_attachment_type *statts, > + unsigned statts_count) > +{ > + __DRIdrawable *dPriv = drawable->dPriv; > + __DRIscreen *sPriv = drawable->sPriv; > + unsigned int image_format = __DRI_IMAGE_FORMAT_NONE; > + enum pipe_format pf; > + uint32_t buffer_mask = 0; > + unsigned i, bind; > + > + for (i = 0; i < statts_count; i++) { > + dri_drawable_get_format(drawable, statts[i], &pf, &bind); > + if (pf == PIPE_FORMAT_NONE) > + continue; > + > + switch (statts[i]) { > + case ST_ATTACHMENT_FRONT_LEFT: > + buffer_mask |= __DRI_IMAGE_BUFFER_FRONT; > + break; > + case ST_ATTACHMENT_BACK_LEFT: > + buffer_mask |= __DRI_IMAGE_BUFFER_BACK; > + break; > + default: > + continue; > + } > + > + switch (pf) { > + case PIPE_FORMAT_B5G6R5_UNORM: > + image_format = __DRI_IMAGE_FORMAT_RGB565; > + break; > + case PIPE_FORMAT_B8G8R8X8_UNORM: > + image_format = __DRI_IMAGE_FORMAT_XRGB; > + break; > + case 
PIPE_FORMAT_B8G8R8A8_UNORM: > + image_format = __DRI_IMAGE_FORMAT_ARGB; > + break; > + case PIPE_FORMAT_R8G8B8A8_UNORM: > + image_format = __DRI_IMAGE_FORMAT_ABGR; > + break; > + default: > + image_format = __DRI_IMAGE_FORMAT_NONE; > + break; > + } > + } > + > + return (*sPriv->image.loader->getBuffers) (dPriv, image_format, > + (uint32_t *) &drawable->base.stamp, > + dPriv->loaderPrivate, buffer_mask, > + images); > +} > + > +static __DRIbuffer * > +dri2_allocate_buffer(__DRIscreen *sPriv, > + unsigned attachment, unsigned format, > + int width, int height) > +{ > + struct dri_screen *screen = dri_screen(sPriv); > + struct dri2_buffer *buffer; > + struct pipe_resource templ; > + enum pipe_format pf; > + unsigned bind = 0; > + struct winsys_handle whandle; > + > + switch (attachment) { > + case __DRI_BUFFER_FRONT_LEFT: > + case __DRI_BUFFER_FAKE_FRONT_LEFT: > + bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW; > + break; > + case __DRI_BUFFER_BACK_LEFT: > + bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW; > + break; > + case __DRI_BUFFER_DEPTH: > + case __DRI_BUFFER_DEPTH_STENCIL: > + case __DRI_BUFFER_STENCIL: > +bind = PIPE_BIND_DEPTH_STENCIL; /* XXX sampler? 
*/ > + break; > + } > + > + /* because we get the handle and stride */ > + bind |= PIPE_BIND_SHARED; > + > + switch (format) { > + case 32: > + pf = PIPE_FORMAT_B8G8R8A8_UNORM; > + break; > + case 24: > + pf = PIPE_FORMAT_B8G8R8X8_UNORM; > + break; > + case 16: > + pf = PIPE_FORMAT_Z16_UNORM; > + break; > + default: > + return NULL; > + } > + > + buffer = CALLOC_STRUCT(dri2_buffer); > + if (!buffer) > + return NULL; > + > + memset(&templ, 0, sizeof(templ)); > + templ.bind = bind; > + templ.format = pf; > + templ.target = PIPE_TEXTURE_2D; > + templ.last_level = 0; > + templ.width0 = width; > + templ.height0 = height; > + templ.depth0 = 1; > + templ.array_size = 1; > + > + buffer->resource = > + screen->base.screen->resource_create(screen->base.screen, &templ); I believe the expectation is that before you create resources with a certain format/bind combo, you need to check first with ->is_format_supported. For example pre-NVA0 nv50 cards don't support Z16. > + if (!buffer->resource) { > + FREE(buffer); > + return NULL; > + } > + > + memset(&whandle, 0, sizeof(
Re: [Mesa-dev] [PATCH] GLX: Add an env var to enable the support of GLX extensions needing both client and server support, in case of client support and direct rendering.
On Wed, Jun 18, 2014 at 11:32 PM, Axel Davy wrote: > In the case of XWayland, there's no accelerated indirect rendering. > For example GLX_ARB_create_context is not advertised by the server, > and according to the spec, we are not allowed to advertise it > to the application because of that. > > This env var makes Mesa ignore this restriction, and > a GLX extension is advertised whenever the client supports it. > > Signed-off-by: Axel Davy > --- > src/glx/glxextensions.c | 9 - > 1 file changed, 8 insertions(+), 1 deletion(-) > > diff --git a/src/glx/glxextensions.c b/src/glx/glxextensions.c > index ce5d66d..34cb6d0 100644 > --- a/src/glx/glxextensions.c > +++ b/src/glx/glxextensions.c > @@ -566,10 +566,15 @@ __glXCalculateUsableExtensions(struct glx_screen * psc, > GLboolean display_is_direct_capable, > int minor_version) > { > + const char *ignore_server_restrictions = getenv("IGNORE_SERVER"); I think such variables tend to have some sort of prefix. GLX_ seems to make sense here... or MESA_. > + unsigned char should_ignore_server_restrictions = 0; > unsigned char server_support[8]; > unsigned char usable[8]; > unsigned i; > > + if (ignore_server_restrictions && !strcmp(ignore_server_restrictions,"1")) > + should_ignore_server_restrictions = 0xff; > + > __glXExtensionsCtr(); > __glXExtensionsCtrScreen(psc); > > @@ -617,7 +622,9 @@ __glXCalculateUsableExtensions(struct glx_screen * psc, > | (client_glx_support[i] & psc->direct_support[i] & > server_support[i]) > | (client_glx_support[i] & psc->direct_support[i] & > - direct_glx_only[i]); > + direct_glx_only[i]) > +| (client_glx_support[i] & psc->direct_support[i] & > + should_ignore_server_restrictions); >} > } > else { > -- > 1.9.1 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] gallivm: set mcpu when initializing llvm execution engine
On Wed, Jun 18, 2014 at 6:28 PM, wrote: > From: Roland Scheidegger > > Previously llvm detected cpu features automatically when the execution engine > was created (based on host cpu). This is no longer the case, which meant llvm > was then not able to emit some of the intrinsics we used as we didn't specify > any sse attributes (only on avx supporting systems this was not a problem > since > despite at least some llvm versions enabling it anyway we always set this > manually). So, instead of trying to figure out which MAttrs to set just set > MCPU. > > This fixes https://bugs.freedesktop.org/show_bug.cgi?id=77493. > --- > src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 22 -- > 1 file changed, 20 insertions(+), 2 deletions(-) > > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp > b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp > index 38fbe1f..6bea964 100644 > --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp > +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp > @@ -468,8 +468,8 @@ > lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT, >/* > * AVX feature is not automatically detected from CPUID by the X86 > target > * yet, because the old (yet default) JIT engine is not capable of > - * emitting the opcodes. But as we're using MCJIT here, it is safe to > - * add set this attribute. > + * emitting the opcodes. On newer llvm versions it is and at least some > + * versions (tested with 3.3) will emit avx opcodes without this > anyway. > */ >MAttrs.push_back("+avx"); >if (util_cpu_caps.has_f16c) { > @@ -478,12 +478,30 @@ > lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT, >builder.setMAttrs(MAttrs); > } > > +#if HAVE_LLVM >= 0x0305 > + StringRef MCPU = llvm::sys::getHostCPUName(); > + /* > +* The cpu bits are no longer set automatically, so need to set mcpu > manually. > +* Note that the MAttrs set above will be sort of ignored (since we should > +* not set any which would not be set by specifying the cpu anyway). 
> +* It ought to be safe though since getHostCPUName() should include bits > +* not only from the cpu but environment as well (for instance if it's > safe > +* to use avx instructions which need OS support). According to > +* http://llvm.org/bugs/show_bug.cgi?id=19429 however if I understand this > +* right it may be necessary to specify older cpu (or disable mattrs) > though > +* when not using MCJIT so no instructions are generated which the old JIT > +* can't handle. Not entirely sure if we really need to do anything yet. > +*/ > + builder.setMCPU(MCPU); > +#endif > + > ShaderMemoryManager *MM = new ShaderMemoryManager(); > *OutCode = MM->getGeneratedCode(); > > builder.setJITMemoryManager(MM); > > ExecutionEngine *JIT; > + > #if HAVE_LLVM >= 0x0302 > JIT = builder.create(); > #else > -- > 1.9.1 > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev This patch fixes the bug for me. Tested-by: Vinson Lee ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 11/11] nvc0: fix dri3 prime buffer creation
At the very least, there are Apple MacBooks with NVAC (IGP) and NV96 chips, not sure if there are any intel + nv50 optimus setups. Of course those laptops actually aren't particularly well-supported by nouveau atm. On Wed, Jun 18, 2014 at 11:45 PM, Axel Davy wrote: > Is there any non-nvc0 Nvidia cards in hybrid graphics laptops ? > > If the answer is yes, then we probably need the same fix for nv50. > > On 18/06/2014 23:34, Ilia Mirkin wrote : > >> Does nv50 need a similar fix? [BTW note that nv50 the chipset (not the >> family), is unable to place memtype != 0 buffers into gart. nv84+ are >> all fine.] >> >> On Wed, Jun 18, 2014 at 11:27 PM, Axel Davy wrote: >>> >>> From: Dave Airlie >>> >>> We need to place shared buffers into GART. >>> >>> Signed-off-by: Dave Airlie >>> Reviewed-by: Axel Davy >>> --- >>> src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c | 2 +- >>> 1 file changed, 1 insertion(+), 1 deletion(-) >>> >>> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c >>> b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c >>> index 79c9390..2f3cba8 100644 >>> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c >>> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c >>> @@ -296,7 +296,7 @@ nvc0_miptree_create(struct pipe_screen *pscreen, >>> } >>> bo_config.nvc0.tile_mode = mt->level[0].tile_mode; >>> >>> - if (!bo_config.nvc0.memtype && pt->usage == PIPE_USAGE_STAGING) >>> + if (!bo_config.nvc0.memtype && (pt->usage == PIPE_USAGE_STAGING || >>> pt->bind & PIPE_BIND_SHARED)) >>> mt->base.domain = NOUVEAU_BO_GART; >>> else >>> mt->base.domain = NOUVEAU_BO_VRAM; >>> -- >>> 1.9.1 >>> >>> ___ >>> mesa-dev mailing list >>> mesa-dev@lists.freedesktop.org >>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev > > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 11/11] nvc0: fix dri3 prime buffer creation
Is there any non-nvc0 Nvidia cards in hybrid graphics laptops ? If the answer is yes, then we probably need the same fix for nv50. On 18/06/2014 23:34, Ilia Mirkin wrote : Does nv50 need a similar fix? [BTW note that nv50 the chipset (not the family), is unable to place memtype != 0 buffers into gart. nv84+ are all fine.] On Wed, Jun 18, 2014 at 11:27 PM, Axel Davy wrote: From: Dave Airlie We need to place shared buffers into GART. Signed-off-by: Dave Airlie Reviewed-by: Axel Davy --- src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c index 79c9390..2f3cba8 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c @@ -296,7 +296,7 @@ nvc0_miptree_create(struct pipe_screen *pscreen, } bo_config.nvc0.tile_mode = mt->level[0].tile_mode; - if (!bo_config.nvc0.memtype && pt->usage == PIPE_USAGE_STAGING) + if (!bo_config.nvc0.memtype && (pt->usage == PIPE_USAGE_STAGING || pt->bind & PIPE_BIND_SHARED)) mt->base.domain = NOUVEAU_BO_GART; else mt->base.domain = NOUVEAU_BO_VRAM; -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 01/11] gallium: Add __DRIimageDriverExtension support to gallium
On Thu, Jun 19, 2014 at 1:27 PM, Axel Davy wrote: > __DRIimageDriverExtension is used by GLX DRI3 and Wayland. > > This patch is a rewrite of > http://lists.freedesktop.org/archives/mesa-dev/2014-May/060318.html > and > http://lists.freedesktop.org/archives/mesa-dev/2014-May/060317.html > > Signed-off-by: Axel Davy > Reviewed-by: Marek Olšák Good timing, I did the exact same thing myself a couple of hours ago :P Reviewed-by: Ben Skeggs > > Previous patches were: > Signed-off-by: Ben Skeggs > Signed-off-by: Keith Packard > --- > src/gallium/state_trackers/dri/drm/dri2.c | 469 > ++ > 1 file changed, 286 insertions(+), 183 deletions(-) > > diff --git a/src/gallium/state_trackers/dri/drm/dri2.c > b/src/gallium/state_trackers/dri/drm/dri2.c > index 7dccc5e..124d91b 100644 > --- a/src/gallium/state_trackers/dri/drm/dri2.c > +++ b/src/gallium/state_trackers/dri/drm/dri2.c > @@ -201,32 +201,192 @@ dri2_drawable_get_buffers(struct dri_drawable > *drawable, > return buffers; > } > > -/** > - * Process __DRIbuffer and convert them into pipe_resources. 
> +static bool > +dri_image_drawable_get_buffers(struct dri_drawable *drawable, > + struct __DRIimageList *images, > + const enum st_attachment_type *statts, > + unsigned statts_count) > +{ > + __DRIdrawable *dPriv = drawable->dPriv; > + __DRIscreen *sPriv = drawable->sPriv; > + unsigned int image_format = __DRI_IMAGE_FORMAT_NONE; > + enum pipe_format pf; > + uint32_t buffer_mask = 0; > + unsigned i, bind; > + > + for (i = 0; i < statts_count; i++) { > + dri_drawable_get_format(drawable, statts[i], &pf, &bind); > + if (pf == PIPE_FORMAT_NONE) > + continue; > + > + switch (statts[i]) { > + case ST_ATTACHMENT_FRONT_LEFT: > + buffer_mask |= __DRI_IMAGE_BUFFER_FRONT; > + break; > + case ST_ATTACHMENT_BACK_LEFT: > + buffer_mask |= __DRI_IMAGE_BUFFER_BACK; > + break; > + default: > + continue; > + } > + > + switch (pf) { > + case PIPE_FORMAT_B5G6R5_UNORM: > + image_format = __DRI_IMAGE_FORMAT_RGB565; > + break; > + case PIPE_FORMAT_B8G8R8X8_UNORM: > + image_format = __DRI_IMAGE_FORMAT_XRGB; > + break; > + case PIPE_FORMAT_B8G8R8A8_UNORM: > + image_format = __DRI_IMAGE_FORMAT_ARGB; > + break; > + case PIPE_FORMAT_R8G8B8A8_UNORM: > + image_format = __DRI_IMAGE_FORMAT_ABGR; > + break; > + default: > + image_format = __DRI_IMAGE_FORMAT_NONE; > + break; > + } > + } > + > + return (*sPriv->image.loader->getBuffers) (dPriv, image_format, > + (uint32_t *) &drawable->base.stamp, > + dPriv->loaderPrivate, buffer_mask, > + images); > +} > + > +static __DRIbuffer * > +dri2_allocate_buffer(__DRIscreen *sPriv, > + unsigned attachment, unsigned format, > + int width, int height) > +{ > + struct dri_screen *screen = dri_screen(sPriv); > + struct dri2_buffer *buffer; > + struct pipe_resource templ; > + enum pipe_format pf; > + unsigned bind = 0; > + struct winsys_handle whandle; > + > + switch (attachment) { > + case __DRI_BUFFER_FRONT_LEFT: > + case __DRI_BUFFER_FAKE_FRONT_LEFT: > + bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW; > + break; > + case __DRI_BUFFER_BACK_LEFT: > + 
bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW; > + break; > + case __DRI_BUFFER_DEPTH: > + case __DRI_BUFFER_DEPTH_STENCIL: > + case __DRI_BUFFER_STENCIL: > +bind = PIPE_BIND_DEPTH_STENCIL; /* XXX sampler? */ > + break; > + } > + > + /* because we get the handle and stride */ > + bind |= PIPE_BIND_SHARED; > + > + switch (format) { > + case 32: > + pf = PIPE_FORMAT_B8G8R8A8_UNORM; > + break; > + case 24: > + pf = PIPE_FORMAT_B8G8R8X8_UNORM; > + break; > + case 16: > + pf = PIPE_FORMAT_Z16_UNORM; > + break; > + default: > + return NULL; > + } > + > + buffer = CALLOC_STRUCT(dri2_buffer); > + if (!buffer) > + return NULL; > + > + memset(&templ, 0, sizeof(templ)); > + templ.bind = bind; > + templ.format = pf; > + templ.target = PIPE_TEXTURE_2D; > + templ.last_level = 0; > + templ.width0 = width; > + templ.height0 = height; > + templ.depth0 = 1; > + templ.array_size = 1; > + > + buffer->resource = > + screen->base.screen->resource_create(screen->base.screen, &templ); > + if (!buffer->resource) { > + FREE(buffer); > + return NULL; > + } > + > + memset(&whandle, 0, sizeof(whandle)); > + whandle.type = DRM_API_HANDLE_TYPE_SHARED; > + screen->base.screen->resource_get_han
Re: [Mesa-dev] [PATCH 11/11] nvc0: fix dri3 prime buffer creation
Does nv50 need a similar fix? [BTW note that nv50 the chipset (not the family), is unable to place memtype != 0 buffers into gart. nv84+ are all fine.] On Wed, Jun 18, 2014 at 11:27 PM, Axel Davy wrote: > From: Dave Airlie > > We need to place shared buffers into GART. > > Signed-off-by: Dave Airlie > Reviewed-by: Axel Davy > --- > src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c > b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c > index 79c9390..2f3cba8 100644 > --- a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c > +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c > @@ -296,7 +296,7 @@ nvc0_miptree_create(struct pipe_screen *pscreen, > } > bo_config.nvc0.tile_mode = mt->level[0].tile_mode; > > - if (!bo_config.nvc0.memtype && pt->usage == PIPE_USAGE_STAGING) > + if (!bo_config.nvc0.memtype && (pt->usage == PIPE_USAGE_STAGING || > pt->bind & PIPE_BIND_SHARED)) >mt->base.domain = NOUVEAU_BO_GART; > else >mt->base.domain = NOUVEAU_BO_VRAM; > -- > 1.9.1 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] GLX: Add an env var to enable the support of GLX extensions needing both client and server support, in case of client support and direct rendering.
In the case of XWayland, there's no accelerated indirect rendering. For example GLX_ARB_create_context is not advertised by the server, and according to the spec, we are not allowed to advertise it to the application because of that. This env var makes Mesa ignore this restriction, and a GLX extension is advertised whenever the client supports it. Signed-off-by: Axel Davy --- src/glx/glxextensions.c | 9 - 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/glx/glxextensions.c b/src/glx/glxextensions.c index ce5d66d..34cb6d0 100644 --- a/src/glx/glxextensions.c +++ b/src/glx/glxextensions.c @@ -566,10 +566,15 @@ __glXCalculateUsableExtensions(struct glx_screen * psc, GLboolean display_is_direct_capable, int minor_version) { + const char *ignore_server_restrictions = getenv("IGNORE_SERVER"); + unsigned char should_ignore_server_restrictions = 0; unsigned char server_support[8]; unsigned char usable[8]; unsigned i; + if (ignore_server_restrictions && !strcmp(ignore_server_restrictions,"1")) + should_ignore_server_restrictions = 0xff; + __glXExtensionsCtr(); __glXExtensionsCtrScreen(psc); @@ -617,7 +622,9 @@ __glXCalculateUsableExtensions(struct glx_screen * psc, | (client_glx_support[i] & psc->direct_support[i] & server_support[i]) | (client_glx_support[i] & psc->direct_support[i] & - direct_glx_only[i]); + direct_glx_only[i]) +| (client_glx_support[i] & psc->direct_support[i] & + should_ignore_server_restrictions); } } else { -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 09/11] Wayland/egl: Add GPU offloading support
This is easier than GLX DRI3 GPU offloading support, because applications are not allowed to read the front buffer. We just need to send to the server buffers it can read, and for that we use for every back buffer an intermediate buffer with no tiling to which we copy before sending the buffer to the compositor. Signed-off-by: Axel Davy --- src/egl/drivers/dri2/egl_dri2.h | 5 +- src/egl/drivers/dri2/platform_wayland.c | 171 ++-- 2 files changed, 142 insertions(+), 34 deletions(-) diff --git a/src/egl/drivers/dri2/egl_dri2.h b/src/egl/drivers/dri2/egl_dri2.h index 0dd9d69..4b70c48 100644 --- a/src/egl/drivers/dri2/egl_dri2.h +++ b/src/egl/drivers/dri2/egl_dri2.h @@ -195,6 +195,8 @@ struct dri2_egl_display int authenticated; int formats; uint32_t capabilities; + int is_different_gpu; + int blit_front; #endif }; @@ -247,7 +249,8 @@ struct dri2_egl_surface struct { #ifdef HAVE_WAYLAND_PLATFORM struct wl_buffer *wl_buffer; - __DRIimage *dri_image; + __DRIimage *rendering_image; + __DRIimage *shared_image; #endif #ifdef HAVE_DRM_PLATFORM struct gbm_bo *bo; diff --git a/src/egl/drivers/dri2/platform_wayland.c b/src/egl/drivers/dri2/platform_wayland.c index 537d26e..0dd4640 100644 --- a/src/egl/drivers/dri2/platform_wayland.c +++ b/src/egl/drivers/dri2/platform_wayland.c @@ -238,8 +238,10 @@ dri2_wl_destroy_surface(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf) for (i = 0; i < ARRAY_SIZE(dri2_surf->color_buffers); i++) { if (dri2_surf->color_buffers[i].wl_buffer) wl_buffer_destroy(dri2_surf->color_buffers[i].wl_buffer); - if (dri2_surf->color_buffers[i].dri_image) - dri2_dpy->image->destroyImage(dri2_surf->color_buffers[i].dri_image); + if (dri2_surf->color_buffers[i].rendering_image) { + dri2_dpy->image->destroyImage(dri2_surf->color_buffers[i].rendering_image); + dri2_dpy->image->destroyImage(dri2_surf->color_buffers[i].shared_image); + } } for (i = 0; i < __DRI_BUFFER_COUNT; i++) @@ -272,11 +274,14 @@ dri2_wl_release_buffers(struct dri2_egl_surface *dri2_surf) 
if (dri2_surf->color_buffers[i].wl_buffer && !dri2_surf->color_buffers[i].locked) wl_buffer_destroy(dri2_surf->color_buffers[i].wl_buffer); - if (dri2_surf->color_buffers[i].dri_image) - dri2_dpy->image->destroyImage(dri2_surf->color_buffers[i].dri_image); + if (dri2_surf->color_buffers[i].rendering_image) { + dri2_dpy->image->destroyImage(dri2_surf->color_buffers[i].rendering_image); + dri2_dpy->image->destroyImage(dri2_surf->color_buffers[i].shared_image); + } dri2_surf->color_buffers[i].wl_buffer = NULL; - dri2_surf->color_buffers[i].dri_image = NULL; + dri2_surf->color_buffers[i].rendering_image = NULL; + dri2_surf->color_buffers[i].shared_image = NULL; dri2_surf->color_buffers[i].locked = 0; } @@ -292,6 +297,7 @@ get_back_bo(struct dri2_egl_surface *dri2_surf) { struct dri2_egl_display *dri2_dpy = dri2_egl_display(dri2_surf->base.Resource.Display); + unsigned int use_flags; int i; /* We always want to throttle to some event (either a frame callback or @@ -311,24 +317,45 @@ get_back_bo(struct dri2_egl_surface *dri2_surf) continue; if (dri2_surf->back == NULL) dri2_surf->back = &dri2_surf->color_buffers[i]; - else if (dri2_surf->back->dri_image == NULL) + else if (dri2_surf->back->rendering_image == NULL) dri2_surf->back = &dri2_surf->color_buffers[i]; } } if (dri2_surf->back == NULL) return -1; - if (dri2_surf->back->dri_image == NULL) { - dri2_surf->back->dri_image = + + if (dri2_surf->back->rendering_image == NULL) { + use_flags = __DRI_IMAGE_USE_SHARE; + + if (dri2_dpy->is_different_gpu) + use_flags |= __DRI_IMAGE_USE_LINEAR; + + dri2_surf->back->shared_image = dri2_dpy->image->createImage(dri2_dpy->dri_screen, dri2_surf->base.Width, dri2_surf->base.Height, __DRI_IMAGE_FORMAT_ARGB, - __DRI_IMAGE_USE_SHARE, + use_flags, NULL); + if (dri2_surf->back->shared_image == NULL) + return -1; + + if (dri2_dpy->blit_front) + dri2_surf->back->rendering_image = +dri2_dpy->image->createImage(dri2_dpy->dri_screen, + dri2_surf->base.Width, + dri2_surf->base.Height, + 
__DRI_IMAGE_FORMAT_ARGB, +
[Mesa-dev] [PATCH 08/11] GLX/DRI3: Add GPU offloading support.
The differences with DRI2 GPU offloading are: . There's no logic for GPU offloading needed in the Xserver . for DRI2, the card would render to a back buffer, and the content would be copied to the front buffer (the same buffers every time). Here we can potentially use several back buffers and copy to buffers with no tiling to share with X. We send them with the Present extension. That means that the DRI2 solution is forced to have tearing with GPU offloading. In the ideal scenario, this DRI3 solution doesn't have this problem. However, without dma-buf fences, a race can appear (if the card is slow and the rendering hasn't finished before the server card reads the buffer), and then old content is displayed. If a user hits this, they should probably revert to the DRI2 solution (LIBGL_DRI3_DISABLE). Users with cards fast enough seem to not hit this in practice (I have an AMD HD 7730M, and I don't hit this, except if I force a low dpm mode) . for non-fullscreen apps, the DRI2 GPU offloading solution requires compositing. This DRI3 solution doesn't have this requirement. Rendering to a pixmap also works. . There is no need to have a DDX loaded for the secondary card. 
V4: Fixes some piglit tests Signed-off-by: Axel Davy --- src/glx/dri3_glx.c | 236 +++- src/glx/dri3_priv.h | 2 + 2 files changed, 198 insertions(+), 40 deletions(-) diff --git a/src/glx/dri3_glx.c b/src/glx/dri3_glx.c index b309cd4..f147112 100644 --- a/src/glx/dri3_glx.c +++ b/src/glx/dri3_glx.c @@ -596,22 +596,44 @@ dri3_copy_sub_buffer(__GLXDRIdrawable *pdraw, int x, int y, { struct dri3_drawable *priv = (struct dri3_drawable *) pdraw; struct dri3_screen *psc = (struct dri3_screen *) pdraw->psc; + struct dri3_context *pcp = (struct dri3_context *) __glXGetCurrentContext(); xcb_connection_t *c = XGetXCBConnection(priv->base.psc->dpy); - struct dri3_buffer *back = dri3_back_buffer(priv); + struct dri3_buffer *back; - unsigned flags; + unsigned flags = __DRI2_FLUSH_DRAWABLE; /* Check we have the right attachments */ if (!priv->have_back || priv->is_pixmap) return; - flags = __DRI2_FLUSH_DRAWABLE; if (flush) flags |= __DRI2_FLUSH_CONTEXT; dri3_flush(psc, priv, flags, __DRI2_THROTTLE_SWAPBUFFER); + back = dri3_back_buffer(priv); y = priv->height - y - height; + if (psc->is_different_gpu && (&pcp->base != &dummyContext) && pcp->base.psc == &psc->base) { + /* Update the linear buffer part of the back buffer + * for the dri3_copy_area operation + */ + psc->image->blitImage(pcp->driContext, +back->linear_buffer, +back->image, +0, 0, back->width, +back->height, +0, 0, back->width, +back->height, 1); + /* We use blitImage to update our fake front, + */ + if (priv->have_fake_front) + psc->image->blitImage(pcp->driContext, + dri3_fake_front_buffer(priv)->image, + back->image, + x, y, width, height, + x, y, width, height, 1); + } + dri3_fence_reset(c, back); dri3_copy_area(c, dri3_back_buffer(priv)->pixmap, @@ -622,7 +644,7 @@ dri3_copy_sub_buffer(__GLXDRIdrawable *pdraw, int x, int y, /* Refresh the fake front (if present) after we just damaged the real * front. 
*/ - if (priv->have_fake_front) { + if (priv->have_fake_front && !psc->is_different_gpu) { dri3_fence_reset(c, dri3_fake_front_buffer(priv)); dri3_copy_area(c, dri3_back_buffer(priv)->pixmap, @@ -655,25 +677,62 @@ dri3_copy_drawable(struct dri3_drawable *priv, Drawable dest, Drawable src) static void dri3_wait_x(struct glx_context *gc) { + struct dri3_context *pcp = (struct dri3_context *) gc; struct dri3_drawable *priv = (struct dri3_drawable *) GetGLXDRIDrawable(gc->currentDpy, gc->currentDrawable); + struct dri3_screen *psc; + struct dri3_buffer *front; if (priv == NULL || !priv->have_fake_front) return; - dri3_copy_drawable(priv, dri3_fake_front_buffer(priv)->pixmap, priv->base.xDrawable); + psc = (struct dri3_screen *) priv->base.psc; + front = dri3_fake_front_buffer(priv); + + dri3_copy_drawable(priv, front->pixmap, priv->base.xDrawable); + + /* In the psc->is_different_gpu case, the linear buffer has been updated, +* but not yet the tiled buffer. +* Copy back to the tiled buffer we use for rendering. +* Note that we don't need flushing. +*/ + if (psc->is_different_gpu && (&pcp->base != &dummyContext) && pcp->base.psc == &psc->base) + psc->image->blitImage(pcp->driContext, +front->image, +front->linear_buffer, +
[Mesa-dev] [PATCH 11/11] nvc0: fix dri3 prime buffer creation
From: Dave Airlie We need to place shared buffers into GART. Signed-off-by: Dave Airlie Reviewed-by: Axel Davy --- src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c index 79c9390..2f3cba8 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c @@ -296,7 +296,7 @@ nvc0_miptree_create(struct pipe_screen *pscreen, } bo_config.nvc0.tile_mode = mt->level[0].tile_mode; - if (!bo_config.nvc0.memtype && pt->usage == PIPE_USAGE_STAGING) + if (!bo_config.nvc0.memtype && (pt->usage == PIPE_USAGE_STAGING || pt->bind & PIPE_BIND_SHARED)) mt->base.domain = NOUVEAU_BO_GART; else mt->base.domain = NOUVEAU_BO_VRAM; -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 07/11] Gallium/dri2: implement blitImage
V3: call flush_resource before flush V4: Add new flags Signed-off-by: Axel Davy --- src/gallium/state_trackers/dri/drm/dri2.c | 55 +-- 1 file changed, 52 insertions(+), 3 deletions(-) diff --git a/src/gallium/state_trackers/dri/drm/dri2.c b/src/gallium/state_trackers/dri/drm/dri2.c index 124d91b..9753ce0 100644 --- a/src/gallium/state_trackers/dri/drm/dri2.c +++ b/src/gallium/state_trackers/dri/drm/dri2.c @@ -1132,6 +1132,54 @@ dri2_from_dma_bufs(__DRIscreen *screen, } static void +dri2_blit_image(__DRIcontext *context, __DRIimage *dst, __DRIimage *src, +int dstx0, int dsty0, int dstwidth, int dstheight, +int srcx0, int srcy0, int srcwidth, int srcheight, +int flush_flag) +{ + struct dri_context *ctx = dri_context(context); + struct pipe_context *pipe = ctx->st->pipe; + struct pipe_screen *screen; + struct pipe_fence_handle *fence; + struct pipe_blit_info blit; + + if (!dst || !src) + return; + + memset(&blit, 0, sizeof(blit)); + blit.dst.resource = dst->texture; + blit.dst.box.x = dstx0; + blit.dst.box.y = dsty0; + blit.dst.box.width = dstwidth; + blit.dst.box.height = dstheight; + blit.dst.box.depth = 1; + blit.dst.format = dst->texture->format; + blit.src.resource = src->texture; + blit.src.box.x = srcx0; + blit.src.box.y = srcy0; + blit.src.box.width = srcwidth; + blit.src.box.height = srcheight; + blit.src.box.depth = 1; + blit.src.format = src->texture->format; + blit.mask = PIPE_MASK_RGBA; + blit.filter = PIPE_TEX_FILTER_NEAREST; + + pipe->blit(pipe, &blit); + + if (flush_flag == __BLIT_FLAG_FLUSH) { + pipe->flush_resource(pipe, dst->texture); + ctx->st->flush(ctx->st, 0, NULL); + } + else if (flush_flag == __BLIT_FLAG_FINISH) { + screen = dri_screen(ctx->sPriv)->base.screen; + pipe->flush_resource(pipe, dst->texture); + ctx->st->flush(ctx->st, 0, &fence); + (void) screen->fence_finish(screen, fence, PIPE_TIMEOUT_INFINITE); + screen->fence_reference(screen, &fence, NULL); + } +} + +static void dri2_destroy_image(__DRIimage *img) { 
pipe_resource_reference(&img->texture, NULL); @@ -1140,7 +1188,7 @@ dri2_destroy_image(__DRIimage *img) /* The extension is modified during runtime if DRI_PRIME is detected */ static __DRIimageExtension dri2ImageExtension = { -.base = { __DRI_IMAGE, 6 }, +.base = { __DRI_IMAGE, 9 }, .createImageFromName = dri2_create_image_from_name, .createImageFromRenderbuffer = dri2_create_image_from_renderbuffer, @@ -1152,6 +1200,9 @@ static __DRIimageExtension dri2ImageExtension = { .createImageFromNames = dri2_from_names, .fromPlanar = dri2_from_planar, .createImageFromTexture = dri2_create_from_texture, +.createImageFromFds = NULL, +.createImageFromDmaBufs = NULL, +.blitImage= dri2_blit_image, }; /* @@ -1206,8 +1257,6 @@ dri2_init_screen(__DRIscreen * sPriv) if (drmGetCap(sPriv->fd, DRM_CAP_PRIME, &cap) == 0 && (cap & DRM_PRIME_CAP_IMPORT)) { - - dri2ImageExtension.base.version = 8; dri2ImageExtension.createImageFromFds = dri2_from_fds; dri2ImageExtension.createImageFromDmaBufs = dri2_from_dma_bufs; } -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 05/11] loader: Use drirc device_id parameter in complement to DRI_PRIME
DRI_PRIME is not very handy, because you have to launch the executable with it set, which is not always easy to do. By using drirc, the user specifies the target executable and the device to use. After that the program will be launched everytime on the target device. For example if .drirc contains: Then glmark2 will use if possible the render-node of ID_PATH_TAG pci-_01_00_0. v2: Fix compilation issue v3: Add "-lm" and rebase. Signed-off-by: Axel Davy --- src/Makefile.am | 4 +++- src/loader/Makefile.am | 23 ++--- src/loader/loader.c | 27 + src/mesa/drivers/dri/common/xmlconfig.h | 2 ++ src/mesa/drivers/dri/common/xmlpool/t_options.h | 14 + 5 files changed, 66 insertions(+), 4 deletions(-) diff --git a/src/Makefile.am b/src/Makefile.am index 9d1580f..d4a7090 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -19,12 +19,14 @@ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS # IN THE SOFTWARE. -SUBDIRS = gtest loader mapi +SUBDIRS = gtest mapi if NEED_OPENGL_COMMON SUBDIRS += glsl mesa endif +SUBDIRS += loader + if HAVE_DRI_GLX SUBDIRS += glx endif diff --git a/src/loader/Makefile.am b/src/loader/Makefile.am index bddf7ac..ae8a844 100644 --- a/src/loader/Makefile.am +++ b/src/loader/Makefile.am @@ -29,6 +29,25 @@ libloader_la_CPPFLAGS = \ $(VISIBILITY_CFLAGS) \ $(LIBUDEV_CFLAGS) +libloader_la_SOURCES = $(LOADER_C_FILES) +libloader_la_LIBADD = + +if NEED_OPENGL_COMMON +libloader_la_CPPFLAGS += \ + -I$(top_srcdir)/src/mesa/drivers/dri/common/ \ + -I$(top_builddir)/src/mesa/drivers/dri/common/ \ + -I$(top_srcdir)/src/mesa/ \ + -I$(top_srcdir)/src/mapi/ \ + -DUSE_DRICONF + +libloader_la_SOURCES += \ + $(top_srcdir)/src/mesa/drivers/dri/common/xmlconfig.c + +libloader_la_LIBADD += \ + -lm \ + $(EXPAT_LIBS) +endif + if !HAVE_LIBDRM libloader_la_CPPFLAGS += \ -D__NOT_HAVE_DRM_H @@ -36,8 +55,6 @@ else libloader_la_CPPFLAGS += \ $(LIBDRM_CFLAGS) -libloader_la_LIBADD = \ +libloader_la_LIBADD += \ $(LIBDRM_LIBS) endif - -libloader_la_SOURCES = 
$(LOADER_C_FILES) diff --git a/src/loader/loader.c b/src/loader/loader.c index 19d99d5..47e1f58 100644 --- a/src/loader/loader.c +++ b/src/loader/loader.c @@ -74,6 +74,10 @@ #include #include #include +#ifdef USE_DRICONF +#include "xmlconfig.h" +#include "xmlpool.h" +#endif #endif #ifdef HAVE_SYSFS #include @@ -323,9 +327,22 @@ drm_open_device(const char *device_name) return fd; } +#ifdef USE_DRICONF +const char __driConfigOptionsLoader[] = +DRI_CONF_BEGIN +DRI_CONF_SECTION_INITIALIZATION +DRI_CONF_DEVICE_ID_PATH_TAG() +DRI_CONF_SECTION_END +DRI_CONF_END; +#endif + int loader_get_user_preferred_fd(int default_fd, int *different_device) { struct udev *udev; +#ifdef USE_DRICONF + driOptionCache defaultInitOptions; + driOptionCache userInitOptions; +#endif const char *dri_prime = getenv("DRI_PRIME"); char *prime = NULL; int is_different_device = 0, fd = default_fd; @@ -337,6 +354,16 @@ int loader_get_user_preferred_fd(int default_fd, int *different_device) if (dri_prime) prime = strdup(dri_prime); +#ifdef USE_DRICONF + else { + driParseOptionInfo(&defaultInitOptions, __driConfigOptionsLoader); + driParseConfigFiles(&userInitOptions, &defaultInitOptions, 0, "loader"); + if (driCheckOption(&userInitOptions, "device_id", DRI_STRING)) + prime = strdup(driQueryOptionstr(&userInitOptions, "device_id")); + driDestroyOptionCache(&userInitOptions); + driDestroyOptionInfo(&defaultInitOptions); + } +#endif if (prime == NULL) { *different_device = 0; diff --git a/src/mesa/drivers/dri/common/xmlconfig.h b/src/mesa/drivers/dri/common/xmlconfig.h index 786caae..a4daa6b 100644 --- a/src/mesa/drivers/dri/common/xmlconfig.h +++ b/src/mesa/drivers/dri/common/xmlconfig.h @@ -30,6 +30,8 @@ #ifndef __XMLCONFIG_H #define __XMLCONFIG_H +#include + #define STRING_CONF_MAXLEN 25 /** \brief Option data types */ diff --git a/src/mesa/drivers/dri/common/xmlpool/t_options.h b/src/mesa/drivers/dri/common/xmlpool/t_options.h index 3bf804a..fc9e104 100644 --- 
a/src/mesa/drivers/dri/common/xmlpool/t_options.h +++ b/src/mesa/drivers/dri/common/xmlpool/t_options.h @@ -321,3 +321,17 @@ DRI_CONF_SECTION_BEGIN \ DRI_CONF_OPT_BEGIN_B(always_have_depth_buffer, def) \ DRI_CONF_DESC(en,gettext("Create all visuals with a depth buffer")) \ DRI_CONF_OPT_END + + + +/** + * \brief Initialization configuration options + */ +#define DRI_CONF_SECTION_INITIALIZATION \ +DRI_CONF_SECTION_BEGIN \ +DRI_CONF_DESC(en,gettext("Initialization")) + +#define DRI_CONF_DEVICE_ID_PATH_TAG(def) \ +DRI_CONF_OPT_BEGIN(device_id, string, def) \ +DRI_CONF
[Mesa-dev] [PATCH 02/11] Makes DRI3 use invalidate.
This doesn't change anything to the intel DRI3 implementation, but enables the gallium implementation to use dri2.stamp instead of relying on the stamp shared with the st backend. Signed-off-by: Axel Davy --- src/glx/dri3_glx.c | 7 +++ 1 file changed, 7 insertions(+) diff --git a/src/glx/dri3_glx.c b/src/glx/dri3_glx.c index 55eed39..b309cd4 100644 --- a/src/glx/dri3_glx.c +++ b/src/glx/dri3_glx.c @@ -1308,9 +1308,14 @@ static const __DRIimageLoaderExtension imageLoaderExtension = { .flushFrontBuffer= dri3_flush_front_buffer, }; +const __DRIuseInvalidateExtension dri3UseInvalidate = { + .base = { __DRI_USE_INVALIDATE, 1 } +}; + static const __DRIextension *loader_extensions[] = { &imageLoaderExtension.base, &systemTimeExtension.base, + &dri3UseInvalidate.base, NULL }; @@ -1384,6 +1389,8 @@ dri3_swap_buffers(__GLXDRIdrawable *pdraw, int64_t target_msc, int64_t divisor, ++(*priv->stamp); } + (*psc->f->invalidate)(priv->driDrawable); + return ret; } -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 04/11] drirc: Add string support
Signed-off-by: Axel Davy --- src/mesa/drivers/dri/common/xmlconfig.c | 29 + src/mesa/drivers/dri/common/xmlconfig.h | 7 ++- 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/common/xmlconfig.c b/src/mesa/drivers/dri/common/xmlconfig.c index b95e452..d41d2b2 100644 --- a/src/mesa/drivers/dri/common/xmlconfig.c +++ b/src/mesa/drivers/dri/common/xmlconfig.c @@ -311,6 +311,11 @@ static GLboolean parseValue (driOptionValue *v, driOptionType type, case DRI_FLOAT: v->_float = strToF (string, &tail); break; + case DRI_STRING: + if (v->_string) + free (v->_string); + v->_string = strndup(string, STRING_CONF_MAXLEN); + return GL_TRUE; } if (tail == string) @@ -404,6 +409,8 @@ static GLboolean checkValue (const driOptionValue *v, const driOptionInfo *info) v->_float <= info->ranges[i].end._float) return GL_TRUE; break; + case DRI_STRING: + break; default: assert (0); /* should never happen */ } @@ -567,6 +574,8 @@ static void parseOptInfoAttr (struct OptInfoData *data, const XML_Char **attr) { cache->info[opt].type = DRI_INT; else if (!strcmp (attrVal[OA_TYPE], "float")) cache->info[opt].type = DRI_FLOAT; +else if (!strcmp (attrVal[OA_TYPE], "string")) + cache->info[opt].type = DRI_STRING; else XML_FATAL ("illegal type in option: %s.", attrVal[OA_TYPE]); @@ -867,6 +876,7 @@ static void optConfEndElem (void *userData, const XML_Char *name) { /** \brief Initialize an option cache based on info */ static void initOptionCache (driOptionCache *cache, const driOptionCache *info) { +GLuint i, size = 1 << info->tableSize; cache->info = info->info; cache->tableSize = info->tableSize; cache->values = malloc((1values, info->values, (1 info[i].type == DRI_STRING) + XSTRDUP(cache->values[i]._string, info->values[i]._string); +} } /** \brief Parse the named configuration file */ @@ -981,6 +995,13 @@ void driDestroyOptionInfo (driOptionCache *info) { } void driDestroyOptionCache (driOptionCache *cache) { +if (cache->info) { + GLuint i, size = 1 << 
cache->tableSize; + for (i = 0; i < size; ++i) { + if (cache->info[i].type == DRI_STRING) + free(cache->values[i]._string); + } +} free(cache->values); } @@ -1013,3 +1034,11 @@ GLfloat driQueryOptionf (const driOptionCache *cache, const char *name) { assert (cache->info[i].type == DRI_FLOAT); return cache->values[i]._float; } + +char *driQueryOptionstr (const driOptionCache *cache, const char *name) { +GLuint i = findOption (cache, name); + /* make sure the option is defined and has the correct type */ +assert (cache->info[i].name != NULL); +assert (cache->info[i].type == DRI_STRING); +return cache->values[i]._string; +} diff --git a/src/mesa/drivers/dri/common/xmlconfig.h b/src/mesa/drivers/dri/common/xmlconfig.h index d0ad42c..786caae 100644 --- a/src/mesa/drivers/dri/common/xmlconfig.h +++ b/src/mesa/drivers/dri/common/xmlconfig.h @@ -30,9 +30,11 @@ #ifndef __XMLCONFIG_H #define __XMLCONFIG_H +#define STRING_CONF_MAXLEN 25 + /** \brief Option data types */ typedef enum driOptionType { -DRI_BOOL, DRI_ENUM, DRI_INT, DRI_FLOAT +DRI_BOOL, DRI_ENUM, DRI_INT, DRI_FLOAT, DRI_STRING } driOptionType; /** \brief Option value */ @@ -40,6 +42,7 @@ typedef union driOptionValue { GLboolean _bool; /**< \brief Boolean */ GLint _int; /**< \brief Integer or Enum */ GLfloat _float; /**< \brief Floating-point */ +char *_string; /**< \brief String */ } driOptionValue; /** \brief Single range of valid values @@ -118,5 +121,7 @@ GLboolean driQueryOptionb (const driOptionCache *cache, const char *name); GLint driQueryOptioni (const driOptionCache *cache, const char *name); /** \brief Query a floating-point option value */ GLfloat driQueryOptionf (const driOptionCache *cache, const char *name); +/** \brief Query a string option value */ +char *driQueryOptionstr (const driOptionCache *cache, const char *name); #endif -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 10/11] Radeonsi: Use dma_copy when possible for si_blit.
This significantly improves GLX DRI3 GPU offloading, particularly on CPU-bound benchmarks. There is no performance impact for DRI2 GPU offloading. v2: Add missing tests Signed-off-by: Axel Davy Reviewed-by: Marek Olšák --- src/gallium/drivers/radeonsi/si_blit.c | 19 +++ 1 file changed, 19 insertions(+) diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index 8c3e136..6162dfa 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -730,6 +730,25 @@ static void si_blit(struct pipe_context *ctx, return; } + if (info->src.box.width == info->dst.box.width && + info->src.box.height == info->dst.box.height && + info->src.format == info->dst.format && + info->src.box.width > 0 && + info->src.box.height > 0 && + info->src.resource->nr_samples <= 1 && + info->dst.resource->nr_samples <= 1 && + info->src.box.depth == info->dst.box.depth && + info->mask == PIPE_MASK_RGBA && + !info->scissor_enable && + (!info->render_condition_enable || +!sctx->b.current_render_cond)) { + sctx->b.dma_copy(ctx, info->dst.resource, info->dst.level, +info->dst.box.x, info->dst.box.y, +info->dst.box.z, info->src.resource, +info->src.level, &(info->src.box)); + return; + } + assert(util_blitter_is_blit_supported(sctx->blitter, info)); /* The driver doesn't decompress resources automatically while -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 03/11] Loader: Add gpu selection code via DRI_PRIME.
v2: Fix the leak of device_name v3: Rebased This makes it possible to use the DRI_PRIME env var to specify which gpu to use. Two syntaxes are supported: If DRI_PRIME is 1, it means: take any gpu other than the default one. If DRI_PRIME is the ID_PATH_TAG of a device: choose this device if possible. The ID_PATH_TAG is a tag filled in by udev. You can check it with 'udevadm info' on the device node. For example it can be "pci-0000_01_00_0". Render-nodes need to be enabled to choose another gpu, and they need to have the ID_PATH_TAG advertised. With older versions of udev, the tag may not be advertised for render-nodes; in that case one needs to add a file containing: SUBSYSTEM=="drm", IMPORT{builtin}="path_id" in /etc/udev/rules.d/ Signed-off-by: Axel Davy --- src/loader/loader.c | 185 src/loader/loader.h | 7 ++ 2 files changed, 192 insertions(+) diff --git a/src/loader/loader.c b/src/loader/loader.c index 0f26265..19d99d5 100644 --- a/src/loader/loader.c +++ b/src/loader/loader.c @@ -70,6 +70,10 @@ #ifdef HAVE_LIBUDEV #include #include +#include +#include +#include +#include #endif #ifdef HAVE_SYSFS #include @@ -214,6 +218,187 @@ out: return (*chip_id >= 0); } + +static char * +get_render_node_from_id_path_tag(struct udev *udev, + char *id_path_tag, + char another_tag) +{ + struct udev_device *device; + struct udev_enumerate *e; + struct udev_list_entry *entry; + const char *path, *id_path_tag_tmp; + char *path_res; + char found = 0; + UDEV_SYMBOL(struct udev_enumerate *, udev_enumerate_new, + (struct udev *)); + UDEV_SYMBOL(int, udev_enumerate_add_match_subsystem, + (struct udev_enumerate *, const char *)); + UDEV_SYMBOL(int, udev_enumerate_add_match_sysname, + (struct udev_enumerate *, const char *)); + UDEV_SYMBOL(int, udev_enumerate_scan_devices, + (struct udev_enumerate *)); + UDEV_SYMBOL(struct udev_list_entry *, udev_enumerate_get_list_entry, + (struct udev_enumerate *)); + UDEV_SYMBOL(struct udev_list_entry *, udev_list_entry_get_next, + (struct udev_list_entry *)); +
UDEV_SYMBOL(const char *, udev_list_entry_get_name, + (struct udev_list_entry *)); + UDEV_SYMBOL(struct udev_device *, udev_device_new_from_syspath, + (struct udev *, const char *)); + UDEV_SYMBOL(const char *, udev_device_get_property_value, + (struct udev_device *, const char *)); + UDEV_SYMBOL(const char *, udev_device_get_devnode, + (struct udev_device *)); + UDEV_SYMBOL(struct udev_device *, udev_device_unref, + (struct udev_device *)); + + e = udev_enumerate_new(udev); + udev_enumerate_add_match_subsystem(e, "drm"); + udev_enumerate_add_match_sysname(e, "render*"); + + udev_enumerate_scan_devices(e); + udev_list_entry_foreach(entry, udev_enumerate_get_list_entry(e)) { + path = udev_list_entry_get_name(entry); + device = udev_device_new_from_syspath(udev, path); + if (!device) + continue; + id_path_tag_tmp = udev_device_get_property_value(device, "ID_PATH_TAG"); + if (id_path_tag_tmp) { + if ((!another_tag && !strcmp(id_path_tag, id_path_tag_tmp)) || + (another_tag && strcmp(id_path_tag, id_path_tag_tmp))) { +found = 1; +break; + } + } + udev_device_unref(device); + } + + if (found) { + path_res = strdup(udev_device_get_devnode(device)); + udev_device_unref(device); + return path_res; + } + return NULL; +} + +static char * +get_id_path_tag_from_fd(struct udev *udev, int fd) +{ + struct udev_device *device; + const char *id_path_tag_tmp; + char *id_path_tag; + UDEV_SYMBOL(const char *, udev_device_get_property_value, + (struct udev_device *, const char *)); + UDEV_SYMBOL(struct udev_device *, udev_device_unref, + (struct udev_device *)); + + device = udev_device_new_from_fd(udev, fd); + if (!device) + return NULL; + + id_path_tag_tmp = udev_device_get_property_value(device, "ID_PATH_TAG"); + if (!id_path_tag_tmp) + return NULL; + + id_path_tag = strdup(id_path_tag_tmp); + + udev_device_unref(device); + return id_path_tag; +} + +static int +drm_open_device(const char *device_name) +{ + int fd; +#ifdef O_CLOEXEC + fd = open(device_name, O_RDWR | O_CLOEXEC); + if 
(fd == -1 && errno == EINVAL) +#endif + { + fd = open(device_name, O_RDWR); + if (fd != -1) + fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); + } + return fd; +} + +int loader_get_user_preferred_fd(int default_fd, int *different_device) +{ + struct udev *udev; + const char *dri_prime = getenv("DRI_PRIME"); + char *prime = NULL; + int is_different_device = 0, fd = default_fd; + char *default_device_id_path_tag; + char *device_
[Mesa-dev] [PATCH 06/11] DRIimage: add blitImage to the specification
It allows blitting from one __DRIimage to another. Signed-off-by: Axel Davy --- include/GL/internal/dri_interface.h | 24 +++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/include/GL/internal/dri_interface.h b/include/GL/internal/dri_interface.h index 4d57d0b..300156e 100644 --- a/include/GL/internal/dri_interface.h +++ b/include/GL/internal/dri_interface.h @@ -1005,7 +1005,7 @@ struct __DRIdri2ExtensionRec { * extensions. */ #define __DRI_IMAGE "DRI_IMAGE" -#define __DRI_IMAGE_VERSION 8 +#define __DRI_IMAGE_VERSION 9 /** * These formats correspond to the similarly named MESA_FORMAT_* @@ -1133,6 +1133,13 @@ enum __DRIChromaSiting { #define __DRI_IMAGE_ERROR_BAD_PARAMETER 3 /*@}*/ +/** + * blitImage flags + */ + +#define __BLIT_FLAG_FLUSH 0x0001 +#define __BLIT_FLAG_FINISH 0x0002 + typedef struct __DRIimageRec __DRIimage; typedef struct __DRIimageExtensionRec __DRIimageExtension; struct __DRIimageExtensionRec { @@ -1239,6 +1246,21 @@ struct __DRIimageExtensionRec { enum __DRIChromaSiting vert_siting, unsigned *error, void *loaderPrivate); + + /** +* Blit a part of a __DRIimage to another and flushes +* +* flush_flag: +*0: no flush +*__BLIT_FLAG_FLUSH: flush after the blit operation +*__BLIT_FLAG_FINISH: flush and wait the blit finished +* +* \since 9 +*/ + void (*blitImage)(__DRIcontext *context, __DRIimage *dst, __DRIimage *src, + int dstx0, int dsty0, int dstwidth, int dstheight, + int srcx0, int srcy0, int srcwidth, int srcheight, + int flush_flag); }; -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 01/11] gallium: Add __DRIimageDriverExtension support to gallium
__DRIimageDriverExtension is used by GLX DRI3 and Wayland. This patch is a rewrite of http://lists.freedesktop.org/archives/mesa-dev/2014-May/060318.html and http://lists.freedesktop.org/archives/mesa-dev/2014-May/060317.html Signed-off-by: Axel Davy Reviewed-by: Marek Olšák Previous patches were: Signed-off-by: Ben Skeggs Signed-off-by: Keith Packard --- src/gallium/state_trackers/dri/drm/dri2.c | 469 ++ 1 file changed, 286 insertions(+), 183 deletions(-) diff --git a/src/gallium/state_trackers/dri/drm/dri2.c b/src/gallium/state_trackers/dri/drm/dri2.c index 7dccc5e..124d91b 100644 --- a/src/gallium/state_trackers/dri/drm/dri2.c +++ b/src/gallium/state_trackers/dri/drm/dri2.c @@ -201,32 +201,192 @@ dri2_drawable_get_buffers(struct dri_drawable *drawable, return buffers; } -/** - * Process __DRIbuffer and convert them into pipe_resources. +static bool +dri_image_drawable_get_buffers(struct dri_drawable *drawable, + struct __DRIimageList *images, + const enum st_attachment_type *statts, + unsigned statts_count) +{ + __DRIdrawable *dPriv = drawable->dPriv; + __DRIscreen *sPriv = drawable->sPriv; + unsigned int image_format = __DRI_IMAGE_FORMAT_NONE; + enum pipe_format pf; + uint32_t buffer_mask = 0; + unsigned i, bind; + + for (i = 0; i < statts_count; i++) { + dri_drawable_get_format(drawable, statts[i], &pf, &bind); + if (pf == PIPE_FORMAT_NONE) + continue; + + switch (statts[i]) { + case ST_ATTACHMENT_FRONT_LEFT: + buffer_mask |= __DRI_IMAGE_BUFFER_FRONT; + break; + case ST_ATTACHMENT_BACK_LEFT: + buffer_mask |= __DRI_IMAGE_BUFFER_BACK; + break; + default: + continue; + } + + switch (pf) { + case PIPE_FORMAT_B5G6R5_UNORM: + image_format = __DRI_IMAGE_FORMAT_RGB565; + break; + case PIPE_FORMAT_B8G8R8X8_UNORM: + image_format = __DRI_IMAGE_FORMAT_XRGB; + break; + case PIPE_FORMAT_B8G8R8A8_UNORM: + image_format = __DRI_IMAGE_FORMAT_ARGB; + break; + case PIPE_FORMAT_R8G8B8A8_UNORM: + image_format = __DRI_IMAGE_FORMAT_ABGR; + break; + default: + image_format = 
__DRI_IMAGE_FORMAT_NONE; + break; + } + } + + return (*sPriv->image.loader->getBuffers) (dPriv, image_format, + (uint32_t *) &drawable->base.stamp, + dPriv->loaderPrivate, buffer_mask, + images); +} + +static __DRIbuffer * +dri2_allocate_buffer(__DRIscreen *sPriv, + unsigned attachment, unsigned format, + int width, int height) +{ + struct dri_screen *screen = dri_screen(sPriv); + struct dri2_buffer *buffer; + struct pipe_resource templ; + enum pipe_format pf; + unsigned bind = 0; + struct winsys_handle whandle; + + switch (attachment) { + case __DRI_BUFFER_FRONT_LEFT: + case __DRI_BUFFER_FAKE_FRONT_LEFT: + bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW; + break; + case __DRI_BUFFER_BACK_LEFT: + bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW; + break; + case __DRI_BUFFER_DEPTH: + case __DRI_BUFFER_DEPTH_STENCIL: + case __DRI_BUFFER_STENCIL: +bind = PIPE_BIND_DEPTH_STENCIL; /* XXX sampler? */ + break; + } + + /* because we get the handle and stride */ + bind |= PIPE_BIND_SHARED; + + switch (format) { + case 32: + pf = PIPE_FORMAT_B8G8R8A8_UNORM; + break; + case 24: + pf = PIPE_FORMAT_B8G8R8X8_UNORM; + break; + case 16: + pf = PIPE_FORMAT_Z16_UNORM; + break; + default: + return NULL; + } + + buffer = CALLOC_STRUCT(dri2_buffer); + if (!buffer) + return NULL; + + memset(&templ, 0, sizeof(templ)); + templ.bind = bind; + templ.format = pf; + templ.target = PIPE_TEXTURE_2D; + templ.last_level = 0; + templ.width0 = width; + templ.height0 = height; + templ.depth0 = 1; + templ.array_size = 1; + + buffer->resource = + screen->base.screen->resource_create(screen->base.screen, &templ); + if (!buffer->resource) { + FREE(buffer); + return NULL; + } + + memset(&whandle, 0, sizeof(whandle)); + whandle.type = DRM_API_HANDLE_TYPE_SHARED; + screen->base.screen->resource_get_handle(screen->base.screen, + buffer->resource, &whandle); + + buffer->base.attachment = attachment; + buffer->base.name = whandle.handle; + buffer->base.cpp = util_format_get_blocksize(pf); + 
buffer->base.pitch = whandle.stride; + + return &buffer->base; +} + +static void +dri2_release_buffer(__DRIscreen *sPriv, __DRIbuffer *bPriv) +{ + struct dri2_buffer *buffer = dri2_buffer(bPriv); + + pipe_resource_reference(&buffer->resou
Re: [Mesa-dev] [ Open source project]
Thanks that was helpful.! On Tue, Jun 17, 2014 at 12:35 PM, Thomas Helland wrote: > Hi Roshan, > > I've just gotten into the game myself, so I guess i can provide some > newbie-info. > I guess by "the list" you are referring to the suggestions for GSoC > projects posted. > I think the application date for GSoC is long passed (so no pay :/ ), > but I don't think anyone will bash you for taking on the work if you want > to. > Just in case you haven't seen there's also the newbie-projects page[1]. > There might be some smaller interesting projects there to. > These are not to difficult, and will get you accustomed to > the community, review-process, submitting patches, etc > > I would love to be proven wrong, but I think the best source of > documentation is > poking around in the source code. There's also some documentation at [2]. > There's also a readme-file in the glsl-directory explaining a bit the IR / > compiler. > > The shader-compiler does a lot of "optimization passes" over the supplied > shaders, > trying to do things like dead-code elimination, cse, algebraic > optimizations, etc. > As of now I think (but I'm not sure) this is done more or less randomly. > Try a bunch of them multiple times, until there's no proof that we're > getting further. > > The static ordering part you are referring to i think is basically the > following: > Find a defined sequence that you can perform these passes > (maybe repeat some of them multiple times), a defined number of times, that > when completed yields a result that is as good as today's solution; > 1. Less or equal amount of instructions > 2. Less or equal time to compile shaders. > > Let's say, as an example, dead-code elimination to get rid of some cruft, > then do some algebraic optimizations, then some constant propagation, > followed by more dead-code elimination, then cse, then repeat everything 2 > times. > Basically trying out a lot of sequences, and testing the results. 
> > For testing you should use a toolkit to verify your results. > This can be done by compiling mesa with debug-symbols, and using shader-db > [3]. > Shader-db does a compilation of a boatload of shaders, and reports time > spent > compiling them along with the end instruction count for each. > Obviously startup-times and FPS in shader-heavy games like Dota2 > are also always a nice addition. > > I will not guarantee that what I've written here is 100% correct, and I'm > not really in a position to give any more details than that, as I'm really > new to the game myself, and just getting my feet wet with some > code-cleanups. > > [1] http://wiki.freedesktop.org/dri/NewbieProjects/ > [2] http://mesa3d.org/ > [3] http://cgit.freedesktop.org/~anholt/shader-db > > regard, > Thomas > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 4/5] gallium: Add PIPE_SHADER_CAP_DOUBLES
Am 19.06.2014 03:14, schrieb Dave Airlie: > On 18 June 2014 23:50, Roland Scheidegger wrote: >> Am 18.06.2014 01:54, schrieb Dave Airlie: >>> On 18 June 2014 05:08, Roland Scheidegger wrote: This looks ok to me though since tgsi currently doesn't have any double opcodes (well the docs have them...) it doesn't really apply to most drivers (at least I assume you don't want to add support for it for tgsi). >>> >>> I've mostly forward ported the old gallium double code, and have >>> written most of ARB_gpu_shader_fp64 on top, >>> >>> Though the question I did want to ask Tom is if he is just going to >>> expose hw that has doubles, or does >>> he plan on emulating doubles. >>> >>> For a lot of GLSL4.0 GPUs from AMD fglrx emulates doubles using >>> massive magic shaders, I'm unsure >>> if we should have a lowering pass above/below the TGSI line for these >>> types of situations and what that >>> would mean for this CAP. >> >> Oh that's interesting. I always thought drivers didn't emulate that, and >> if apps want doubles but the device doesn't provide them it needs to do >> that itself. For which chips does fglrx do that? > > Quite a lot of the evergreen family, only CAYMAN and CYPRESS seem > to have native FP64 support in the hw according to the version of the > AMD shader compiler I'm using, all other VLIW4/5 chips seem to emulate > fp64. I assume so they could advertise GL 4.0/SM5. They also expose > the fp64 extension on rv670, rv790, rv770 and rv740 gpus. Oh I was mistakenly thinking it's optional (that is an extension) for GL 4.0 (it is optional for SM5). That explains why they'd emulate it indeed... > >> If you'd want to emulate this, the other question is if you can do it at >> the tgsi level, or if this was exploiting some hw specific bits (well of >> course you could still do it at tgsi level, but if the hw has some bits >> to make this easier, then this isn't efficient). In any case I guess >> this could be decided later.
> > Yeah I'm not sure where would be the best place to lower it, doing it at > the GLSL level might be more generic, but I'm not really sure what algorithm > fglrx uses to do it and so if it takes advantage of other hw features to help. > > Dave > I guess if the hw really doesn't have anything to help with, a generic translation is probably fine (though of course with vliw maybe you could do better in the driver even if the hw doesn't have anything special for it). Roland ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] gallivm: set mcpu when initializing llvm execution engine
From: Roland Scheidegger Previously llvm detected cpu features automatically when the execution engine was created (based on host cpu). This is no longer the case, which meant llvm was then not able to emit some of the intrinsics we used as we didn't specify any sse attributes (only on avx supporting systems this was not a problem since despite at least some llvm versions enabling it anyway we always set this manually). So, instead of trying to figure out which MAttrs to set just set MCPU. This fixes https://bugs.freedesktop.org/show_bug.cgi?id=77493. --- src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 22 -- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp index 38fbe1f..6bea964 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp @@ -468,8 +468,8 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT, /* * AVX feature is not automatically detected from CPUID by the X86 target * yet, because the old (yet default) JIT engine is not capable of - * emitting the opcodes. But as we're using MCJIT here, it is safe to - * add set this attribute. + * emitting the opcodes. On newer llvm versions it is and at least some + * versions (tested with 3.3) will emit avx opcodes without this anyway. */ MAttrs.push_back("+avx"); if (util_cpu_caps.has_f16c) { @@ -478,12 +478,30 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT, builder.setMAttrs(MAttrs); } +#if HAVE_LLVM >= 0x0305 + StringRef MCPU = llvm::sys::getHostCPUName(); + /* +* The cpu bits are no longer set automatically, so need to set mcpu manually. +* Note that the MAttrs set above will be sort of ignored (since we should +* not set any which would not be set by specifying the cpu anyway). 
+* It ought to be safe though since getHostCPUName() should include bits +* not only from the cpu but environment as well (for instance if it's safe +* to use avx instructions which need OS support). According to +* http://llvm.org/bugs/show_bug.cgi?id=19429 however if I understand this +* right it may be necessary to specify older cpu (or disable mattrs) though +* when not using MCJIT so no instructions are generated which the old JIT +* can't handle. Not entirely sure if we really need to do anything yet. +*/ + builder.setMCPU(MCPU); +#endif + ShaderMemoryManager *MM = new ShaderMemoryManager(); *OutCode = MM->getGeneratedCode(); builder.setJITMemoryManager(MM); ExecutionEngine *JIT; + #if HAVE_LLVM >= 0x0302 JIT = builder.create(); #else -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 11/11] clover: Use PIPE_TRANSFER_MAP_DIRECTLY when writing/reading buffers
On 19.06.2014 00:02, Bruno Jiménez wrote: > Note: This is just a proof of concept. > --- > src/gallium/state_trackers/clover/api/transfer.cpp | 4 ++-- > src/gallium/state_trackers/clover/core/object.hpp | 4 > src/gallium/state_trackers/clover/core/resource.cpp | 2 ++ > 3 files changed, 8 insertions(+), 2 deletions(-) > > diff --git a/src/gallium/state_trackers/clover/api/transfer.cpp > b/src/gallium/state_trackers/clover/api/transfer.cpp > index 404ceb0..f34ae8b 100644 > --- a/src/gallium/state_trackers/clover/api/transfer.cpp > +++ b/src/gallium/state_trackers/clover/api/transfer.cpp > @@ -174,8 +174,8 @@ namespace { >static mapping >get(command_queue &q, T obj, cl_map_flags flags, >size_t offset, size_t size) { > - return { q, obj->resource(q), flags, true, > - {{ offset }}, {{ size, 1, 1 }} }; > + return { q, obj->resource(q), flags | CLOVER_TRANSFER_MAP_DIRECTLY, > + true, {{ offset }}, {{ size, 1, 1 }} }; >} > }; > > diff --git a/src/gallium/state_trackers/clover/core/object.hpp > b/src/gallium/state_trackers/clover/core/object.hpp > index 697565c..7d5adf9 100644 > --- a/src/gallium/state_trackers/clover/core/object.hpp > +++ b/src/gallium/state_trackers/clover/core/object.hpp > @@ -33,6 +33,10 @@ > #include "core/property.hpp" > #include "api/dispatch.hpp" > > +#ifndef CLOVER_TRANSFER_MAP_DIRECTLY > +#define CLOVER_TRANSFER_MAP_DIRECTLY (1<<8) > +#endif > + > /// > /// Main namespace of the CL state tracker. > /// > diff --git a/src/gallium/state_trackers/clover/core/resource.cpp > b/src/gallium/state_trackers/clover/core/resource.cpp > index 7b8a40a..c8e97db 100644 > --- a/src/gallium/state_trackers/clover/core/resource.cpp > +++ b/src/gallium/state_trackers/clover/core/resource.cpp > @@ -174,6 +174,8 @@ mapping::mapping(command_queue &q, resource &r, > pctx(q.pipe) { > unsigned usage = ((flags & CL_MAP_WRITE ? PIPE_TRANSFER_WRITE : 0 ) | > (flags & CL_MAP_READ ? PIPE_TRANSFER_READ : 0 ) | > + (flags & CLOVER_TRANSFER_MAP_DIRECTLY ? 
> + PIPE_TRANSFER_MAP_DIRECTLY : 0 ) | > (!blocking ? PIPE_TRANSFER_UNSYNCHRONIZED : 0)); > > p = pctx->transfer_map(pctx, r.pipe, 0, usage, > The driver can return NULL when PIPE_TRANSFER_MAP_DIRECTLY is set (if the driver can't actually map the resource directly), so you'd need to add code to try again without PIPE_TRANSFER_MAP_DIRECTLY in that case. -- Earthling Michel Dänzer| http://www.amd.com Libre software enthusiast |Mesa and X developer ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 4/5] gallium: Add PIPE_SHADER_CAP_DOUBLES
On 18 June 2014 23:50, Roland Scheidegger wrote: > Am 18.06.2014 01:54, schrieb Dave Airlie: >> On 18 June 2014 05:08, Roland Scheidegger wrote: >>> This looks ok to me though since tgsi currently doesn't have any double >>> opcodes (well the docs have them...) it doesn't really apply to most >>> drivers (at least I assume you don't want to add support for it for tgsi). >> >> I've mostly forward ported the old gallium double code, and have >> written most of ARB_gpu_shader_fp64 on top, >> >> Though the question I did want to ask Tom is if he is just going to >> expose hw that has doubles, or does >> he plan on emulating doubles. >> >> For a lot of GLSL4.0 GPUs from AMD fglrx emulates doubles using >> massive magic shaders, I'm unsure >> if we should have a lowering pass above/below the TGSI line for these >> types of situations and what that >> would mean for this CAP. > > Oh that's interesting. I always thought drivers didn't emulate that, and > if apps want doubles but the device doesn't provide them it needs to do > that itself. For which chips does fglrx do that? Quite a lot of the evergreen family, only CAYMAN and CYPRESS seem to have native FP64 support in the hw according to the version of the AMD shader compiler I'm using, all other VLIW4/5 chips seem to emulate fp64. I assume so they could advertise GL 4.0/SM5. They also expose the fp64 extension on rv670, rv790, rv770 and rv740 gpus. > If you'd want to emulate this, the other question is if you can do it at > the tgsi level, or if this was exploiting some hw specific bits (well of > course you could still do it at tgsi level, but if the hw has some bits > to make this easier, then this isn't efficient). In any case I guess > this could be decided later. Yeah I'm not sure where would be the best place to lower it, doing it at the GLSL level might be more generic, but I'm not really sure what algorithm fglrx uses to do it and so if it takes advantage of other hw features to help. Dave. 
___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Broadcom VC4 project
We already have transfer_inline_write. Marek On Thu, Jun 19, 2014 at 2:14 AM, Michel Dänzer wrote: > On 18.06.2014 16:28, Eric Anholt wrote: >> >> I'm probably going to start out doing a gallium driver for simplicity, >> to avoid having to do all the DRI crap we've got in brw_context.c and >> texture miptree validation and getting user data into VBOs and all that >> other almost-boilerplate. Long term I may end up switching to classic >> so I can get swrast fallbacks and the ability to implement single-copy >> manually-tiled TexImage uploads like. > > FWIW, I don't think it'll be hard to do the latter with Gallium. We > probably just need to add transfer API entrypoints which take a pointer > from the caller instead of returning a pointer to it. > > > Good luck for your new project! > > > -- > Earthling Michel Dänzer| http://www.amd.com > Libre software enthusiast |Mesa and X developer > > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] egl: Fix OpenGL ES version checks in _eglParseContextAttribList()
Fixes gles3 Khronos CTS test: egl_create_context.egl_create_context Cc: Signed-off-by: Anuj Phogat --- src/egl/main/eglcontext.c | 11 --- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/egl/main/eglcontext.c b/src/egl/main/eglcontext.c index 70277ab..b8b30cb 100644 --- a/src/egl/main/eglcontext.c +++ b/src/egl/main/eglcontext.c @@ -322,10 +322,15 @@ _eglParseContextAttribList(_EGLContext *ctx, _EGLDisplay *dpy, break; case 3: - default: - /* Don't put additional version checks here. We don't know that - * there won't be versions > 3.0. + /* Update this condition if new OpenGL ES 3.x (x > 1) version is + * announced. */ + if (ctx->ClientMinorVersion > 1) +err = EGL_BAD_MATCH; + break; + + default: + err = EGL_BAD_MATCH; break; } } -- 1.8.3.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Broadcom VC4 project
On 18.06.2014 16:28, Eric Anholt wrote: > > I'm probably going to start out doing a gallium driver for simplicity, > to avoid having to do all the DRI crap we've got in brw_context.c and > texture miptree validation and getting user data into VBOs and all that > other almost-boilerplate. Long term I may end up switching to classic > so I can get swrast fallbacks and the ability to implement single-copy > manually-tiled TexImage uploads like. FWIW, I don't think it'll be hard to do the latter with Gallium. We probably just need to add transfer API entrypoints which take a pointer from the caller instead of returning a pointer to it. Good luck for your new project! -- Earthling Michel Dänzer| http://www.amd.com Libre software enthusiast |Mesa and X developer signature.asc Description: OpenPGP digital signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Broadcom VC4 project
> > I'm working toward building a Mesa driver for Broadcom VC4 (aka > Raspberry Pi). At the moment I'm still bringing up the DRM side of > things, but I hope to be doing bits of userspace in the next few days. > Current status is I have a skeleton DRM KMS driver that's going to talk > to the firmware for modesetting, and now I'm starting on the execution > side of things. > > I'm probably going to start out doing a gallium driver for simplicity, > to avoid having to do all the DRI crap we've got in brw_context.c and > texture miptree validation and getting user data into VBOs and all that > other almost-boilerplate. Long term I may end up switching to classic > so I can get swrast fallbacks and the ability to implement single-copy > manually-tiled TexImage uploads like. For now I want to get to drawing > triangles as soon as I can. Do we know anywhere swrast fallbacks make sense, other than for conformance testing? You've got an ARMv6, so swrast fallbacks are going to be punishing; I don't even think it has NEON extensions. Dave. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/3 v2] mesa: Handle uninitialized textures like other textures in get_tex_level_parameter_image
On 06/18/2014 04:11 PM, Ian Romanick wrote: From: Ian Romanick Instead of catching the special case early, handle it by constructing a fake gl_texture_image that will cause the values required by the OpenGL 4.0 spec to be returned. Previously, calling glGenTextures(1, &t); glBindTexture(GL_TEXTURE_2D, t); glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, 0xDEADBEEF, &value); would not generate an error. Anuj: Can you verify this does not regress proxy_textures_invalid_size? Signed-off-by: Ian Romanick Suggested-by: Brian Paul Cc: "10.2" Cc: Anuj Phogat Cc: Brian Paul --- src/mesa/main/texparam.c | 11 ++- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c index dc17ea5..6767f32 100644 --- a/src/mesa/main/texparam.c +++ b/src/mesa/main/texparam.c @@ -1051,6 +1051,7 @@ get_tex_level_parameter_image(struct gl_context *ctx, GLenum pname, GLint *params) { const struct gl_texture_image *img = NULL; + struct gl_texture_image dummy_image; mesa_format texFormat; img = _mesa_select_tex_image(ctx, texObj, target, level); @@ -1062,12 +1063,12 @@ get_tex_level_parameter_image(struct gl_context *ctx, * instead of 1. TEXTURE_COMPONENTS is deprecated; always * use TEXTURE_INTERNAL_FORMAT." */ + memset(&dummy_image, 0, sizeof(dummy_image)); + dummy_image.TexFormat = MESA_FORMAT_NONE; + dummy_image.InternalFormat = GL_RGBA; + dummy_image._BaseFormat = GL_NONE; - if (pname == GL_TEXTURE_INTERNAL_FORMAT) - *params = GL_RGBA; - else - *params = 0; - return; + img = &dummy_image; } texFormat = img->TexFormat; Reviewed-by: Brian Paul ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [v8 9/9] i965: enable image external sampling for imported dma-buffers
On Wed, Jul 24, 2013 at 5:06 PM, Chad Versace wrote: > This patch is > Reviewed-by: Chad Versace > with the understanding that it depends on previous patches. > > I'll begin reviewing your Piglit tests now. > > > On 07/24/2013 03:23 AM, Topi Pohjolainen wrote: >> >> Signed-off-by: Topi Pohjolainen >> --- >> src/mesa/drivers/dri/i965/intel_extensions.c | 1 + >> src/mesa/drivers/dri/i965/intel_tex_image.c | 7 +++ >> 2 files changed, 8 insertions(+) >> >> diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c >> b/src/mesa/drivers/dri/i965/intel_extensions.c >> index 409d40b..655467c 100644 >> --- a/src/mesa/drivers/dri/i965/intel_extensions.c >> +++ b/src/mesa/drivers/dri/i965/intel_extensions.c >> @@ -122,6 +122,7 @@ intelInitExtensions(struct gl_context *ctx) >> ctx->Extensions.OES_EGL_image = true; >> ctx->Extensions.OES_draw_texture = true; >> ctx->Extensions.OES_standard_derivatives = true; >> + ctx->Extensions.OES_EGL_image_external = true; >> >> if (brw->gen >= 6) >> ctx->Const.GLSLVersion = 140; >> diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c >> b/src/mesa/drivers/dri/i965/intel_tex_image.c >> index 7d478d1..0f0a27d 100644 >> --- a/src/mesa/drivers/dri/i965/intel_tex_image.c >> +++ b/src/mesa/drivers/dri/i965/intel_tex_image.c >> @@ -378,6 +378,13 @@ intel_image_target_texture_2d(struct gl_context *ctx, >> GLenum target, >> return; >> } >> >> + if (target == GL_TEXTURE_EXTERNAL_OES && !image->dma_buf_imported) { >> + _mesa_error(ctx, GL_INVALID_OPERATION, >> +"glEGLImageTargetTexture2DOES(external target is enabled only >> " >> + "for images created with EGL_EXT_image_dma_buf_import"); >> + return; >> + } >> + This condition makes following GLES3 CTS tests to fail: egl_image_external.TestTargetTextureValid egl_image_external.TestSimple Tests pass if we get rid of this error condition. >> /* Disallow depth/stencil textures: we don't have a way to pass the >> * separate stencil miptree of a GL_DEPTH_STENCIL texture through. 
>> */ >> > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/3 v2] mesa: Handle uninitialized textures like other textures in get_tex_level_parameter_image
From: Ian Romanick Instead of catching the special case early, handle it by constructing a fake gl_texture_image that will cause the values required by the OpenGL 4.0 spec to be returned. Previously, calling glGenTextures(1, &t); glBindTexture(GL_TEXTURE_2D, t); glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, 0xDEADBEEF, &value); would not generate an error. Anuj: Can you verify this does not regress proxy_textures_invalid_size? Signed-off-by: Ian Romanick Suggested-by: Brian Paul Cc: "10.2" Cc: Anuj Phogat Cc: Brian Paul --- src/mesa/main/texparam.c | 11 ++- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c index dc17ea5..6767f32 100644 --- a/src/mesa/main/texparam.c +++ b/src/mesa/main/texparam.c @@ -1051,6 +1051,7 @@ get_tex_level_parameter_image(struct gl_context *ctx, GLenum pname, GLint *params) { const struct gl_texture_image *img = NULL; + struct gl_texture_image dummy_image; mesa_format texFormat; img = _mesa_select_tex_image(ctx, texObj, target, level); @@ -1062,12 +1063,12 @@ get_tex_level_parameter_image(struct gl_context *ctx, * instead of 1. TEXTURE_COMPONENTS is deprecated; always * use TEXTURE_INTERNAL_FORMAT." */ + memset(&dummy_image, 0, sizeof(dummy_image)); + dummy_image.TexFormat = MESA_FORMAT_NONE; + dummy_image.InternalFormat = GL_RGBA; + dummy_image._BaseFormat = GL_NONE; - if (pname == GL_TEXTURE_INTERNAL_FORMAT) - *params = GL_RGBA; - else - *params = 0; - return; + img = &dummy_image; } texFormat = img->TexFormat; -- 1.8.1.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/3 v2] mesa: Don't allow GL_TEXTURE_BORDER queries outside compat profile
From: Ian Romanick There are no texture borders in any version of OpenGL ES or desktop OpenGL core profile. Fixes piglit's gl-3.2-texture-border-deprecated. v2: Rebase on different initial change. Signed-off-by: Ian Romanick Reviewed-by: Brian Paul Cc: "10.2 --- src/mesa/main/texparam.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c index 6767f32..6bf116a 100644 --- a/src/mesa/main/texparam.c +++ b/src/mesa/main/texparam.c @@ -1108,6 +1108,8 @@ get_tex_level_parameter_image(struct gl_context *ctx, } break; case GL_TEXTURE_BORDER: + if (ctx->API != API_OPENGL_COMPAT) +goto invalid_pname; *params = img->Border; break; case GL_TEXTURE_RED_SIZE: -- 1.8.1.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/3 v2] mesa: Don't allow GL_TEXTURE_{LUMINANCE, INTENSITY}_* queries outside compat profile
From: Ian Romanick There are no queries for GL_TEXTURE_LUMINANCE_SIZE, GL_TEXTURE_INTENSITY_SIZE, GL_TEXTURE_LUMINANCE_TYPE, or GL_TEXTURE_INTENSITY_TYPE in any version of OpenGL ES or desktop OpenGL core profile. NOTE: Without changes to piglit, this regresses required-sized-texture-formats. v2: Rebase on different initial change. Signed-off-by: Ian Romanick Reviewed-by: Brian Paul Cc: "10.2 --- src/mesa/main/texparam.c | 9 +++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c index 6bf116a..30dd0b9 100644 --- a/src/mesa/main/texparam.c +++ b/src/mesa/main/texparam.c @@ -1123,6 +1123,8 @@ get_tex_level_parameter_image(struct gl_context *ctx, break; case GL_TEXTURE_INTENSITY_SIZE: case GL_TEXTURE_LUMINANCE_SIZE: + if (ctx->API != API_OPENGL_COMPAT) +goto invalid_pname; if (_mesa_base_format_has_channel(img->_BaseFormat, pname)) { *params = _mesa_get_format_bits(texFormat, pname); if (*params == 0) { @@ -1169,12 +1171,15 @@ get_tex_level_parameter_image(struct gl_context *ctx, break; /* GL_ARB_texture_float */ + case GL_TEXTURE_LUMINANCE_TYPE_ARB: + case GL_TEXTURE_INTENSITY_TYPE_ARB: + if (ctx->API != API_OPENGL_COMPAT) +goto invalid_pname; + /* FALLTHROUGH */ case GL_TEXTURE_RED_TYPE_ARB: case GL_TEXTURE_GREEN_TYPE_ARB: case GL_TEXTURE_BLUE_TYPE_ARB: case GL_TEXTURE_ALPHA_TYPE_ARB: - case GL_TEXTURE_LUMINANCE_TYPE_ARB: - case GL_TEXTURE_INTENSITY_TYPE_ARB: case GL_TEXTURE_DEPTH_TYPE_ARB: if (!ctx->Extensions.ARB_texture_float) goto invalid_pname; -- 1.8.1.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2] glsl: Expand matrix flip optimization pass to cover more cases.
On 06/18/2014 05:03 AM, Iago Toral Quiroga wrote: > Also, as suggested by Ian Romanick, make it so we don't need a bunch of > individual handles to flippable matrices, instead we register > matrix/transpose_matrix pairs in a hash table for all built-in matrices > using the non-transpose matrix name as key. > --- > src/glsl/opt_flip_matrices.cpp | 145 > ++--- > 1 file changed, 108 insertions(+), 37 deletions(-) > > diff --git a/src/glsl/opt_flip_matrices.cpp b/src/glsl/opt_flip_matrices.cpp > index 9044fd6..d0b8125 100644 > --- a/src/glsl/opt_flip_matrices.cpp > +++ b/src/glsl/opt_flip_matrices.cpp > @@ -29,44 +29,132 @@ > * On some hardware, this is more efficient. > * > * This currently only does the conversion for built-in matrices which > - * already have transposed equivalents. Namely, gl_ModelViewProjectionMatrix > - * and gl_TextureMatrix. > + * already have transposed equivalents. > */ > #include "ir.h" > #include "ir_optimization.h" > #include "main/macros.h" > +#include "program/hash_table.h" > > namespace { > + > class matrix_flipper : public ir_hierarchical_visitor { > public: > + struct matrix_and_transpose { > + ir_variable *matrix; > + ir_variable *transpose_matrix; > + }; > + > matrix_flipper(exec_list *instructions) > { >progress = false; > - mvp_transpose = NULL; > - texmat_transpose = NULL; > + > + /* Build a hash table of built-in matrices and their transposes. > + * > + * The key for the entries in the hash table is the non-transpose > matrix > + * name. This assumes that all built-in transpose matrices have the > + * "Transpose" suffix. > + */ > + ht = hash_table_ctor(0, hash_table_string_hash, > + hash_table_string_compare); > >foreach_list(n, instructions) { > ir_instruction *ir = (ir_instruction *) n; > ir_variable *var = ir->as_variable(); > - if (!var) > + > + /* Must be a matrix */ > + if (!var || !var->type->is_matrix()) > continue; gl_TextureMatrix is an array of matrices, so var->type->is_matrix() will fail. 
I think you want: if (!var) continue; /* Must be a matrix or array of matrices. */ if (!var->type->is_matrix() && !(var->type->is_array() && var->type->fields.array->is_matrix())) continue; > - if (strcmp(var->name, "gl_ModelViewProjectionMatrixTranspose") == 0) > -mvp_transpose = var; > - if (strcmp(var->name, "gl_TextureMatrixTranspose") == 0) > -texmat_transpose = var; > + /* Must be a built-in */ > + if (strstr(var->name, "gl_") != var->name) > +continue; The name has to start with gl_, not just contain it. Use is_gl_identifier(var->name) instead. > + > + /* Create a new entry for this matrix if we don't have one yet */ > + bool new_entry = false; > + struct matrix_and_transpose *entry = > +(struct matrix_and_transpose *) hash_table_find(ht, var->name); > + if (!entry) { > +new_entry = true; > +entry = new struct matrix_and_transpose(); > +entry->matrix = NULL; > +entry->transpose_matrix = NULL; > + } > + > + const char *transpose_ptr = strstr(var->name, "Transpose"); > + if (transpose_ptr == NULL) { > +entry->matrix = var; > + } else { It's probably worth adding an assertion in case a built-in is ever added with something after Transpose. The probability is very, very low, but I'd rather be safe. assert(transpose_ptr[9] == 0); > +entry->transpose_matrix = var; > + } > + > + if (new_entry) { > +char *entry_key; > +if (transpose_ptr == NULL) { > + entry_key = strdup(var->name); > +} else { > + entry_key = strndup(var->name, transpose_ptr - var->name); > +} hash_table_dtor doesn't free the keys, so all of this memory leaks. Use ralloc_strndup, and only copy the name in the transpose_ptr != NULL case. 
> +hash_table_insert(ht, entry, entry_key); > + } >} > } > > + ~matrix_flipper() > + { > + hash_table_dtor(ht); > + } > + > ir_visitor_status visit_enter(ir_expression *ir); > > bool progress; > > private: > - ir_variable *mvp_transpose; > - ir_variable *texmat_transpose; > + void transform_operands(ir_expression *ir, > + ir_variable *mat_var, > + ir_variable *mat_transpose); > + void transform_operands_array_of_matrix(ir_expression *ir, > + ir_variable *mat_var, > + ir_variable *mat_transpose); > + struct hash_table *ht; > }; > } > > +void > +matrix
Re: [Mesa-dev] [PATCH v2 09/23] glsl: Store info about geometry shaders that emit vertices to non-zero streams.
On 06/18/2014 02:05 PM, Ian Romanick wrote: On 06/18/2014 02:51 AM, Iago Toral Quiroga wrote: On Intel hardware when a geometry shader outputs GL_POINTS primitives we only need to emit vertex control bits if it emits vertices to non-zero streams, so use a flag to track this. This flag will be set to TRUE when a geometry shader calls EmitStreamVertex() or EndStreamPrimitive() with a non-zero stream parameter in a later patch. --- src/mesa/main/mtypes.h | 2 ++ src/mesa/main/shaderapi.c | 1 + src/mesa/main/shaderobj.c | 1 + src/mesa/program/program.c | 1 + 4 files changed, 5 insertions(+) diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index f45dde4..5bc710e 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -2178,6 +2178,7 @@ struct gl_geometry_program GL_TRIANGLES, or GL_TRIANGLES_ADJACENCY_ARB */ GLenum OutputType; /**< GL_POINTS, GL_LINE_STRIP or GL_TRIANGLE_STRIP */ GLboolean UsesEndPrimitive; + GLboolean UsesStreams; For things that are not visible to the GL API, we've been trying to transition away from GL types. Unless Brian or Chris object, I think I'd rather have this be bool (and true / false below). Sounds fine. -Brian ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 20/23] i965: Enable vertex streams up to MAX_VERTEX_STREAMS.
On Wednesday, June 18, 2014 01:45:52 PM Ian Romanick wrote: > On 06/18/2014 02:51 AM, Iago Toral Quiroga wrote: > > --- > > src/mesa/drivers/dri/i965/brw_context.c | 4 > > 1 file changed, 4 insertions(+) > > > > diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c > > index cfb0be0..6ba58d3 100644 > > --- a/src/mesa/drivers/dri/i965/brw_context.c > > +++ b/src/mesa/drivers/dri/i965/brw_context.c > > @@ -507,6 +507,10 @@ brw_initialize_context_constants(struct brw_context *brw) > >ctx->Const.ViewportBounds.Min = -(float)ctx- >Const.MaxViewportWidth; > >ctx->Const.ViewportBounds.Max = ctx->Const.MaxViewportWidth; > > } > > + > > + /* ARB_gpu_shader5 */ > > + if (brw->gen >= 7) > > + ctx->Const.MaxVertexStreams = MAX_VERTEX_STREAMS; > > I'd rather have this be MIN2(4, MAX_VERTEX_STREAMS). That way > increasing MAX_VERTEX_STREAMS to support some new hardware won't break GEN7. That seems like a good idea. --Ken signature.asc Description: This is a digitally signed message part. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] gallium/radeon: Only print a message for LLVM diagnostic errors
We were printing messages for all diagnostic types, which was spamming the console for some OpenCL programs. --- src/gallium/drivers/radeon/radeon_llvm_emit.c | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.c b/src/gallium/drivers/radeon/radeon_llvm_emit.c index 891781f..6a394b2 100644 --- a/src/gallium/drivers/radeon/radeon_llvm_emit.c +++ b/src/gallium/drivers/radeon/radeon_llvm_emit.c @@ -87,11 +87,13 @@ static void radeonDiagnosticHandler(LLVMDiagnosticInfoRef di, void *context) { char *diaginfo_message; diaginfo_message = LLVMGetDiagInfoDescription(di); - fprintf(stderr,"LLVM triggered Diagnostic Handler: %s\n", diaginfo_message); LLVMDisposeMessage(diaginfo_message); diagnosticflag = (unsigned int *)context; - *diagnosticflag = ((LLVMDSError == LLVMGetDiagInfoSeverity(di)) ? 1 : 0); + if (LLVMGetDiagInfoSeverity(di) == LLVMDSError) { + *diagnosticflag = 1; + fprintf(stderr,"LLVM triggered Diagnostic Handler: %s\n", diaginfo_message); + } } #endif -- 1.8.1.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 23/23] mesa: Copy Geom.UsesEndPrimitive when cloning a geometry program.
This patch is Reviewed-by: Ian Romanick Cc: "10.1 10.2" Clearly this field should have been copied as long as it has existed. Right? On 06/18/2014 02:51 AM, Iago Toral Quiroga wrote: > --- > src/mesa/program/program.c | 1 + > 1 file changed, 1 insertion(+) > > diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c > index 1263cea..aedce3e 100644 > --- a/src/mesa/program/program.c > +++ b/src/mesa/program/program.c > @@ -552,6 +552,7 @@ _mesa_clone_program(struct gl_context *ctx, const struct > gl_program *prog) > gpc->InputType = gp->InputType; > gpc->Invocations = gp->Invocations; > gpc->OutputType = gp->OutputType; > + gpc->UsesEndPrimitive = gp->UsesEndPrimitive; > gpc->UsesStreams = gp->UsesStreams; >} >break; ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 22/23] mesa: Init Geom.UsesEndPrimitive in shader programs.
I think this patch and 23 should be moved first in the series... they can certainly land before the other patches in the series. This patch is Reviewed-by: Ian Romanick On 06/18/2014 02:51 AM, Iago Toral Quiroga wrote: > --- > src/mesa/main/shaderobj.c | 1 + > 1 file changed, 1 insertion(+) > > diff --git a/src/mesa/main/shaderobj.c b/src/mesa/main/shaderobj.c > index 03db862..b3d428c 100644 > --- a/src/mesa/main/shaderobj.c > +++ b/src/mesa/main/shaderobj.c > @@ -248,6 +248,7 @@ _mesa_init_shader_program(struct gl_context *ctx, struct > gl_shader_program *prog > prog->Geom.VerticesOut = 0; > prog->Geom.InputType = GL_TRIANGLES; > prog->Geom.OutputType = GL_TRIANGLE_STRIP; > + prog->Geom.UsesEndPrimitive = GL_FALSE; > prog->Geom.UsesStreams = GL_FALSE; > > prog->TransformFeedback.BufferMode = GL_INTERLEAVED_ATTRIBS; > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 20/23] i965: Enable vertex streams up to MAX_VERTEX_STREAMS.
On 06/18/2014 02:51 AM, Iago Toral Quiroga wrote: > --- > src/mesa/drivers/dri/i965/brw_context.c | 4 > 1 file changed, 4 insertions(+) > > diff --git a/src/mesa/drivers/dri/i965/brw_context.c > b/src/mesa/drivers/dri/i965/brw_context.c > index cfb0be0..6ba58d3 100644 > --- a/src/mesa/drivers/dri/i965/brw_context.c > +++ b/src/mesa/drivers/dri/i965/brw_context.c > @@ -507,6 +507,10 @@ brw_initialize_context_constants(struct brw_context *brw) >ctx->Const.ViewportBounds.Min = -(float)ctx->Const.MaxViewportWidth; >ctx->Const.ViewportBounds.Max = ctx->Const.MaxViewportWidth; > } > + > + /* ARB_gpu_shader5 */ > + if (brw->gen >= 7) > + ctx->Const.MaxVertexStreams = MAX_VERTEX_STREAMS; I'd rather have this be MIN2(4, MAX_VERTEX_STREAMS). That way increasing MAX_VERTEX_STREAMS to support some new hardware won't break GEN7. > } > > /** > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 01/23] glsl: Add parsing support for multi-stream output in geometry shaders.
On Wednesday, June 18, 2014 11:16:47 AM Ian Romanick wrote: > On 06/18/2014 02:51 AM, Iago Toral Quiroga wrote: > > From: Samuel Iglesias Gonsalvez > > > > This implements parsing requirements for multi-stream support in > > geometry shaders as defined in ARB_gpu_shader5. > > > > Signed-off-by: Samuel Iglesias Gonsalvez > > A few minor nits below. With those fixed, this patch is > > Reviewed-by: Ian Romanick > > > --- > > src/glsl/ast.h| 5 + > > src/glsl/ast_to_hir.cpp | 17 +++ > > src/glsl/ast_type.cpp | 39 +- > > src/glsl/glsl_parser.yy | 49 +++ > > src/glsl/glsl_parser_extras.h | 18 > > src/glsl/glsl_types.h | 5 + > > src/glsl/ir.h | 5 + > > 7 files changed, 137 insertions(+), 1 deletion(-) > > > > diff --git a/src/glsl/ast.h b/src/glsl/ast.h > > index 56e7bd8..c8a3394 100644 > > --- a/src/glsl/ast.h > > +++ b/src/glsl/ast.h > > @@ -509,6 +509,8 @@ struct ast_type_qualifier { > > /** \name Layout qualifiers for GL_ARB_gpu_shader5 */ > > /** \{ */ > > unsigned invocations:1; > > + unsigned stream:1; /* Has stream value assigned */ > > + unsigned explicit_stream:1; /* stream value assigned explicitly by shader code */ > > End-of-line comments should begin with /**< for Doxygen. > > > /** \} */ > >} > >/** \brief Set of flags, accessed by name. */ > > @@ -542,6 +544,9 @@ struct ast_type_qualifier { > > /** Maximum output vertices in GLSL 1.50 geometry shaders. */ > > int max_vertices; > > > > + /** Stream in GLSL 1.50 geometry shaders. 
*/ > > + unsigned stream; > > + > > /** Input or output primitive type in GLSL 1.50 geometry shaders */ > > GLenum prim_type; > > > > diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp > > index 132a955..c1bc0f9 100644 > > --- a/src/glsl/ast_to_hir.cpp > > +++ b/src/glsl/ast_to_hir.cpp > > @@ -2461,6 +2461,11 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual, > > if (qual->flags.q.sample) > >var->data.sample = 1; > > > > + if (state->stage == MESA_SHADER_GEOMETRY && > > + qual->flags.q.out && qual->flags.q.stream) { > > + var->data.stream = qual->stream; > > + } > > + > > if (qual->flags.q.attribute && state->stage != MESA_SHADER_VERTEX) { > >var->type = glsl_type::error_type; > >_mesa_glsl_error(loc, state, > > @@ -5092,6 +5097,8 @@ ast_process_structure_or_interface_block(exec_list *instructions, > > interpret_interpolation_qualifier(qual, var_mode, state, &loc); > > fields[i].centroid = qual->flags.q.centroid ? 1 : 0; > > fields[i].sample = qual->flags.q.sample ? 1 : 0; > > Add a blank line here. > > > + /* Only save explicitly defined streams in block's field */ > > And put the */ on its own line. Ian, I think you are one of the few people that do that. Mesa overwhelmingly starts and ends comments on the same line, where they fit on one line: (attempt at finding /* on one line and */ on the next line): $ git grep -A1 '/\*' | grep '\*/' | grep -v '/\*.*\*/' | wc -l 2354 (attempt at finding one line /* ... */ comments): $ git grep '/\*.*\*/' | wc -l 35025 So I would leave it as is. But it's not a big deal... --Ken signature.asc Description: This is a digitally signed message part. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 19/23] mesa: Enable simultaneous queries on different streams.
Reviewed-by: Ian Romanick On 06/18/2014 02:51 AM, Iago Toral Quiroga wrote: > It should be possible to query the number of primitives written to each > individual stream by a geometry shader in a single draw call. For that > we need to have up to MAX_VERTEX_STREAM separate query objects. > --- > src/mesa/main/mtypes.h | 4 ++-- > src/mesa/main/queryobj.c | 17 + > 2 files changed, 11 insertions(+), 10 deletions(-) > > diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h > index 2eaf2f5..7d5c789 100644 > --- a/src/mesa/main/mtypes.h > +++ b/src/mesa/main/mtypes.h > @@ -2912,8 +2912,8 @@ struct gl_query_state > struct gl_query_object *CondRenderQuery; > > /** GL_EXT_transform_feedback */ > - struct gl_query_object *PrimitivesGenerated; > - struct gl_query_object *PrimitivesWritten; > + struct gl_query_object *PrimitivesGenerated[MAX_VERTEX_STREAMS]; > + struct gl_query_object *PrimitivesWritten[MAX_VERTEX_STREAMS]; > > /** GL_ARB_timer_query */ > struct gl_query_object *TimeElapsed; > diff --git a/src/mesa/main/queryobj.c b/src/mesa/main/queryobj.c > index 512f45a..932359c 100644 > --- a/src/mesa/main/queryobj.c > +++ b/src/mesa/main/queryobj.c > @@ -144,11 +144,12 @@ _mesa_init_query_object_functions(struct > dd_function_table *driver) > > > /** > - * Return pointer to the query object binding point for the given target. > + * Return pointer to the query object binding point for the given target and > + * index. 
> * \return NULL if invalid target, else the address of binding point > */ > static struct gl_query_object ** > -get_query_binding_point(struct gl_context *ctx, GLenum target) > +get_query_binding_point(struct gl_context *ctx, GLenum target, GLuint index) > { > switch (target) { > case GL_SAMPLES_PASSED_ARB: > @@ -174,12 +175,12 @@ get_query_binding_point(struct gl_context *ctx, GLenum > target) > return NULL; > case GL_PRIMITIVES_GENERATED: >if (ctx->Extensions.EXT_transform_feedback) > - return &ctx->Query.PrimitivesGenerated; > + return &ctx->Query.PrimitivesGenerated[index]; >else > return NULL; > case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN: >if (ctx->Extensions.EXT_transform_feedback) > - return &ctx->Query.PrimitivesWritten; > + return &ctx->Query.PrimitivesWritten[index]; >else > return NULL; > default: > @@ -240,7 +241,7 @@ _mesa_DeleteQueries(GLsizei n, const GLuint *ids) > if (q) { > if (q->Active) { > struct gl_query_object **bindpt; > - bindpt = get_query_binding_point(ctx, q->Target); > + bindpt = get_query_binding_point(ctx, q->Target, q->Stream); > assert(bindpt); /* Should be non-null for active q. 
*/ > if (bindpt) { >*bindpt = NULL; > @@ -313,7 +314,7 @@ _mesa_BeginQueryIndexed(GLenum target, GLuint index, > GLuint id) > > FLUSH_VERTICES(ctx, 0); > > - bindpt = get_query_binding_point(ctx, target); > + bindpt = get_query_binding_point(ctx, target, index); > if (!bindpt) { >_mesa_error(ctx, GL_INVALID_ENUM, "glBeginQuery{Indexed}(target)"); >return; > @@ -391,7 +392,7 @@ _mesa_EndQueryIndexed(GLenum target, GLuint index) > > FLUSH_VERTICES(ctx, 0); > > - bindpt = get_query_binding_point(ctx, target); > + bindpt = get_query_binding_point(ctx, target, index); > if (!bindpt) { >_mesa_error(ctx, GL_INVALID_ENUM, "glEndQuery{Indexed}(target)"); >return; > @@ -518,7 +519,7 @@ _mesa_GetQueryIndexediv(GLenum target, GLuint index, > GLenum pname, >} > } > else { > - bindpt = get_query_binding_point(ctx, target); > + bindpt = get_query_binding_point(ctx, target, index); >if (!bindpt) { > _mesa_error(ctx, GL_INVALID_ENUM, "glGetQuery{Indexed}iv(target)"); > return; > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 14/23] glsl: include streamId when reading/printing emit-vertex and end-primitive IR.
Patches 14, 15, and 16 are Reviewed-by: Ian Romanick On 06/18/2014 02:51 AM, Iago Toral Quiroga wrote: > --- > src/glsl/ir_print_visitor.cpp | 13 + > src/glsl/ir_reader.cpp| 22 ++ > 2 files changed, 27 insertions(+), 8 deletions(-) > > diff --git a/src/glsl/ir_print_visitor.cpp b/src/glsl/ir_print_visitor.cpp > index c4a6f9c..dcb53b3 100644 > --- a/src/glsl/ir_print_visitor.cpp > +++ b/src/glsl/ir_print_visitor.cpp > @@ -560,13 +560,18 @@ ir_print_visitor::visit(ir_loop_jump *ir) > } > > void > -ir_print_visitor::visit(ir_emit_vertex *) > +ir_print_visitor::visit(ir_emit_vertex *ir) > { > - fprintf(f, "(emit-vertex)"); > + fprintf(f, "(emit-vertex "); > + ir->stream->accept(this); > + fprintf(f, ")\n"); > } > > void > -ir_print_visitor::visit(ir_end_primitive *) > +ir_print_visitor::visit(ir_end_primitive *ir) > { > - fprintf(f, "(end-primitive)"); > + fprintf(f, "(end-primitive "); > + ir->stream->accept(this); > + fprintf(f, ")\n"); > + > } > diff --git a/src/glsl/ir_reader.cpp b/src/glsl/ir_reader.cpp > index 28923f3..ba166eb 100644 > --- a/src/glsl/ir_reader.cpp > +++ b/src/glsl/ir_reader.cpp > @@ -1109,10 +1109,17 @@ ir_reader::read_texture(s_expression *expr) > ir_emit_vertex * > ir_reader::read_emit_vertex(s_expression *expr) > { > - s_pattern pat[] = { "emit-vertex" }; > + s_expression *s_stream = NULL; > + > + s_pattern pat[] = { "emit-vertex", s_stream }; > > if (MATCH(expr, pat)) { > - return new(mem_ctx) ir_emit_vertex(); > + ir_rvalue *stream = read_dereference(s_stream); > + if (stream == NULL) { > + ir_read_error(NULL, "when reading stream info in emit-vertex"); > + return NULL; > + } > + return new(mem_ctx) ir_emit_vertex(stream); > } > ir_read_error(NULL, "when reading emit-vertex"); > return NULL; > @@ -1121,10 +1128,17 @@ ir_reader::read_emit_vertex(s_expression *expr) > ir_end_primitive * > ir_reader::read_end_primitive(s_expression *expr) > { > - s_pattern pat[] = { "end-primitive" }; > + s_expression *s_stream = NULL; > + > + s_pattern pat[] = 
{ "end-primitive", s_stream }; > > if (MATCH(expr, pat)) { > - return new(mem_ctx) ir_end_primitive(); > + ir_rvalue *stream = read_dereference(s_stream); > + if (stream == NULL) { > + ir_read_error(NULL, "when reading stream info in end-primitive"); > + return NULL; > + } > + return new(mem_ctx) ir_end_primitive(stream); > } > ir_read_error(NULL, "when reading end-primitive"); > return NULL; > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 12/23] glsl: Validate vertex emission in geometry shaders.
On 06/18/2014 02:51 AM, Iago Toral Quiroga wrote: > Check if non-zero streams are used. Fail to link if emitting to unsupported > streams or emitting to non-zero streams with output type other than GL_POINTS. > --- > src/glsl/linker.cpp | 148 > +++- > 1 file changed, 134 insertions(+), 14 deletions(-) > > diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp > index 0b6a716..f8ff138 100644 > --- a/src/glsl/linker.cpp > +++ b/src/glsl/linker.cpp > @@ -250,31 +250,100 @@ public: > } > }; > > - > /** > - * Visitor that determines whether or not a shader uses ir_end_primitive. > + * Visitor that determines the highest stream id to which a (geometry) shader > + * emits vertices. It also checks whether End{Stream}Primitive is ever > called. > */ > -class find_end_primitive_visitor : public ir_hierarchical_visitor { > +class find_emit_vertex_visitor : public ir_hierarchical_visitor { > public: > - find_end_primitive_visitor() > - : found(false) > + find_emit_vertex_visitor(int max_allowed) > + : max_stream_allowed(max_allowed), > +invalid_stream_id(0), > +invalid_stream_id_from_emit_vertex(false), > +end_primitive_found(false), > +uses_non_zero_stream(false) > { >/* empty */ > } > > - virtual ir_visitor_status visit(ir_end_primitive *) > + virtual ir_visitor_status visit_leave(ir_emit_vertex *ir) > { > - found = true; > - return visit_stop; > + int stream_id = ir->stream_id(); > + > + if (stream_id < 0) { > + invalid_stream_id = stream_id; > + invalid_stream_id_from_emit_vertex = true; > + return visit_stop; > + } > + > + if (stream_id > max_stream_allowed) { > + invalid_stream_id = stream_id; > + invalid_stream_id_from_emit_vertex = true; > + return visit_stop; > + } > + > + if (stream_id != 0) > + uses_non_zero_stream = true; > + > + return visit_continue; > } > > - bool end_primitive_found() > + virtual ir_visitor_status visit_leave(ir_end_primitive *ir) > { > - return found; > + end_primitive_found = true; > + > + int stream_id = ir->stream_id(); > + > + if 
(stream_id < 0) { > + invalid_stream_id = stream_id; > + invalid_stream_id_from_emit_vertex = false; > + return visit_stop; > + } > + > + if (stream_id > max_stream_allowed) { > + invalid_stream_id = stream_id; > + invalid_stream_id_from_emit_vertex = false; > + return visit_stop; > + } > + > + if (stream_id != 0) > + uses_non_zero_stream = true; > + > + return visit_continue; > + } > + > + bool error() > + { > + return invalid_stream_id != 0; > + } > + > + const char *error_func() > + { > + return invalid_stream_id_from_emit_vertex ? > + "EmitStreamVertex" : "EndStreamPrimitive"; > + } > + > + int error_stream() > + { > + return invalid_stream_id; > + } > + > + bool uses_streams() > + { > + return uses_non_zero_stream; > + } > + > + bool uses_end_primitive() > + { > + return end_primitive_found; > } > > private: > - bool found; > + int max_stream_allowed; > + int invalid_stream_id; > + bool invalid_stream_id_from_emit_vertex; > + bool end_primitive_found; > + bool uses_non_zero_stream; > }; > > } /* anonymous namespace */ > @@ -551,10 +620,58 @@ validate_geometry_shader_executable(struct > gl_shader_program *prog, > > analyze_clip_usage(prog, shader, &prog->Geom.UsesClipDistance, >&prog->Geom.ClipDistanceArraySize); > +} > + > +/** > + * Check if geometry shaders emit to non-zero streams and do corresponding > + * validations. > + */ > +static void > +validate_geometry_shader_emissions(struct gl_context *ctx, > + struct gl_shader_program *prog) > +{ > + if (prog->_LinkedShaders[MESA_SHADER_GEOMETRY] != NULL) { > + find_emit_vertex_visitor emit_vertex(ctx->Const.MaxVertexStreams - 1); > + emit_vertex.run(prog->_LinkedShaders[MESA_SHADER_GEOMETRY]->ir); > + if (emit_vertex.error()) { > + linker_error(prog, "Invalid call %s(%d). 
Accepted values for the " > + "stream parameter are in the range [0, %d].", > + emit_vertex.error_func(), > + emit_vertex.error_stream(), > + ctx->Const.MaxVertexStreams - 1); > + } > + prog->Geom.UsesStreams = emit_vertex.uses_streams(); > + prog->Geom.UsesEndPrimitive = emit_vertex.uses_end_primitive(); > > - find_end_primitive_visitor end_primitive; > - end_primitive.run(shader->ir); > - prog->Geom.UsesEndPrimitive = end_primitive.end_primitive_found(); > + /* From the ARB_gpu_shader5 spec: > + * > + * "Multiple vertex streams are supported only if the output > primitive > + *t
Re: [Mesa-dev] [PATCH v2 06/23] glsl: Add methods to retrieve a varying's name and streamId.
Patches 6, 7, 8, 9, and 10 are Reviewed-by: Ian Romanick On 06/18/2014 02:51 AM, Iago Toral Quiroga wrote: > --- > src/glsl/link_varyings.h | 10 ++ > 1 file changed, 10 insertions(+) > > diff --git a/src/glsl/link_varyings.h b/src/glsl/link_varyings.h > index 4e485c3..167363e 100644 > --- a/src/glsl/link_varyings.h > +++ b/src/glsl/link_varyings.h > @@ -112,6 +112,16 @@ public: >return !this->next_buffer_separator && !this->skip_components; > } > > + const char *name() const > + { > + return this->orig_name; > + } > + > + unsigned get_stream_id() const > + { > + return this->stream_id; > + } > + > /** > * The total number of varying components taken up by this variable. Only > * valid if assign_location() has been called. > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 11/23] glsl: Add support for EmitStreamVertex() and EndStreamPrimitive().
This patch should be split into several patches: 1. Modify ir_emit_vertex to have a stream. This patch also needs to update ir_to_mesa.cpp and st_glsl_to_tgsi.cpp. 2. Modify ir_end_primitive to have a stream. This patch also needs to update ir_to_mesa.cpp and st_glsl_to_tgsi.cpp. 3. Add the new built-in functions. A couple other minor comments below... On 06/18/2014 02:51 AM, Iago Toral Quiroga wrote: > --- > src/glsl/builtin_functions.cpp | 52 > ++-- > src/glsl/ir.h| 34 +-- > src/glsl/ir_hierarchical_visitor.cpp | 50 +- > src/glsl/ir_hierarchical_visitor.h | 6 +++-- > src/glsl/ir_hv_accept.cpp| 21 --- > src/glsl/ir_rvalue_visitor.cpp | 37 + > src/glsl/ir_rvalue_visitor.h | 6 + > src/glsl/lower_output_reads.cpp | 4 +-- > src/glsl/lower_packed_varyings.cpp | 4 +-- > src/glsl/opt_dead_code_local.cpp | 2 +- > 10 files changed, 178 insertions(+), 38 deletions(-) > > diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp > index f9f0686..07a0722 100644 > --- a/src/glsl/builtin_functions.cpp > +++ b/src/glsl/builtin_functions.cpp > @@ -359,6 +359,12 @@ shader_image_load_store(const _mesa_glsl_parse_state > *state) > state->ARB_shader_image_load_store_enable); > } > > +static bool > +gs_streams(const _mesa_glsl_parse_state *state) > +{ > + return gpu_shader5(state) && gs_only(state); > +} > + > /** @} */ > > > /**/ > @@ -594,6 +600,10 @@ private: > > B0(EmitVertex) > B0(EndPrimitive) > + ir_function_signature *_EmitStreamVertex(builtin_available_predicate > avail, > +const glsl_type *stream_type); > + ir_function_signature *_EndStreamPrimitive(builtin_available_predicate > avail, > + const glsl_type *stream_type); > > B2(textureQueryLod); > B1(textureQueryLevels); > @@ -1708,6 +1718,14 @@ builtin_builder::create_builtins() > > add_function("EmitVertex", _EmitVertex(), NULL); > add_function("EndPrimitive", _EndPrimitive(), NULL); > + add_function("EmitStreamVertex", > +_EmitStreamVertex(gs_streams, glsl_type::uint_type), > 
+_EmitStreamVertex(gs_streams, glsl_type::int_type), > +NULL); > + add_function("EndStreamPrimitive", > +_EndStreamPrimitive(gs_streams, glsl_type::uint_type), > +_EndStreamPrimitive(gs_streams, glsl_type::int_type), > +NULL); > > add_function("textureQueryLOD", > _textureQueryLod(glsl_type::sampler1D_type, > glsl_type::float_type), > @@ -3872,7 +3890,22 @@ builtin_builder::_EmitVertex() > { > MAKE_SIG(glsl_type::void_type, gs_only, 0); > > - body.emit(new(mem_ctx) ir_emit_vertex()); > + ir_rvalue *stream = new(mem_ctx) ir_constant(0, 1); > + body.emit(new(mem_ctx) ir_emit_vertex(stream)); > + > + return sig; > +} > + > +ir_function_signature * > +builtin_builder::_EmitStreamVertex(builtin_available_predicate avail, > + const glsl_type *stream_type) > +{ Please add a spec quotation for this. I had to go look it up to be sure ir_var_const_in was correct. /* Section 8.12 (Geometry Shader Functions) of the OpenGL 4.0 spec says: * * "Completes the current output primitive on stream stream and starts * a new one. The argument to stream must be a constant integral * expression." */ > + ir_variable *stream = > + new(mem_ctx) ir_variable(stream_type, "stream", ir_var_const_in); > + > + MAKE_SIG(glsl_type::void_type, avail, 1, stream); > + > + body.emit(new(mem_ctx) ir_emit_vertex(var_ref(stream))); > > return sig; > } > @@ -3882,7 +3915,22 @@ builtin_builder::_EndPrimitive() > { > MAKE_SIG(glsl_type::void_type, gs_only, 0); > > - body.emit(new(mem_ctx) ir_end_primitive()); > + ir_rvalue *stream = new(mem_ctx) ir_constant(0, 1); > + body.emit(new(mem_ctx) ir_end_primitive(stream)); > + > + return sig; > +} > + > +ir_function_signature * > +builtin_builder::_EndStreamPrimitive(builtin_available_predicate avail, > + const glsl_type *stream_type) > +{ Same spec reference here. 
> + ir_variable *stream = > + new(mem_ctx) ir_variable(stream_type, "stream", ir_var_const_in); > + > + MAKE_SIG(glsl_type::void_type, avail, 1, stream); > + > + body.emit(new(mem_ctx) ir_end_primitive(var_ref(stream))); > > return sig; > } > diff --git a/src/glsl/ir.h b/src/glsl/ir.h > index dbbabb5..ea5ba27 100644 > --- a/src/glsl/ir.h > +++ b/src/glsl/ir.h > @@ -2159,9 +2159,11 @@ private: > */ > class ir_emit_vertex : public ir_instruction { > public: > - ir_emit_vertex
Re: [Mesa-dev] [PATCH v2 09/23] glsl: Store info about geometry shaders that emit vertices to non-zero streams.
On 06/18/2014 02:51 AM, Iago Toral Quiroga wrote: > On Intel hardware when a geometry shader outputs GL_POINTS primitives we > only need to emit vertex control bits if it emits vertices to non-zero > streams, so use a flag to track this. > > This flag will be set to TRUE when a geometry shader calls EmitStreamVertex() > or EndStreamPrimitive() with a non-zero stream parameter in a later patch. > --- > src/mesa/main/mtypes.h | 2 ++ > src/mesa/main/shaderapi.c | 1 + > src/mesa/main/shaderobj.c | 1 + > src/mesa/program/program.c | 1 + > 4 files changed, 5 insertions(+) > > diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h > index f45dde4..5bc710e 100644 > --- a/src/mesa/main/mtypes.h > +++ b/src/mesa/main/mtypes.h > @@ -2178,6 +2178,7 @@ struct gl_geometry_program > GL_TRIANGLES, or GL_TRIANGLES_ADJACENCY_ARB */ > GLenum OutputType; /**< GL_POINTS, GL_LINE_STRIP or GL_TRIANGLE_STRIP */ > GLboolean UsesEndPrimitive; > + GLboolean UsesStreams; For things that are not visible to the GL API, we've been trying to transition away from GL types. Unless Brian or Chris object, I think I'd rather have this be bool (and true / false below). > }; > > > @@ -2681,6 +2682,7 @@ struct gl_shader_program >GLuint ClipDistanceArraySize; /**< Size of the gl_ClipDistance array, > or > 0 if not present. 
*/ >GLboolean UsesEndPrimitive; > + GLboolean UsesStreams; > } Geom; > > /** Vertex shader state */ > diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c > index 2ec2444..2bbef35 100644 > --- a/src/mesa/main/shaderapi.c > +++ b/src/mesa/main/shaderapi.c > @@ -1888,6 +1888,7 @@ _mesa_copy_linked_program_data(gl_shader_stage type, >dst_gp->OutputType = src->Geom.OutputType; >dst->UsesClipDistanceOut = src->Geom.UsesClipDistance; >dst_gp->UsesEndPrimitive = src->Geom.UsesEndPrimitive; > + dst_gp->UsesStreams = src->Geom.UsesStreams; > } >break; > case MESA_SHADER_FRAGMENT: { > diff --git a/src/mesa/main/shaderobj.c b/src/mesa/main/shaderobj.c > index b0f0bfa..03db862 100644 > --- a/src/mesa/main/shaderobj.c > +++ b/src/mesa/main/shaderobj.c > @@ -248,6 +248,7 @@ _mesa_init_shader_program(struct gl_context *ctx, struct > gl_shader_program *prog > prog->Geom.VerticesOut = 0; > prog->Geom.InputType = GL_TRIANGLES; > prog->Geom.OutputType = GL_TRIANGLE_STRIP; > + prog->Geom.UsesStreams = GL_FALSE; > > prog->TransformFeedback.BufferMode = GL_INTERLEAVED_ATTRIBS; > > diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c > index b7332fc..1263cea 100644 > --- a/src/mesa/program/program.c > +++ b/src/mesa/program/program.c > @@ -552,6 +552,7 @@ _mesa_clone_program(struct gl_context *ctx, const struct > gl_program *prog) > gpc->InputType = gp->InputType; > gpc->Invocations = gp->Invocations; > gpc->OutputType = gp->OutputType; > + gpc->UsesStreams = gp->UsesStreams; >} >break; > default: > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 05/23] glsl: Fail to link if inter-stage input/outputs are not assigned to stream 0
Reviewed-by: Ian Romanick On 06/18/2014 02:51 AM, Iago Toral Quiroga wrote: > Outputs that are linked to inputs in the next stage must be output to stream > 0, > otherwise we should fail to link. > --- > src/glsl/link_varyings.cpp | 8 > 1 file changed, 8 insertions(+) > > diff --git a/src/glsl/link_varyings.cpp b/src/glsl/link_varyings.cpp > index 9725a43..3b20594 100644 > --- a/src/glsl/link_varyings.cpp > +++ b/src/glsl/link_varyings.cpp > @@ -1345,6 +1345,14 @@ assign_varying_locations(struct gl_context *ctx, > if (input_var || (prog->SeparateShader && consumer == NULL)) { > matches.record(output_var, input_var); > } > + > + /* Only stream 0 outputs can be consumed in the next stage */ > + if (input_var && output_var->data.stream != 0) { > +linker_error(prog, "output %s is assigned to stream=%d but " > + "is linked to an input, which requires stream=0", > + output_var->name, output_var->data.stream); > +return false; > + } >} > } else { >/* If there's no producer stage, then this must be a separable program. > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 04/23] glsl: Assign GLSL StreamIds to transform feedback outputs.
On 06/18/2014 02:51 AM, Iago Toral Quiroga wrote: > Inter-shader outputs must be on stream 0, which is the default. > --- > src/glsl/link_varyings.cpp | 12 +--- > src/glsl/link_varyings.h | 7 +++ > 2 files changed, 16 insertions(+), 3 deletions(-) > > diff --git a/src/glsl/link_varyings.cpp b/src/glsl/link_varyings.cpp > index f765d37..9725a43 100644 > --- a/src/glsl/link_varyings.cpp > +++ b/src/glsl/link_varyings.cpp > @@ -291,6 +291,7 @@ tfeedback_decl::init(struct gl_context *ctx, const void > *mem_ctx, > this->skip_components = 0; > this->next_buffer_separator = false; > this->matched_candidate = NULL; > + this->stream_id = 0; > > if (ctx->Extensions.ARB_transform_feedback3) { >/* Parse gl_NextBuffer. */ > @@ -355,8 +356,8 @@ tfeedback_decl::is_same(const tfeedback_decl &x, const > tfeedback_decl &y) > > > /** > - * Assign a location for this tfeedback_decl object based on the transform > - * feedback candidate found by find_candidate. > + * Assign a location and stream ID for this tfeedback_decl object based on > the > + * transform feedback candidate found by find_candidate. > * > * If an error occurs, the error is reported through linker_error() and false > * is returned. > @@ -437,6 +438,11 @@ tfeedback_decl::assign_location(struct gl_context *ctx, >return false; > } > > + /* Only transform feedback varyings can be assigned to non-zero streams, > +* so assign the stream id here. 
> +*/ > + this->stream_id = this->matched_candidate->toplevel_var->data.stream; > + > return true; > } > > @@ -495,7 +501,7 @@ tfeedback_decl::store(struct gl_context *ctx, struct > gl_shader_program *prog, >info->Outputs[info->NumOutputs].ComponentOffset = location_frac; >info->Outputs[info->NumOutputs].OutputRegister = location; >info->Outputs[info->NumOutputs].NumComponents = output_size; > - info->Outputs[info->NumOutputs].StreamId = 0; > + info->Outputs[info->NumOutputs].StreamId = stream_id; >info->Outputs[info->NumOutputs].OutputBuffer = buffer; >info->Outputs[info->NumOutputs].DstOffset = info->BufferStride[buffer]; >++info->NumOutputs; > diff --git a/src/glsl/link_varyings.h b/src/glsl/link_varyings.h > index 6fa2681..4e485c3 100644 > --- a/src/glsl/link_varyings.h > +++ b/src/glsl/link_varyings.h > @@ -210,6 +210,13 @@ private: > * data structure that was found. Otherwise NULL. > */ > const tfeedback_candidate *matched_candidate; > + > + /** > +* StreamId assigned to this varying (defaults to 0). Can only be set to > +* values other than 0 in geometry shaders that use the stream layout > +* modifier. Accepted values must be in the range [0, MAX_VERTEX_STREAMS]. MAX_VERTEX_STREAMS-1, right? > +*/ > + unsigned stream_id; > }; > > > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 03/23] i965: Enable transform feedback for streams > 0
Reviewed-by: Ian Romanick On 06/18/2014 02:51 AM, Iago Toral Quiroga wrote: > Configure hardware to read vertex data for all streams and have all streams > write their varyings to the corresponsing output buffers. > --- > src/mesa/drivers/dri/i965/gen7_sol_state.c | 67 > +++--- > 1 file changed, 43 insertions(+), 24 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/gen7_sol_state.c > b/src/mesa/drivers/dri/i965/gen7_sol_state.c > index 8e554af..11b2e2e 100644 > --- a/src/mesa/drivers/dri/i965/gen7_sol_state.c > +++ b/src/mesa/drivers/dri/i965/gen7_sol_state.c > @@ -104,12 +104,14 @@ gen7_upload_3dstate_so_decl_list(struct brw_context > *brw, >ctx->TransformFeedback.CurrentObject; > const struct gl_transform_feedback_info *linked_xfb_info = >&xfb_obj->shader_program->LinkedTransformFeedback; > - uint16_t so_decl[128]; > - int buffer_mask = 0; > - int next_offset[4] = {0, 0, 0, 0}; > - int decls = 0; > + uint16_t so_decl[MAX_VERTEX_STREAMS][128]; > + int buffer_mask[MAX_VERTEX_STREAMS] = {0, 0, 0, 0}; > + int next_offset[MAX_VERTEX_STREAMS] = {0, 0, 0, 0}; > + int decls[MAX_VERTEX_STREAMS] = {0, 0, 0, 0}; > + int max_decls = 0; > + STATIC_ASSERT(ARRAY_SIZE(so_decl[0]) >= MAX_PROGRAM_OUTPUTS); > > - STATIC_ASSERT(ARRAY_SIZE(so_decl) >= MAX_PROGRAM_OUTPUTS); > + memset(so_decl, 0, sizeof(so_decl)); > > /* Construct the list of SO_DECLs to be emitted. The formatting of the > * command is feels strange -- each dword pair contains a SO_DECL per > stream. 
> @@ -120,6 +122,9 @@ gen7_upload_3dstate_so_decl_list(struct brw_context *brw, >int varying = linked_xfb_info->Outputs[i].OutputRegister; >const unsigned components = linked_xfb_info->Outputs[i].NumComponents; >unsigned component_mask = (1 << components) - 1; > + unsigned stream_id = linked_xfb_info->Outputs[i].StreamId; > + > + assert(stream_id < MAX_VERTEX_STREAMS); > >/* gl_PointSize is stored in VARYING_SLOT_PSIZ.w > * gl_Layer is stored in VARYING_SLOT_PSIZ.y > @@ -138,7 +143,7 @@ gen7_upload_3dstate_so_decl_list(struct brw_context *brw, > component_mask <<= linked_xfb_info->Outputs[i].ComponentOffset; >} > > - buffer_mask |= 1 << buffer; > + buffer_mask[stream_id] |= 1 << buffer; > >decl |= buffer << SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT; >if (varying == VARYING_SLOT_LAYER || varying == VARYING_SLOT_VIEWPORT) > { > @@ -167,35 +172,41 @@ gen7_upload_3dstate_so_decl_list(struct brw_context > *brw, >next_offset[buffer] += skip_components; > >while (skip_components >= 4) { > - so_decl[decls++] = SO_DECL_HOLE_FLAG | 0xf; > + so_decl[stream_id][decls[stream_id]++] = SO_DECL_HOLE_FLAG | 0xf; > skip_components -= 4; >} >if (skip_components > 0) > - so_decl[decls++] = SO_DECL_HOLE_FLAG | ((1 << skip_components) - 1); > + so_decl[stream_id][decls[stream_id]++] = > +SO_DECL_HOLE_FLAG | ((1 << skip_components) - 1); > >assert(linked_xfb_info->Outputs[i].DstOffset == next_offset[buffer]); > >next_offset[buffer] += components; > > - so_decl[decls++] = decl; > + so_decl[stream_id][decls[stream_id]++] = decl; > + > + if (decls[stream_id] > max_decls) > + max_decls = decls[stream_id]; > } > > - BEGIN_BATCH(decls * 2 + 3); > - OUT_BATCH(_3DSTATE_SO_DECL_LIST << 16 | (decls * 2 + 1)); > + BEGIN_BATCH(max_decls * 2 + 3); > + OUT_BATCH(_3DSTATE_SO_DECL_LIST << 16 | (max_decls * 2 + 1)); > > - OUT_BATCH((buffer_mask << SO_STREAM_TO_BUFFER_SELECTS_0_SHIFT) | > - (0 << SO_STREAM_TO_BUFFER_SELECTS_1_SHIFT) | > - (0 << SO_STREAM_TO_BUFFER_SELECTS_2_SHIFT) | > - (0 << 
SO_STREAM_TO_BUFFER_SELECTS_3_SHIFT)); > + OUT_BATCH((buffer_mask[0] << SO_STREAM_TO_BUFFER_SELECTS_0_SHIFT) | > + (buffer_mask[1] << SO_STREAM_TO_BUFFER_SELECTS_1_SHIFT) | > + (buffer_mask[2] << SO_STREAM_TO_BUFFER_SELECTS_2_SHIFT) | > + (buffer_mask[3] << SO_STREAM_TO_BUFFER_SELECTS_3_SHIFT)); > > - OUT_BATCH((decls << SO_NUM_ENTRIES_0_SHIFT) | > - (0 << SO_NUM_ENTRIES_1_SHIFT) | > - (0 << SO_NUM_ENTRIES_2_SHIFT) | > - (0 << SO_NUM_ENTRIES_3_SHIFT)); > + OUT_BATCH((decls[0] << SO_NUM_ENTRIES_0_SHIFT) | > + (decls[1] << SO_NUM_ENTRIES_1_SHIFT) | > + (decls[2] << SO_NUM_ENTRIES_2_SHIFT) | > + (decls[3] << SO_NUM_ENTRIES_3_SHIFT)); > > - for (int i = 0; i < decls; i++) { > - OUT_BATCH(so_decl[i]); > - OUT_BATCH(0); > + for (int i = 0; i < max_decls; i++) { > + /* Stream 1 | Stream 0 */ > + OUT_BATCH(((uint32_t) so_decl[1][i]) << 16 | so_decl[0][i]); > + /* Stream 3 | Stream 2 */ > + OUT_BATCH(((uint32_t) so_decl[3][i]) << 16 | so_decl[2][i]); > } > > ADVANCE_BATCH(); > @@ -235,8 +246,16 @@ upload_3dstate_
Re: [Mesa-dev] [PATCH v2 02/23] mesa: add StreamId information to transform feedback outputs.
Reviewed-by: Ian Romanick On 06/18/2014 02:51 AM, Iago Toral Quiroga wrote: > For now initialized to the default stream 0. > --- > src/glsl/link_varyings.cpp | 1 + > src/mesa/main/mtypes.h | 1 + > 2 files changed, 2 insertions(+) > > diff --git a/src/glsl/link_varyings.cpp b/src/glsl/link_varyings.cpp > index 6863298..f765d37 100644 > --- a/src/glsl/link_varyings.cpp > +++ b/src/glsl/link_varyings.cpp > @@ -495,6 +495,7 @@ tfeedback_decl::store(struct gl_context *ctx, struct > gl_shader_program *prog, >info->Outputs[info->NumOutputs].ComponentOffset = location_frac; >info->Outputs[info->NumOutputs].OutputRegister = location; >info->Outputs[info->NumOutputs].NumComponents = output_size; > + info->Outputs[info->NumOutputs].StreamId = 0; >info->Outputs[info->NumOutputs].OutputBuffer = buffer; >info->Outputs[info->NumOutputs].DstOffset = info->BufferStride[buffer]; >++info->NumOutputs; > diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h > index 8b7ee30..f45dde4 100644 > --- a/src/mesa/main/mtypes.h > +++ b/src/mesa/main/mtypes.h > @@ -1791,6 +1791,7 @@ struct gl_transform_feedback_output > unsigned OutputRegister; > unsigned OutputBuffer; > unsigned NumComponents; > + unsigned StreamId; > > /** offset (in DWORDs) of this output within the interleaved structure */ > unsigned DstOffset; > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Broadcom VC4 project
It would be more precise to say that i915 uses the Draw module, which can do these per-vertex operations on the CPU (some of them use LLVM): - vertex fetching - vertex shader - geometry shader - culling and clipping - viewport transformation - translates all primitives to either point, line, or triangle lists (if you can't do quads) - line stippling by breaking lines into shorter lines - converts triangles to points or lines if polygon mode is not "fill" - generates point sprite coordinates - ... and a bunch of other things I don't remember A lot of these are optional. It also optionally supports these per-fragment operations by plugging itself into the driver, but you have to use the Draw module for vertex processing: - polygon stippling - line smoothing - point smoothing A full software fallback by switching between the hw driver and llvmpipe at run time would be possible if somebody did it. ;) The failover module was one attempt at it, but it was never finished. I briefly played with it during r300g development and it did work for glxgears with some modifications: http://cgit.freedesktop.org/mesa/mesa/commit/?id=0a22fb6a5d519ca26d423fa4a9404697dca56cbf Marek On Wed, Jun 18, 2014 at 8:17 PM, Stéphane Marchesin wrote: > On Wed, Jun 18, 2014 at 10:47 AM, Emil Velikov > wrote: >> On 18/06/14 08:28, Eric Anholt wrote: >>> To those who have been curious what I was up to: I wasn't sure when I >>> could announce my new projecct, I just got the ack day before yesterday, >>> and I've been a little busy. >>> >>> I'm working toward building a Mesa driver for Broadcom VC4 (aka >>> Raspberry Pi). At the moment I'm still bringing up the DRM side of >>> things, but I hope to be doing bits of userspace in the next few days. >>> Current status is I have a skeleton DRM KMS driver that's going to talk >>> to the firmware for modesetting, and now I'm starting on the execution >>> side of things. 
>>> >>> I'm probably going to start out doing a gallium driver for simplicity, >>> to avoid having to do all the DRI crap we've got in brw_context.c and >>> texture miptree validation and getting user data into VBOs and all that >>> other almost-boilerplate. Long term I may end up switching to classic >>> so I can get swrast fallbacks and the ability to implement single-copy >>> manually-tiled TexImage uploads like. For now I want to get to drawing >>> triangles as soon as I can. >>> >> Hi Eric, >> >> IIRC the i915 driver already uses softpipe/llvmpipe as a fallback for some >> operations. > > It doesn't, that experiment went away a long time ago. Well, unless > you consider "the vertex pipeline" as one of those operations, in > which case yes it's always using a fallback to llvmpipe :) > > Stéphane > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 00/23] Megadrivers galore
On 18/06/14 14:09, Tom Stellard wrote: > On Tue, Jun 17, 2014 at 07:38:16PM +0100, Emil Velikov wrote: >> Hi all, >> >> As a follow up to the static/shared pipe-drivers series here is the final >> series (if anyone is interested I may take a look at egl + opencl) of >> refactoring the gallium dri targets into a single library/provider. >> > > Hi Emil, > > One common problem I run into when using the pipe drivers is if there > is an undefined symbol in the pipe_*.so then it will silently fail to > load, and with OpenCL for example it won't report any devices which can > be confusing for users. I would recommend adding some error handling to > util_dlopen, so that it prints an error message when it fails to load a > shared object. > > Other than that, it's hard to review a series like this, but I'll > give it an: > Hmm I'm pretty sure that should not happen as of commit d187a150d45 (mesa 10.2) which adds a few linker flags that moves the issue to build time. Pretty much everything (galliumwise) other than the dri targets should have its symbols resolved, (and export only the required ones) with mesa 10.2. Although I do plan on adding a "WARNING: YOU'RE USING PIPE-DRIVERS, THINGS WILL BREAK" etc. kind of warning, as I do make it possible (hidden behind a key) to use them with vdpau, omx, dri More on that at 11. Thanks for chipping in. -Emil > Acked-by: Tom Stellard > > Since I like the general approach. > > -Tom > >> In a nutshell: >> - Convert one target per patch. >> - Merge the drm and sw backends of our dri state-tracker. >> - Adds __driDriverGetExtensions_$drivername symbol for each driver. >> - Megadrivers. >> - *** >> - Profit. >> >> Things works like a charm for nouveau and swrast, and testing on other >> platforms is greatly appreciated. >> >> The complete series can be found in the static-or-shared-pipe-drivers-v2 >> branch at my github repo. 
>> >> I would like to get this reviewed/pushed over the next month, although >> that depends on the number of bugs that come up with the previous batch. >> >> As always comments, suggestions and flame is welcome. >> >> Cheers, >> Emil >> >> Emil Velikov (23): >> targets/dri-swrast: use drm aware dricommon when building more than >> swrast >> st/dri: Allow separate dri-targets >> st/dri/drm: Add a second libdridrm library >> targets/dri-nouveau: Convert to static/shared pipe-drivers >> targets/(r300|r600|radeonsi)/dri: Convert to static/shared pipe-drivers >> targets/dri-freedreno: Convert to static/shared pipe-drivers >> targets/dri-i915: Convert to static/shared pipe-drivers >> targets/dri-ilo: Convert to static/shared pipe-driver >> targets/dri-vmwgfx: Convert to static/shared pipe-drivers >> st/dri: Remove the old libdridrm library >> targets/dri: Add __driDriverGetExtensions_nouveau symbol >> targets/dri: Add __driDriverGetExtensions_(r300|r600|radeonsi) symbols >> targets/dri: Add __driDriverGetExtensions_freedreno symbol >> targets/dri: Add __driDriverGetExtensions_i915 symbol >> targets/dri: Add __driDriverGetExtensions_i965 symbol >> targets/dri: Add __driDriverGetExtensions_vmwgfx >> targets/dri: update scons build to handle >> __driDriverGetExtensions_vmwgfx >> targets/dri: cleanup conversion leftovers >> st/dri/drm: remove __driDriverExtensions and driDriverAPI >> scons: build and use a single dri_common library >> targets/dri-swrast: convert to gallium megadrivers :) >> st/dri: merge dri/drm and dri/sw backends >> targets/dri-swrast: Convert to static/shared pipe-driver >> >> 61 files changed, 536 insertions(+), 1375 deletions(-) >> >> ___ >> mesa-dev mailing list >> mesa-dev@lists.freedesktop.org >> http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 73846] [llvmpipe] lp_test_format fails with llvm-3.5svn >= r199602
https://bugs.freedesktop.org/show_bug.cgi?id=73846 Roland Scheidegger changed: What|Removed |Added Status|REOPENED|RESOLVED Resolution|--- |FIXED --- Comment #5 from Roland Scheidegger --- (In reply to comment #4) > mesa: 56335b44417bc3d49625f9637e2b95457f522ad2 (master 10.3.0-devel) > > lp_test_format is still failing with llvm-3.5.0svn. > > LLVM ERROR: Cannot select: 0x371c9b0: v4i32 = X86ISD::UMIN 0x3747978, > 0x371bf60 [ORD=5] [ID=33] > 0x3747978: v4i32 = X86ISD::MOVSS 0x3747870, 0x371cab8 [ORD=4] [ID=32] > 0x3747870: v4i32 = bitcast 0x3747660 [ORD=3] [ID=30] > 0x3747660: v2i64 = scalar_to_vector 0x371f8d8 [ORD=3] [ID=28] > 0x371f8d8: i64,ch = load 0x3738e80, 0x36f7008, > 0x36f7638 [ORD=3] [ID=26] That is due to an entirely different reason however, in particular looks like https://bugs.freedesktop.org/show_bug.cgi?id=77493 (it works as long as avx is enabled here), so it should be tracked there. -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Broadcom VC4 project
On Wed, Jun 18, 2014 at 10:47 AM, Emil Velikov wrote: > On 18/06/14 08:28, Eric Anholt wrote: >> To those who have been curious what I was up to: I wasn't sure when I >> could announce my new project, I just got the ack day before yesterday, >> and I've been a little busy. >> >> I'm working toward building a Mesa driver for Broadcom VC4 (aka >> Raspberry Pi). At the moment I'm still bringing up the DRM side of >> things, but I hope to be doing bits of userspace in the next few days. >> Current status is I have a skeleton DRM KMS driver that's going to talk >> to the firmware for modesetting, and now I'm starting on the execution >> side of things. >> >> I'm probably going to start out doing a gallium driver for simplicity, >> to avoid having to do all the DRI crap we've got in brw_context.c and >> texture miptree validation and getting user data into VBOs and all that >> other almost-boilerplate. Long term I may end up switching to classic >> so I can get swrast fallbacks and the ability to implement single-copy >> manually-tiled TexImage uploads like. For now I want to get to drawing >> triangles as soon as I can. >> > Hi Eric, > > IIRC the i915 driver already uses softpipe/llvmpipe as a fallback for some > operations. It doesn't, that experiment went away a long time ago. Well, unless you consider "the vertex pipeline" as one of those operations, in which case yes it's always using a fallback to llvmpipe :) Stéphane ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 01/23] glsl: Add parsing support for multi-stream output in geometry shaders.
On 06/18/2014 02:51 AM, Iago Toral Quiroga wrote: > From: Samuel Iglesias Gonsalvez > > This implements parsing requirements for multi-stream support in > geometry shaders as defined in ARB_gpu_shader5. > > Signed-off-by: Samuel Iglesias Gonsalvez A few minor nits below. With those fixed, this patch is Reviewed-by: Ian Romanick > --- > src/glsl/ast.h| 5 + > src/glsl/ast_to_hir.cpp | 17 +++ > src/glsl/ast_type.cpp | 39 +- > src/glsl/glsl_parser.yy | 49 > +++ > src/glsl/glsl_parser_extras.h | 18 > src/glsl/glsl_types.h | 5 + > src/glsl/ir.h | 5 + > 7 files changed, 137 insertions(+), 1 deletion(-) > > diff --git a/src/glsl/ast.h b/src/glsl/ast.h > index 56e7bd8..c8a3394 100644 > --- a/src/glsl/ast.h > +++ b/src/glsl/ast.h > @@ -509,6 +509,8 @@ struct ast_type_qualifier { > /** \name Layout qualifiers for GL_ARB_gpu_shader5 */ > /** \{ */ > unsigned invocations:1; > + unsigned stream:1; /* Has stream value assigned */ > + unsigned explicit_stream:1; /* stream value assigned explicitly by > shader code */ End-of-line comments should begin with /**< for Doxygen. > /** \} */ >} >/** \brief Set of flags, accessed by name. */ > @@ -542,6 +544,9 @@ struct ast_type_qualifier { > /** Maximum output vertices in GLSL 1.50 geometry shaders. */ > int max_vertices; > > + /** Stream in GLSL 1.50 geometry shaders. 
*/ > + unsigned stream; > + > /** Input or output primitive type in GLSL 1.50 geometry shaders */ > GLenum prim_type; > > diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp > index 132a955..c1bc0f9 100644 > --- a/src/glsl/ast_to_hir.cpp > +++ b/src/glsl/ast_to_hir.cpp > @@ -2461,6 +2461,11 @@ apply_type_qualifier_to_variable(const struct > ast_type_qualifier *qual, > if (qual->flags.q.sample) >var->data.sample = 1; > > + if (state->stage == MESA_SHADER_GEOMETRY && > + qual->flags.q.out && qual->flags.q.stream) { > + var->data.stream = qual->stream; > + } > + > if (qual->flags.q.attribute && state->stage != MESA_SHADER_VERTEX) { >var->type = glsl_type::error_type; >_mesa_glsl_error(loc, state, > @@ -5092,6 +5097,8 @@ ast_process_structure_or_interface_block(exec_list > *instructions, > interpret_interpolation_qualifier(qual, var_mode, state, &loc); > fields[i].centroid = qual->flags.q.centroid ? 1 : 0; > fields[i].sample = qual->flags.q.sample ? 1 : 0; Add a blank link here. > + /* Only save explicitly defined streams in block's field */ And put the */ on it's own line. > + fields[i].stream = qual->flags.q.explicit_stream ? qual->stream : > -1; > > if (qual->flags.q.row_major || qual->flags.q.column_major) { > if (!qual->flags.q.uniform) { > @@ -5533,6 +5540,16 @@ ast_interface_block::hir(exec_list *instructions, > var->data.sample = fields[i].sample; > var->init_interface_type(block_type); > > + if (fields[i].stream != -1 && > + ((unsigned)fields[i].stream) != this->layout.stream) { > +_mesa_glsl_error(&loc, state, > + "stream layout qualifier on " > + "interface block member `%s' does not match " > + "the interface block (%d and %d)", In other places we generally say "%d vs %d". > + var->name, fields[i].stream, > this->layout.stream); > + } Blank line here. 
> + var->data.stream = this->layout.stream; > + > if (redeclaring_per_vertex) { > ir_variable *earlier = > get_variable_being_redeclared(var, loc, state, > diff --git a/src/glsl/ast_type.cpp b/src/glsl/ast_type.cpp > index 77053d5..daa3594 100644 > --- a/src/glsl/ast_type.cpp > +++ b/src/glsl/ast_type.cpp > @@ -125,9 +125,13 @@ ast_type_qualifier::merge_qualifier(YYLTYPE *loc, > /* Uniform block layout qualifiers get to overwrite each > * other (rightmost having priority), while all other > * qualifiers currently don't allow duplicates. > +* > +* Geometry shaders can have several layout qualifiers > +* assigning different stream values. > */ > > - if ((this->flags.i & q.flags.i & ~(ubo_mat_mask.flags.i | > + if ((state->stage != MESA_SHADER_GEOMETRY) && > + (this->flags.i & q.flags.i & ~(ubo_mat_mask.flags.i | > ubo_layout_mask.flags.i | >ubo_binding_mask.flags.i)) != 0) { >_mesa_glsl_error(loc, state, > @@ -154,6 +158,39 @@ ast_type_qualifier::merge_qualifier(YYLTYPE *loc, >this->max_vertices = q.max_vertices; > } > > + if (state->stage == MESA_SHADER_GEOMETRY && > +
Re: [Mesa-dev] Static/shared pipe-drivers (was megadriver/pipe-loader-to-all)
On 18/06/14 15:07, Chia-I Wu wrote: > On Wed, Jun 18, 2014 at 8:14 PM, Emil Velikov > wrote: [...] >> Interesting, I do not see this as an attempt to define an API, but to >> cleanup all the mayhem that our targets currently are: >> * Cleanup the build system - drop symlinks, including the same source >> files from different locations. >> * Make targets less error prone by using static pipe-drivers by >> default. Shared ones we lack versioning and ... are a big can of >> worms. >> * Minimize all the target.c duplication across each target. Makefiles >> are in similar boat. > The reason that the state trackers can manage statically linked pipe > drivers, or the duplications in target.c can be killed is because of > the introduction of an API (inline_drm_helper.h), or if you prefer, > helper functions. > Seems like my definition of "API" slightly differs :) /me drops down the EE books and picks up a CS one. > Either way, a set of functions are defined to help manage statically > linked pipe drivers. State trackers tend to do > > #ifdef GALLIUM_STATIC_TARGETS > /* use inline_drm_helper.h */ > ... > #else > /* use pipe_loader.h */ > ... > #endif > > IMHO, we should be able to define a single API, or a single set of > helper functions, to manage pipe drivers, no matter they are > statically linked or dynamically loaded. Note that > inline_drm_helper.h is not stateless: dd_create_screen must be called > first to initialize a static variable. It may even be possible to > extend pipe loader for the statically linked case. > I.e. move the "static or shared pipe-drivers" decision to the pipe-loader ? Currently we need the latter due to the opencl target, although with that sorted it sounds doable. >> * Allow people to use the unstable pipe-drivers if they are really >> short on size and know what they are doing. >> >>> The lack of such API previously led us to "targets", where each target >>> knows how to load a specific driver. 
With your changes, state >>> trackers that need to work with pipe drivers have a way to do so. As >>> a result, files such as >>> >>> dri/target.c, >>> xa/target.c, >>> xvmc/target.c, >>> vdpau/target.c, and >>> omx/target.c >>> >>> become quite dummy and redundant. Do you see a way to get rid of >>> "targets" entirely? >>> >> Indeed if/when an API comes around these targets may become redundant. > Well, with inline_drm_helper.h and the like, there is already > basically nothing in those target.c. I do not even get why we need > those #include's in them :) As things are right now, for most state > trackers (dri, gbm, omx, vdpau, xa, and xvmc), there is a matching > target, and the only things in the target are rules to build it. > > I am not suggesting you should update the series to kill the targets. > As I said, I believe it takes a lot of time to get things to where > they are now, and I am fine to see any of this resolved later. > Cheers, I just wanted to have a clear picture what you have in mind before I start working on it. -Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 73846] [llvmpipe] lp_test_format fails with llvm-3.5svn >= r199602
https://bugs.freedesktop.org/show_bug.cgi?id=73846 Vinson Lee changed: What|Removed |Added Status|RESOLVED|REOPENED Resolution|FIXED |--- --- Comment #4 from Vinson Lee --- mesa: 56335b44417bc3d49625f9637e2b95457f522ad2 (master 10.3.0-devel) lp_test_format is still failing with llvm-3.5.0svn. $ ./build/linux-x86_64-debug/bin/lp_test_format Testing PIPE_FORMAT_B8G8R8A8_UNORM (float) ... Testing PIPE_FORMAT_B8G8R8A8_UNORM (unorm8) ... Testing PIPE_FORMAT_B8G8R8X8_UNORM (float) ... Testing PIPE_FORMAT_B8G8R8X8_UNORM (unorm8) ... Testing PIPE_FORMAT_A8R8G8B8_UNORM (float) ... Testing PIPE_FORMAT_A8R8G8B8_UNORM (unorm8) ... Testing PIPE_FORMAT_X8R8G8B8_UNORM (float) ... Testing PIPE_FORMAT_X8R8G8B8_UNORM (unorm8) ... Testing PIPE_FORMAT_B5G5R5A1_UNORM (float) ... Testing PIPE_FORMAT_B5G5R5A1_UNORM (unorm8) ... Testing PIPE_FORMAT_B4G4R4A4_UNORM (float) ... Testing PIPE_FORMAT_B4G4R4A4_UNORM (unorm8) ... Testing PIPE_FORMAT_B5G6R5_UNORM (float) ... Testing PIPE_FORMAT_B5G6R5_UNORM (unorm8) ... Testing PIPE_FORMAT_R10G10B10A2_UNORM (float) ... Testing PIPE_FORMAT_R10G10B10A2_UNORM (unorm8) ... Testing PIPE_FORMAT_L8_UNORM (float) ... Testing PIPE_FORMAT_L8_UNORM (unorm8) ... Testing PIPE_FORMAT_A8_UNORM (float) ... Testing PIPE_FORMAT_A8_UNORM (unorm8) ... Testing PIPE_FORMAT_I8_UNORM (float) ... Testing PIPE_FORMAT_I8_UNORM (unorm8) ... Testing PIPE_FORMAT_L8A8_UNORM (float) ... Testing PIPE_FORMAT_L8A8_UNORM (unorm8) ... Testing PIPE_FORMAT_L16_UNORM (float) ... Testing PIPE_FORMAT_L16_UNORM (unorm8) ... Testing PIPE_FORMAT_UYVY (float) ... Testing PIPE_FORMAT_UYVY (unorm8) ... Testing PIPE_FORMAT_YUYV (float) ... Testing PIPE_FORMAT_YUYV (unorm8) ... Testing PIPE_FORMAT_R32_FLOAT (float) ... Testing PIPE_FORMAT_R32_FLOAT (unorm8) ... Testing PIPE_FORMAT_R32G32_FLOAT (float) ... Testing PIPE_FORMAT_R32G32_FLOAT (unorm8) ... Testing PIPE_FORMAT_R32G32B32_FLOAT (float) ... Testing PIPE_FORMAT_R32G32B32_FLOAT (unorm8) ... Testing PIPE_FORMAT_R32G32B32A32_FLOAT (float) ... 
Testing PIPE_FORMAT_R32G32B32A32_FLOAT (unorm8) ... Testing PIPE_FORMAT_R32_UNORM (float) ... Testing PIPE_FORMAT_R32_UNORM (unorm8) ... Testing PIPE_FORMAT_R32G32_UNORM (float) ... Testing PIPE_FORMAT_R32G32_UNORM (unorm8) ... Testing PIPE_FORMAT_R32G32B32_UNORM (float) ... Testing PIPE_FORMAT_R32G32B32_UNORM (unorm8) ... Testing PIPE_FORMAT_R32G32B32A32_UNORM (float) ... Testing PIPE_FORMAT_R32G32B32A32_UNORM (unorm8) ... Testing PIPE_FORMAT_R32_USCALED (float) ... Testing PIPE_FORMAT_R32_USCALED (unorm8) ... Testing PIPE_FORMAT_R32G32_USCALED (float) ... LLVM ERROR: Cannot select: 0x371c9b0: v4i32 = X86ISD::UMIN 0x3747978, 0x371bf60 [ORD=5] [ID=33] 0x3747978: v4i32 = X86ISD::MOVSS 0x3747870, 0x371cab8 [ORD=4] [ID=32] 0x3747870: v4i32 = bitcast 0x3747660 [ORD=3] [ID=30] 0x3747660: v2i64 = scalar_to_vector 0x371f8d8 [ORD=3] [ID=28] 0x371f8d8: i64,ch = load 0x3738e80, 0x36f7008, 0x36f7638 [ORD=3] [ID=26] 0x36f7008: i64,ch = CopyFromReg 0x3738e80, 0x36f6f00 [ORD=1] [ID=23] 0x36f6f00: i64 = Register %vreg1 [ID=2] 0x36f7638: i64 = undef [ID=4] 0x371cab8: v4i32 = X86ISD::PSHUFD 0x3747870, 0x3747a80 [ORD=3] [ID=31] 0x3747870: v4i32 = bitcast 0x3747660 [ORD=3] [ID=30] 0x3747660: v2i64 = scalar_to_vector 0x371f8d8 [ORD=3] [ID=28] 0x371f8d8: i64,ch = load 0x3738e80, 0x36f7008, 0x36f7638 [ORD=3] [ID=26] 0x36f7008: i64,ch = CopyFromReg 0x3738e80, 0x36f6f00 [ORD=1] [ID=23] 0x36f6f00: i64 = Register %vreg1 [ID=2] 0x36f7638: i64 = undef [ID=4] 0x3747a80: i8 = Constant<16> [ID=15] 0x371bf60: v4i32 = bitcast 0x36f7b60 [ORD=5] [ID=29] 0x36f7b60: v2i64,ch = load 0x3738e80, 0x3749db8, 0x36f7638 [ORD=5] [ID=27] 0x3749db8: i64 = X86ISD::Wrapper 0x3749cb0 [ID=25] 0x3749cb0: i64 = TargetConstantPool<<4 x i32> > 0 [ID=16] 0x36f7638: i64 = undef [ID=4] In function: fetch_r32g32_uscaled_unorm8 -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 73846] [llvmpipe] lp_test_format fails with llvm-3.5svn >= r199602
https://bugs.freedesktop.org/show_bug.cgi?id=73846 Roland Scheidegger changed: What|Removed |Added Status|NEW |RESOLVED Resolution|--- |FIXED --- Comment #3 from Roland Scheidegger --- Fixed by 56335b44417bc3d49625f9637e2b95457f522ad2. -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Broadcom VC4 project
On 18/06/14 08:28, Eric Anholt wrote: > To those who have been curious what I was up to: I wasn't sure when I > could announce my new project, I just got the ack day before yesterday, > and I've been a little busy. > > I'm working toward building a Mesa driver for Broadcom VC4 (aka > Raspberry Pi). At the moment I'm still bringing up the DRM side of > things, but I hope to be doing bits of userspace in the next few days. > Current status is I have a skeleton DRM KMS driver that's going to talk > to the firmware for modesetting, and now I'm starting on the execution > side of things. > > I'm probably going to start out doing a gallium driver for simplicity, > to avoid having to do all the DRI crap we've got in brw_context.c and > texture miptree validation and getting user data into VBOs and all that > other almost-boilerplate. Long term I may end up switching to classic > so I can get swrast fallbacks and the ability to implement single-copy > manually-tiled TexImage uploads like. For now I want to get to drawing > triangles as soon as I can. > Hi Eric, IIRC the i915 driver already uses softpipe/llvmpipe as a fallback for some operations. Not sure how much coverage it has for what you have in mind though :\ -Emil > Major thanks go to Simon Hall, whose port of the 2708 android driver > code drop is what I've been using as a reference for how the 3D engine > commands are submitted. > > > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCHv2] targets/radeonsi/vdpau: convert to static/shared pipe-drivers
On 18/06/14 11:14, Christian König wrote: > Hi Emil, > > This patch is Reviewed-by: Christian König > > But there is still something looking odd: >> if NEED_RADEON_DRM_WINSYS >> if !HAVE_GALLIUM_R300 >> -if !HAVE_GALLIUM_RADEONSI >> STATIC_TARGET_LIB_DEPS += \ >> $(top_builddir)/src/gallium/winsys/radeon/drm/libradeonwinsys.la >> endif >> endif >> -endif > You add the radeon winsys only when it's not used for R300 or RADEONSI? > That doesn't sound right and should probably be fixed in one of the previous > patches in your patchset. > The previous patch adds the radeon winsys only when building vdpau-r600. This one extends the winsys inclusion to handle build requests for vdpau-radeonsi. I.e. the winsys is included only once when either one or both radeon drivers are requested. If we include the winsys twice we cause build breakage. Don't think I see your concern here. Can you please elaborate? Thanks for having a look at the patches. Shout out as soon as (if) any issues come up :) -Emil > Regards, > Christian. > > Am 17.06.2014 20:01, schrieb Emil Velikov: >> Similar to previous commits, this allows us to minimise some >> of the duplication by compacting all vdpau targets into a >> single library. >> >> v2: >> - Include the radeon winsys only when there is a user for it. 
>> >> Cc: Christian König >> Signed-off-by: Emil Velikov >> --- >> configure.ac| 3 +- >> install-gallium-links.mk| 3 +- >> src/gallium/Automake.inc| 20 -- >> src/gallium/targets/Makefile.am | 4 -- >> src/gallium/targets/radeonsi/vdpau/Makefile.am | 50 >> - >> src/gallium/targets/radeonsi/vdpau/drm_target.c | 1 - >> src/gallium/targets/vdpau/Makefile.am | 10 - >> 7 files changed, 10 insertions(+), 81 deletions(-) >> delete mode 100644 src/gallium/targets/radeonsi/vdpau/Makefile.am >> delete mode 12 src/gallium/targets/radeonsi/vdpau/drm_target.c >> >> diff --git a/configure.ac b/configure.ac >> index ca70ab4..7ff14ad 100644 >> --- a/configure.ac >> +++ b/configure.ac >> @@ -1964,7 +1964,7 @@ if test -n "$with_gallium_drivers"; then >> GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS radeonsi" >> radeon_llvm_check "radeonsi" >> require_egl_drm "radeonsi" >> -gallium_check_st "radeon/drm" "radeonsi/dri" "" "" >> "radeonsi/vdpau" "radeonsi/omx" >> +gallium_check_st "radeon/drm" "radeonsi/dri" "" "" >> "vdpau/radeonsi" "radeonsi/omx" >> DRICOMMON_NEED_LIBDRM=yes >> ;; >> xnouveau) >> @@ -2229,7 +2229,6 @@ AC_CONFIG_FILES([Makefile >> src/gallium/targets/r600/xvmc/Makefile >> src/gallium/targets/radeonsi/dri/Makefile >> src/gallium/targets/radeonsi/omx/Makefile >> -src/gallium/targets/radeonsi/vdpau/Makefile >> src/gallium/targets/vdpau/Makefile >> src/gallium/targets/xa/Makefile >> src/gallium/targets/xa/xatracker.pc >> diff --git a/install-gallium-links.mk b/install-gallium-links.mk >> index 757b288..f45f1b4 100644 >> --- a/install-gallium-links.mk >> +++ b/install-gallium-links.mk >> @@ -5,7 +5,7 @@ if BUILD_SHARED >> if HAVE_COMPAT_SYMLINKS >> all-local : .libs/install-gallium-links >> -.libs/install-gallium-links : $(dri_LTLIBRARIES) $(vdpau_LTLIBRARIES) >> $(egl_LTLIBRARIES) $(lib_LTLIBRARIES) >> +.libs/install-gallium-links : $(dri_LTLIBRARIES) $(egl_LTLIBRARIES) >> $(lib_LTLIBRARIES) >> $(AM_V_GEN)$(MKDIR_P) $(top_builddir)/$(LIB_DIR);\ >> 
link_dir=$(top_builddir)/$(LIB_DIR)/gallium;\ >> if test x$(egl_LTLIBRARIES) != x; then\ >> @@ -13,7 +13,6 @@ all-local : .libs/install-gallium-links >> fi;\ >> $(MKDIR_P) $$link_dir;\ >> file_list=$(dri_LTLIBRARIES:%.la=.libs/%.so);\ >> -file_list+=$(vdpau_LTLIBRARIES:%.la=.libs/%.$(LIB_EXT)*); \ >> file_list+=$(egl_LTLIBRARIES:%.la=.libs/%.$(LIB_EXT)*);\ >> file_list+=$(lib_LTLIBRARIES:%.la=.libs/%.$(LIB_EXT)*);\ >> for f in $$file_list; do \ >> diff --git a/src/gallium/Automake.inc b/src/gallium/Automake.inc >> index f216fc9..21a4a80 100644 >> --- a/src/gallium/Automake.inc >> +++ b/src/gallium/Automake.inc >> @@ -59,14 +59,6 @@ GALLIUM_DRI_LINKER_FLAGS = \ >> -avoid-version \ >> $(GC_SECTIONS) >> -GALLIUM_VDPAU_LINKER_FLAGS = \ >> --shared \ >> --module \ >> --no-undefined \ >> --version-number $(VDPAU_MAJOR):$(VDPAU_MINOR) \ >> -$(GC_SECTIONS) \ >> -$(LD_NO_UNDEFINED) >> - >> GALLIUM_XVMC_LINKER_FLAGS = \ >> -shared \ >> -module \ >> @@ -87,9 +79,6 @@ if HAVE_LD_VERSION_SCRIPT >> GALLIUM_DRI_LINKER_FLAGS += \ >> -Wl,--version-script=$(top_srcdir)/src/gallium/targets/dri.sym >> -GALLIUM_VDPAU_LINKER_FL
Re: [Mesa-dev] [PATCH] gallivm: fix SCALED -> NORM conversions
Looks good to me AFAICT. Thanks for the update. Jose From: srol...@vmware.com Sent: 18 June 2014 00:36 To: Jose Fonseca; mesa-dev@lists.freedesktop.org Cc: Roland Scheidegger Subject: [PATCH] gallivm: fix SCALED -> NORM conversions From: Roland Scheidegger Such conversions (which are most likely rather pointless in practice) were resulting in shifts with negative shift counts and shifts with counts the same as the bit width. This was always undefined in llvm, the code generated was rather horrendous but happened to work. So make sure such shifts are filtered out and replaced with something that works (the generated code is still just as horrendous as before). This fixes lp_test_format, https://bugs.freedesktop.org/show_bug.cgi?id=73846. v2: prettify by using build context shift helpers. --- src/gallium/auxiliary/gallivm/lp_bld_conv.c | 39 + 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.c b/src/gallium/auxiliary/gallivm/lp_bld_conv.c index d3bf621..1424447 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c @@ -792,29 +792,23 @@ lp_build_conv(struct gallivm_state *gallivm, unsigned dst_shift = lp_const_shift(dst_type); unsigned src_offset = lp_const_offset(src_type); unsigned dst_offset = lp_const_offset(dst_type); + struct lp_build_context bld; + lp_build_context_init(&bld, gallivm, tmp_type); /* Compensate for different offsets */ - if (dst_offset > src_offset && src_type.width > dst_type.width) { + /* sscaled -> unorm and similar would cause negative shift count, skip */ + if (dst_offset > src_offset && src_type.width > dst_type.width && src_shift > 0) { for (i = 0; i < num_tmps; ++i) { LLVMValueRef shifted; -LLVMValueRef shift = lp_build_const_int_vec(gallivm, tmp_type, src_shift - 1); -if(src_type.sign) - shifted = LLVMBuildAShr(builder, tmp[i], shift, ""); -else - shifted = LLVMBuildLShr(builder, tmp[i], shift, ""); +shifted = 
lp_build_shr_imm(&bld, tmp[i], src_shift - 1); tmp[i] = LLVMBuildSub(builder, tmp[i], shifted, ""); } } if(src_shift > dst_shift) { - LLVMValueRef shift = lp_build_const_int_vec(gallivm, tmp_type, - src_shift - dst_shift); for(i = 0; i < num_tmps; ++i) -if(src_type.sign) - tmp[i] = LLVMBuildAShr(builder, tmp[i], shift, ""); -else - tmp[i] = LLVMBuildLShr(builder, tmp[i], shift, ""); +tmp[i] = lp_build_shr_imm(&bld, tmp[i], src_shift - dst_shift); } } @@ -900,14 +894,27 @@ lp_build_conv(struct gallivm_state *gallivm, unsigned dst_shift = lp_const_shift(dst_type); unsigned src_offset = lp_const_offset(src_type); unsigned dst_offset = lp_const_offset(dst_type); + struct lp_build_context bld; + lp_build_context_init(&bld, gallivm, tmp_type); if (src_shift < dst_shift) { LLVMValueRef pre_shift[LP_MAX_VECTOR_LENGTH]; - LLVMValueRef shift = lp_build_const_int_vec(gallivm, tmp_type, dst_shift - src_shift); - for (i = 0; i < num_tmps; ++i) { - pre_shift[i] = tmp[i]; - tmp[i] = LLVMBuildShl(builder, tmp[i], shift, ""); + if (dst_shift - src_shift < dst_type.width) { + for (i = 0; i < num_tmps; ++i) { +pre_shift[i] = tmp[i]; +tmp[i] = lp_build_shl_imm(&bld, tmp[i], dst_shift - src_shift); + } + } + else { + /* + * This happens for things like sscaled -> unorm conversions. Shift + * counts equal to bit width cause undefined results, so hack around it. + */ + for (i = 0; i < num_tmps; ++i) { +pre_shift[i] = tmp[i]; +tmp[i] = lp_build_zero(gallivm, dst_type); + } } /* Compensate for different offsets */ -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 03/11] r600g/compute: Add statuses to the compute_memory_items
These statuses will help track whether the items are mapped or if they should be promoted to or demoted from the pool v2: Use the new is_item_in_pool util function, as suggested by Tom Stellard --- src/gallium/drivers/r600/compute_memory_pool.h | 7 ++- src/gallium/drivers/r600/evergreen_compute.c | 12 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/r600/compute_memory_pool.h b/src/gallium/drivers/r600/compute_memory_pool.h index d8201c4..cd93a19 100644 --- a/src/gallium/drivers/r600/compute_memory_pool.h +++ b/src/gallium/drivers/r600/compute_memory_pool.h @@ -27,13 +27,18 @@ #include +#define ITEM_MAPPED_FOR_READING (1<<0) +#define ITEM_MAPPED_FOR_WRITING (1<<1) +#define ITEM_FOR_PROMOTING (1<<2) +#define ITEM_FOR_DEMOTING (1<<3) + struct compute_memory_pool; struct compute_memory_item { int64_t id; ///ID of the memory chunk - int untouched; ///True if the memory contains only junk, no need to save it for defrag + uint32_t status; ///Will track the status of the item int64_t start_in_dw; ///Start pointer in dwords relative in the pool bo int64_t size_in_dw; ///Size of the chunk in dwords diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index c152e54..8657071 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c @@ -659,6 +659,15 @@ static void evergreen_set_global_binding( return; } + /* We mark these items for promotion to the pool if they +* aren't already there */ + for (int i = 0; i < n; i++) { + struct compute_memory_item *item = buffers[i]->chunk; + + if (!is_item_in_pool(item)) + buffers[i]->chunk->status |= ITEM_FOR_PROMOTING; + } + compute_memory_finalize_pending(pool, ctx_); for (int i = 0; i < n; i++) @@ -969,6 +978,9 @@ void *r600_compute_global_transfer_map( offset += (buffer->chunk->start_in_dw * 4); } + if (usage & PIPE_TRANSFER_READ) + buffer->chunk->status |= ITEM_MAPPED_FOR_READING; + 
COMPUTE_DBG(rctx->screen, "* r600_compute_global_transfer_map()\n" "level = %u, usage = %u, box(x = %u, y = %u, z = %u " "width = %u, height = %u, depth = %u)\n", level, usage, -- 2.0.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 01/11] r600g/compute: Add an intermediate resource for OpenCL buffers
This patch completely changes the way buffers are added to the compute_memory_pool. Before this, whenever we were going to map a buffer or write to or read from it, it would get placed into the pool. Now, every unallocated buffer has its own r600_resource until it is allocated in the pool. NOTE: This patch also increases the GPU memory usage at the moment of putting every buffer in its place. More or less, the memory usage is ~2x(sum of every buffer size) v2: Cleanup v3: Use temporary variables to avoid so many castings in functions, as suggested by Tom Stellard --- src/gallium/drivers/r600/compute_memory_pool.c | 27 +- src/gallium/drivers/r600/compute_memory_pool.h | 2 ++ src/gallium/drivers/r600/evergreen_compute.c | 18 - 3 files changed, 41 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/r600/compute_memory_pool.c b/src/gallium/drivers/r600/compute_memory_pool.c index ec8c470..0b126a8 100644 --- a/src/gallium/drivers/r600/compute_memory_pool.c +++ b/src/gallium/drivers/r600/compute_memory_pool.c @@ -71,7 +71,6 @@ static void compute_memory_pool_init(struct compute_memory_pool * pool, if (pool->shadow == NULL) return; - pool->next_id = 1; pool->size_in_dw = initial_size_in_dw; pool->bo = (struct r600_resource*)r600_compute_buffer_alloc_vram(pool->screen, pool->size_in_dw * 4); @@ -314,6 +313,14 @@ int compute_memory_finalize_pending(struct compute_memory_pool* pool, for (item = pending_list; item; item = next) { next = item->next; + struct pipe_screen *screen = (struct pipe_screen *)pool->screen; + struct r600_context *rctx = (struct r600_context *)pipe; + struct pipe_resource *dst = (struct pipe_resource *)pool->bo; + struct pipe_resource *src = (struct pipe_resource *)item->real_buffer; + struct pipe_box box; + + u_box_1d(0, item->size_in_dw * 4, &box); + /* Search for free space in the pool for this item. 
*/ while ((start_in_dw=compute_memory_prealloc_chunk(pool, item->size_in_dw)) == -1) { @@ -365,6 +372,14 @@ int compute_memory_finalize_pending(struct compute_memory_pool* pool, pool->item_list = item; } + rctx->b.b.resource_copy_region(pipe, + dst, 0, item->start_in_dw * 4, 0 ,0, + src, 0, &box); + + pool->screen->b.b.resource_destroy( + screen, src); + item->real_buffer = NULL; + allocated += item->size_in_dw; } @@ -375,6 +390,8 @@ int compute_memory_finalize_pending(struct compute_memory_pool* pool, void compute_memory_free(struct compute_memory_pool* pool, int64_t id) { struct compute_memory_item *item, *next; + struct pipe_screen *screen = (struct pipe_screen *)pool->screen; + struct pipe_resource *res; COMPUTE_DBG(pool->screen, "* compute_memory_free() id + %ld \n", id); @@ -393,6 +410,12 @@ void compute_memory_free(struct compute_memory_pool* pool, int64_t id) item->next->prev = item->prev; } + if (item->real_buffer) { + res = (struct pipe_resource *)item->real_buffer; + pool->screen->b.b.resource_destroy( + screen, res); + } + free(item); return; @@ -426,6 +449,8 @@ struct compute_memory_item* compute_memory_alloc( new_item->start_in_dw = -1; /* mark pending */ new_item->id = pool->next_id++; new_item->pool = pool; + new_item->real_buffer = (struct r600_resource*)r600_compute_buffer_alloc_vram( + pool->screen, size_in_dw * 4); if (pool->item_list) { for (last_item = pool->item_list; last_item->next; diff --git a/src/gallium/drivers/r600/compute_memory_pool.h b/src/gallium/drivers/r600/compute_memory_pool.h index c711c59..e94159c 100644 --- a/src/gallium/drivers/r600/compute_memory_pool.h +++ b/src/gallium/drivers/r600/compute_memory_pool.h @@ -38,6 +38,8 @@ struct compute_memory_item int64_t start_in_dw; ///Start pointer in dwords relative in the pool bo int64_t size_in_dw; ///Size of the chunk in dwords + struct r600_resource *real_buffer; + struct compute_memory_pool* pool; struct compute_memory_item* prev; diff --git 
a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index a2abf15..c152e54 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/sr
[Mesa-dev] [PATCH 11/11] clover: Use PIPE_TRANSFER_MAP_DIRECTLY when writing/reading buffers
Note: This is just a proof of concept. --- src/gallium/state_trackers/clover/api/transfer.cpp | 4 ++-- src/gallium/state_trackers/clover/core/object.hpp | 4 src/gallium/state_trackers/clover/core/resource.cpp | 2 ++ 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/gallium/state_trackers/clover/api/transfer.cpp b/src/gallium/state_trackers/clover/api/transfer.cpp index 404ceb0..f34ae8b 100644 --- a/src/gallium/state_trackers/clover/api/transfer.cpp +++ b/src/gallium/state_trackers/clover/api/transfer.cpp @@ -174,8 +174,8 @@ namespace { static mapping get(command_queue &q, T obj, cl_map_flags flags, size_t offset, size_t size) { - return { q, obj->resource(q), flags, true, - {{ offset }}, {{ size, 1, 1 }} }; + return { q, obj->resource(q), flags | CLOVER_TRANSFER_MAP_DIRECTLY, + true, {{ offset }}, {{ size, 1, 1 }} }; } }; diff --git a/src/gallium/state_trackers/clover/core/object.hpp b/src/gallium/state_trackers/clover/core/object.hpp index 697565c..7d5adf9 100644 --- a/src/gallium/state_trackers/clover/core/object.hpp +++ b/src/gallium/state_trackers/clover/core/object.hpp @@ -33,6 +33,10 @@ #include "core/property.hpp" #include "api/dispatch.hpp" +#ifndef CLOVER_TRANSFER_MAP_DIRECTLY +#define CLOVER_TRANSFER_MAP_DIRECTLY (1<<8) +#endif + /// /// Main namespace of the CL state tracker. /// diff --git a/src/gallium/state_trackers/clover/core/resource.cpp b/src/gallium/state_trackers/clover/core/resource.cpp index 7b8a40a..c8e97db 100644 --- a/src/gallium/state_trackers/clover/core/resource.cpp +++ b/src/gallium/state_trackers/clover/core/resource.cpp @@ -174,6 +174,8 @@ mapping::mapping(command_queue &q, resource &r, pctx(q.pipe) { unsigned usage = ((flags & CL_MAP_WRITE ? PIPE_TRANSFER_WRITE : 0 ) | (flags & CL_MAP_READ ? PIPE_TRANSFER_READ : 0 ) | + (flags & CLOVER_TRANSFER_MAP_DIRECTLY ? + PIPE_TRANSFER_MAP_DIRECTLY : 0 ) | (!blocking ? 
PIPE_TRANSFER_UNSYNCHRONIZED : 0)); p = pctx->transfer_map(pctx, r.pipe, 0, usage, -- 2.0.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 07/11] r600g/compute: Implement compute_memory_demote_item
This function will be used when we want to map an item that is already in the pool. v2: Use temporary variables to avoid so many castings in functions, as suggested by Tom Stellard --- src/gallium/drivers/r600/compute_memory_pool.c | 51 ++ src/gallium/drivers/r600/compute_memory_pool.h | 3 ++ 2 files changed, 54 insertions(+) diff --git a/src/gallium/drivers/r600/compute_memory_pool.c b/src/gallium/drivers/r600/compute_memory_pool.c index f232f9f..6409b34 100644 --- a/src/gallium/drivers/r600/compute_memory_pool.c +++ b/src/gallium/drivers/r600/compute_memory_pool.c @@ -387,6 +387,57 @@ int compute_memory_promote_item(struct compute_memory_pool *pool, return 0; } +void compute_memory_demote_item(struct compute_memory_pool *pool, + struct compute_memory_item *item, struct pipe_context *pipe) +{ + struct r600_context *rctx = (struct r600_context *)pipe; + struct pipe_resource *src = (struct pipe_resource *)pool->bo; + struct pipe_resource *dst; + struct pipe_box box; + + /* First, we remove the item from the item_list */ + if (item->prev == NULL) + pool->item_list = item->next; + else + item->prev->next = item->next; + + if (item->next != NULL) + item->next->prev = item->prev; + + + /* Now we add it to the beginning of the unallocated list +* NOTE: we could also add it to the end, but this is easier */ + item->next = NULL; + item->prev = NULL; + if (pool->unallocated_list) { + item->next = pool->unallocated_list; + item->next->prev = item; + pool->unallocated_list = item; + } + else + pool->unallocated_list = item; + + /* We check if the intermediate buffer exists, and if it +* doesn't, we create it again */ + if (item->real_buffer == NULL) { + item->real_buffer = (struct r600_resource*)r600_compute_buffer_alloc_vram( + pool->screen, item->size_in_dw * 4); + } + + dst = (struct pipe_resource *)item->real_buffer; + + /* We transfer the memory from the item in the pool to the +* temporary buffer */ + u_box_1d(item->start_in_dw * 4, item->size_in_dw * 4, &box); + + 
rctx->b.b.resource_copy_region(pipe, + dst, 0, 0, 0, 0, + src, 0, &box); + + /* Remember to mark the buffer as 'pending' by setting start_in_dw to -1 */ + item->start_in_dw = -1; +} + void compute_memory_free(struct compute_memory_pool* pool, int64_t id) { struct compute_memory_item *item, *next; diff --git a/src/gallium/drivers/r600/compute_memory_pool.h b/src/gallium/drivers/r600/compute_memory_pool.h index faadeea..0bb695c 100644 --- a/src/gallium/drivers/r600/compute_memory_pool.h +++ b/src/gallium/drivers/r600/compute_memory_pool.h @@ -90,6 +90,9 @@ int compute_memory_promote_item(struct compute_memory_pool *pool, struct compute_memory_item *item, struct pipe_context *pipe, int64_t allocated); +void compute_memory_demote_item(struct compute_memory_pool *pool, + struct compute_memory_item *item, struct pipe_context *pipe); + void compute_memory_free(struct compute_memory_pool* pool, int64_t id); struct compute_memory_item* compute_memory_alloc(struct compute_memory_pool* pool, int64_t size_in_dw); ///Creates pending allocations -- 2.0.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 05/11] r600g/compute: Only move to the pool the buffers marked for promoting
--- src/gallium/drivers/r600/compute_memory_pool.c | 146 +++-- src/gallium/drivers/r600/compute_memory_pool.h | 5 + 2 files changed, 91 insertions(+), 60 deletions(-) diff --git a/src/gallium/drivers/r600/compute_memory_pool.c b/src/gallium/drivers/r600/compute_memory_pool.c index 96769e5..5b1ee14 100644 --- a/src/gallium/drivers/r600/compute_memory_pool.c +++ b/src/gallium/drivers/r600/compute_memory_pool.c @@ -229,8 +229,6 @@ int compute_memory_finalize_pending(struct compute_memory_pool* pool, int64_t allocated = 0; int64_t unallocated = 0; - int64_t start_in_dw = 0; - int err = 0; COMPUTE_DBG(pool->screen, "* compute_memory_finalize_pending()\n"); @@ -247,10 +245,12 @@ int compute_memory_finalize_pending(struct compute_memory_pool* pool, allocated += align(item->size_in_dw, ITEM_ALIGNMENT); } - /* Calculate the total unallocated size */ + /* Calculate the total unallocated size of the items that +* will be promoted to the pool */ for (item = pool->unallocated_list; item; item = next) { next = item->next; - unallocated += align(item->size_in_dw, ITEM_ALIGNMENT); + if (item->status & ITEM_FOR_PROMOTING) + unallocated += align(item->size_in_dw, ITEM_ALIGNMENT); } /* If we require more space than the size of the pool, then grow the @@ -276,87 +276,113 @@ int compute_memory_finalize_pending(struct compute_memory_pool* pool, return -1; } - /* Loop through all the unallocated items, allocate space for them -* and add them to the item_list. */ + /* Loop through all the unallocated items, check if they are marked +* for promoting, allocate space for them and add them to the item_list. 
*/ for (item = pool->unallocated_list; item; item = next) { next = item->next; - struct pipe_screen *screen = (struct pipe_screen *)pool->screen; - struct r600_context *rctx = (struct r600_context *)pipe; - struct pipe_resource *dst = (struct pipe_resource *)pool->bo; - struct pipe_resource *src = (struct pipe_resource *)item->real_buffer; - struct pipe_box box; + if (item->status & ITEM_FOR_PROMOTING) { + err = compute_memory_promote_item(pool, item, pipe, allocated); + item->status ^= ITEM_FOR_PROMOTING; - u_box_1d(0, item->size_in_dw * 4, &box); + allocated += align(item->size_in_dw, ITEM_ALIGNMENT); - /* Search for free space in the pool for this item. */ - while ((start_in_dw=compute_memory_prealloc_chunk(pool, - item->size_in_dw)) == -1) { - int64_t need = item->size_in_dw+2048 - - (pool->size_in_dw - allocated); + if (err == -1) + return -1; + } + } - if (need < 0) { - need = pool->size_in_dw / 10; - } + return 0; +} + +int compute_memory_promote_item(struct compute_memory_pool *pool, + struct compute_memory_item *item, struct pipe_context *pipe, + int64_t allocated) +{ + struct pipe_screen *screen = (struct pipe_screen *)pool->screen; + struct r600_context *rctx = (struct r600_context *)pipe; + struct pipe_resource *dst = (struct pipe_resource *)pool->bo; + struct pipe_resource *src = (struct pipe_resource *)item->real_buffer; + struct pipe_box box; - need = align(need, ITEM_ALIGNMENT); + int64_t start_in_dw; + int err = 0; - err = compute_memory_grow_pool(pool, - pipe, - pool->size_in_dw + need); - if (err == -1) - return -1; + /* Search for free space in the pool for this item. 
*/ + while ((start_in_dw=compute_memory_prealloc_chunk(pool, + item->size_in_dw)) == -1) { + int64_t need = item->size_in_dw + 2048 - + (pool->size_in_dw - allocated); + + if (need < 0) { + need = pool->size_in_dw / 10; } - COMPUTE_DBG(pool->screen, " + Found space for Item %p id = %u " + + need = align(need, ITEM_ALIGNMENT); + + err = compute_memory_grow_pool(pool, + pipe, + pool->size_in_dw + need); + + if (err == -1) + return -1; + } + COMPUTE_DBG(pool->screen, " + F
[Mesa-dev] [PATCH 10/11] r600g/compute: Map directly the pool in some cases
All the *Enqueue* functions that read/write buffers (except clEnqueueCopyBuffer) would map the associated resource, causing it to be demoted if it was in the pool. But we positively know that this transfer will end before any kernel is launched, so there's no need to demote it. --- src/gallium/drivers/r600/evergreen_compute.c | 20 +++- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index c0dd0f3..2d6b9d3 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c @@ -967,18 +967,28 @@ void *r600_compute_global_transfer_map( struct r600_resource_global* buffer = (struct r600_resource_global*)resource; - struct pipe_resource *dst; + struct pipe_resource *dst = + (struct pipe_resource *) buffer->chunk->real_buffer; unsigned offset = box->x; + /* If the item is already in the pool, and we are going +* to read/write it, map it directly without demoting it */ if (is_item_in_pool(buffer->chunk)) { - compute_memory_demote_item(pool, buffer->chunk, ctx_); + if (usage & PIPE_TRANSFER_MAP_DIRECTLY) { + dst = (struct pipe_resource *) buffer->chunk->pool->bo; + offset += (buffer->chunk->start_in_dw * 4); + } + else { + compute_memory_demote_item(pool, buffer->chunk, ctx_); + dst = (struct pipe_resource *) buffer->chunk->real_buffer; + } } - dst = (struct pipe_resource*)buffer->chunk->real_buffer; - - if (usage & PIPE_TRANSFER_READ) + if ((usage & PIPE_TRANSFER_READ) && !(usage & PIPE_TRANSFER_MAP_DIRECTLY)) buffer->chunk->status |= ITEM_MAPPED_FOR_READING; + usage &= ~PIPE_TRANSFER_MAP_DIRECTLY; + COMPUTE_DBG(rctx->screen, "* r600_compute_global_transfer_map()\n" "level = %u, usage = %u, box(x = %u, y = %u, z = %u " "width = %u, height = %u, depth = %u)\n", level, usage, -- 2.0.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 04/11] r600g/compute: divide the item list in two
Now we will have a list with the items that are in the pool (item_list) and the items that are outside it (unallocated_list) --- src/gallium/drivers/r600/compute_memory_pool.c | 99 +- src/gallium/drivers/r600/compute_memory_pool.h | 1 + 2 files changed, 49 insertions(+), 51 deletions(-) diff --git a/src/gallium/drivers/r600/compute_memory_pool.c b/src/gallium/drivers/r600/compute_memory_pool.c index 0b126a8..96769e5 100644 --- a/src/gallium/drivers/r600/compute_memory_pool.c +++ b/src/gallium/drivers/r600/compute_memory_pool.c @@ -108,13 +108,11 @@ int64_t compute_memory_prealloc_chunk( size_in_dw); for (item = pool->item_list; item; item = item->next) { - if (item->start_in_dw > -1) { - if (last_end + size_in_dw <= item->start_in_dw) { - return last_end; - } - - last_end = item->start_in_dw + align(item->size_in_dw, ITEM_ALIGNMENT); + if (last_end + size_in_dw <= item->start_in_dw) { + return last_end; } + + last_end = item->start_in_dw + align(item->size_in_dw, ITEM_ALIGNMENT); } if (pool->size_in_dw - last_end < size_in_dw) { @@ -226,7 +224,6 @@ void compute_memory_shadow(struct compute_memory_pool* pool, int compute_memory_finalize_pending(struct compute_memory_pool* pool, struct pipe_context * pipe) { - struct compute_memory_item *pending_list = NULL, *end_p = NULL; struct compute_memory_item *item, *next; int64_t allocated = 0; @@ -244,45 +241,16 @@ int compute_memory_finalize_pending(struct compute_memory_pool* pool, item->size_in_dw, item->size_in_dw * 4); } - /* Search through the list of memory items in the pool */ + /* Calculate the total allocated size */ for (item = pool->item_list; item; item = next) { next = item->next; + allocated += align(item->size_in_dw, ITEM_ALIGNMENT); + } - /* Check if the item is pending. */ - if (item->start_in_dw == -1) { - /* It is pending, so add it to the pending_list... */ - if (end_p) { - end_p->next = item; - } - else { - pending_list = item; - } - - /* ... and then remove it from the item list. 
*/ - if (item->prev) { - item->prev->next = next; - } - else { - pool->item_list = next; - } - - if (next) { - next->prev = item->prev; - } - - /* This sequence makes the item be at the end of the list */ - item->prev = end_p; - item->next = NULL; - end_p = item; - - /* Update the amount of space we will need to allocate. */ - unallocated += item->size_in_dw+1024; - } - else { - /* The item is not pending, so update the amount of space -* that has already been allocated. */ - allocated += item->size_in_dw; - } + /* Calculate the total unallocated size */ + for (item = pool->unallocated_list; item; item = next) { + next = item->next; + unallocated += align(item->size_in_dw, ITEM_ALIGNMENT); } /* If we require more space than the size of the pool, then grow the @@ -302,15 +270,15 @@ int compute_memory_finalize_pending(struct compute_memory_pool* pool, * In this case, there are 300 units of free space in the pool, but * they aren't contiguous, so it will be impossible to allocate Item D. */ - if (pool->size_in_dw < allocated+unallocated) { - err = compute_memory_grow_pool(pool, pipe, allocated+unallocated); + if (pool->size_in_dw < allocated + unallocated) { + err = compute_memory_grow_pool(pool, pipe, allocated + unallocated); if (err == -1) return -1; } - /* Loop through all the pending items, allocate space for them and -* add them back to the item_list. */ - for (item = pending_list; item; item = next) { + /* Loop through all the unallocated items, allocate space for them +* and add them to the item_list. */ + for (item = pool->unallocated_list; item; item = next) { next = item->next; struct pipe_screen *screen = (struct pipe_screen *)pool->screen; @@ -383,6 +351,8 @@ int co
[Mesa-dev] [PATCH 06/11] r600g/compute: Avoid problems when promoting items mapped for reading
According to the OpenCL spec, it is possible to have a buffer mapped for reading and, at the same time, read from it using commands or buffers. With this we can keep the mapping (that exists against the temporary item) and read with a kernel (from the item we have just added to the pool) without problems. --- src/gallium/drivers/r600/compute_memory_pool.c | 12 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/r600/compute_memory_pool.c b/src/gallium/drivers/r600/compute_memory_pool.c index 5b1ee14..f232f9f 100644 --- a/src/gallium/drivers/r600/compute_memory_pool.c +++ b/src/gallium/drivers/r600/compute_memory_pool.c @@ -375,10 +375,14 @@ int compute_memory_promote_item(struct compute_memory_pool *pool, dst, 0, item->start_in_dw * 4, 0 ,0, src, 0, &box); - pool->screen->b.b.resource_destroy( - screen, src); - - item->real_buffer = NULL; + /* We check if the item is mapped for reading. +* In this case, we need to keep the temporary buffer 'alive' +* because it is possible to keep a map active for reading +* while a kernel (that reads from it) executes */ + if (!(item->status & ITEM_MAPPED_FOR_READING)) { + pool->screen->b.b.resource_destroy(screen, src); + item->real_buffer = NULL; + } return 0; } -- 2.0.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 08/11] r600g/compute: Map only against intermediate buffers
With this we can ensure that mapped buffers will never change their position when relocating the pool. This patch should finally solve the mapping bug. v2: Use the new is_item_in_pool util function, as suggested by Tom Stellard --- src/gallium/drivers/r600/evergreen_compute.c | 10 -- 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index 8657071..c0dd0f3 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c @@ -970,14 +970,12 @@ void *r600_compute_global_transfer_map( struct pipe_resource *dst; unsigned offset = box->x; - if (buffer->chunk->real_buffer) { - dst = (struct pipe_resource*)buffer->chunk->real_buffer; - } - else { - dst = (struct pipe_resource*)buffer->chunk->pool->bo; - offset += (buffer->chunk->start_in_dw * 4); + if (is_item_in_pool(buffer->chunk)) { + compute_memory_demote_item(pool, buffer->chunk, ctx_); } + dst = (struct pipe_resource*)buffer->chunk->real_buffer; + if (usage & PIPE_TRANSFER_READ) buffer->chunk->status |= ITEM_MAPPED_FOR_READING; -- 2.0.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 00/11] [RFC v2] Solve the mapping bug
Hi, This is my second attempt to fix the mapping bug adding all the suggestions that Tom Stellard sent, and, so far, it seems that it is resolved. This series changes completely how OpenCL buffers are handled by the r600g driver. Before this, we would add them directly to a pool, and this pool would grow whenever we needed more space. But this process implied destroying the pool and creating a new one. There could be cases where a buffer would be mapped and the pool would grow, leaving one side of the mapping pointed to where the item was. This is the 'mapping bug'. Now, items will have an intermediate resource, where all mappings can be done, and when a buffer is going to be used with a kernel it is promoted to the pool. In the case where a promoted item is going to be mapped, it is demoted first, so even if the pool changes its location due to growing, the map remains valid. In the case of a buffer mapped for reading, and used by a kernel to read from it, we will duplicate this buffer, having the intermediate buffer, where the user has its map, and an item in the pool, which is the one that the kernel is going to use. As a summary for v2: Patches 1-8: These are the main part of the series, and solve the mapping bug. Patches 1 and 7 now use fewer explicit casts Patch 2 is new and introduces the 'is_item_in_pool' function, which is used in patches 3 and 8 Patch 9: Is a complete rewrite of v1 patch 8 using gallium utils for double lists Patches 10 and 11: These are just a proof of concept for avoiding transfers GPU <-> GPU when using all CL Read/Write functions. They are v1 patch 9 split in two to separate r600g changes from clover changes. Now, on clover's side it introduces and uses 'CLOVER_TRANSFER_MAP_DIRECTLY' so it doesn't collide with any other OpenCL flag. 
Please review and Thanks :) Bruno Jiménez (11): r600g/compute: Add an intermediate resource for OpenCL buffers r600g/compute: Add an util function to know if an item is in the pool r600g/compute: Add statuses to the compute_memory_items r600g/compute: divide the item list in two r600g/compute: Only move to the pool the buffers marked for promoting r600g/compute: Avoid problems when promoting items mapped for reading r600g/compute: Implement compute_memory_demote_item r600g/compute: Map only against intermediate buffers r600g/compute: Use gallium util functions for double lists r600g/compute: Map directly the pool in some cases clover: Use PIPE_TRANSFER_MAP_DIRECTLY when writing/reading buffers src/gallium/drivers/r600/compute_memory_pool.c | 294 - src/gallium/drivers/r600/compute_memory_pool.h | 31 ++- src/gallium/drivers/r600/evergreen_compute.c | 38 ++- src/gallium/state_trackers/clover/api/transfer.cpp | 4 +- src/gallium/state_trackers/clover/core/object.hpp | 4 + .../state_trackers/clover/core/resource.cpp| 2 + 6 files changed, 233 insertions(+), 140 deletions(-) -- 2.0.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 02/11] r600g/compute: Add an util function to know if an item is in the pool
Every item that has been placed in the pool must have start_in_dw different from -1. --- src/gallium/drivers/r600/compute_memory_pool.h | 5 + 1 file changed, 5 insertions(+) diff --git a/src/gallium/drivers/r600/compute_memory_pool.h b/src/gallium/drivers/r600/compute_memory_pool.h index e94159c..d8201c4 100644 --- a/src/gallium/drivers/r600/compute_memory_pool.h +++ b/src/gallium/drivers/r600/compute_memory_pool.h @@ -59,6 +59,11 @@ struct compute_memory_pool }; +static inline int is_item_in_pool(struct compute_memory_item *item) +{ + return item->start_in_dw != -1; +} + struct compute_memory_pool* compute_memory_pool_new(struct r600_screen *rscreen); ///Creates a new pool void compute_memory_pool_delete(struct compute_memory_pool* pool); ///Frees all stuff in the pool and the pool struct itself too -- 2.0.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 09/11] r600g/compute: Use gallium util functions for double lists
--- src/gallium/drivers/r600/compute_memory_pool.c | 147 +++-- src/gallium/drivers/r600/compute_memory_pool.h | 10 +- 2 files changed, 46 insertions(+), 111 deletions(-) diff --git a/src/gallium/drivers/r600/compute_memory_pool.c b/src/gallium/drivers/r600/compute_memory_pool.c index 6409b34..518ea65 100644 --- a/src/gallium/drivers/r600/compute_memory_pool.c +++ b/src/gallium/drivers/r600/compute_memory_pool.c @@ -57,6 +57,12 @@ struct compute_memory_pool* compute_memory_pool_new( COMPUTE_DBG(rscreen, "* compute_memory_pool_new()\n"); pool->screen = rscreen; + pool->item_list = (struct list_head *) + CALLOC(sizeof(struct list_head), 1); + pool->unallocated_list = (struct list_head *) + CALLOC(sizeof(struct list_head), 1); + list_inithead(pool->item_list); + list_inithead(pool->unallocated_list); return pool; } @@ -107,7 +113,7 @@ int64_t compute_memory_prealloc_chunk( COMPUTE_DBG(pool->screen, "* compute_memory_prealloc_chunk() size_in_dw = %ld\n", size_in_dw); - for (item = pool->item_list; item; item = item->next) { + LIST_FOR_EACH_ENTRY(item, pool->item_list, link) { if (last_end + size_in_dw <= item->start_in_dw) { return last_end; } @@ -125,31 +131,37 @@ int64_t compute_memory_prealloc_chunk( /** * Search for the chunk where we can link our new chunk after it. 
*/ -struct compute_memory_item* compute_memory_postalloc_chunk( +struct list_head *compute_memory_postalloc_chunk( struct compute_memory_pool* pool, int64_t start_in_dw) { - struct compute_memory_item* item; + struct compute_memory_item *item; + struct compute_memory_item *next; + struct list_head *next_link; COMPUTE_DBG(pool->screen, "* compute_memory_postalloc_chunck() start_in_dw = %ld\n", start_in_dw); /* Check if we can insert it in the front of the list */ - if (pool->item_list && pool->item_list->start_in_dw > start_in_dw) { - return NULL; + item = LIST_ENTRY(struct compute_memory_item, pool->item_list->next, link); + if (LIST_IS_EMPTY(pool->item_list) || item->start_in_dw > start_in_dw) { + return pool->item_list; } - for (item = pool->item_list; item; item = item->next) { - if (item->next) { + LIST_FOR_EACH_ENTRY(item, pool->item_list, link) { + next_link = item->link.next; + + if (next_link != pool->item_list) { + next = container_of(next_link, item, link); if (item->start_in_dw < start_in_dw - && item->next->start_in_dw > start_in_dw) { - return item; + && next->start_in_dw > start_in_dw) { + return &item->link; } } else { /* end of chain */ assert(item->start_in_dw < start_in_dw); - return item; + return &item->link; } } @@ -212,7 +224,6 @@ void compute_memory_shadow(struct compute_memory_pool* pool, chunk.id = 0; chunk.start_in_dw = 0; chunk.size_in_dw = pool->size_in_dw; - chunk.prev = chunk.next = NULL; compute_memory_transfer(pool, pipe, device_to_host, &chunk, pool->shadow, 0, pool->size_in_dw*4); } @@ -233,22 +244,20 @@ int compute_memory_finalize_pending(struct compute_memory_pool* pool, COMPUTE_DBG(pool->screen, "* compute_memory_finalize_pending()\n"); - for (item = pool->item_list; item; item = item->next) { + LIST_FOR_EACH_ENTRY(item, pool->item_list, link) { COMPUTE_DBG(pool->screen, " + list: offset = %i id = %i size = %i " "(%i bytes)\n",item->start_in_dw, item->id, item->size_in_dw, item->size_in_dw * 4); } /* Calculate the total 
allocated size */ - for (item = pool->item_list; item; item = next) { - next = item->next; + LIST_FOR_EACH_ENTRY(item, pool->item_list, link) { allocated += align(item->size_in_dw, ITEM_ALIGNMENT); } /* Calculate the total unallocated size of the items that * will be promoted to the pool */ - for (item = pool->unallocated_list; item; item = next) { - next = item->next; + LIST_FOR_EACH_ENTRY(item, pool->unallocated_list, link) { if (item->status & ITEM_FOR_PROMOTING) unallocated += align(item->size_in_dw, ITEM_ALIGNMENT); } @@ -278,9 +287,7 @@ int compute_memory_finalize_pending(struct compute_memory_pool* pool,
[Mesa-dev] [Bug 80183] [llvmpipe] triangles with vertices that map to raster positions > viewport width/height are not displayed
https://bugs.freedesktop.org/show_bug.cgi?id=80183 Roland Scheidegger changed: What|Removed |Added Attachment #101296|text/plain |application/zip mime type|| -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 4/5] gallium: Add PIPE_SHADER_CAP_DOUBLES
On Wed, Jun 18, 2014 at 09:54:28AM +1000, Dave Airlie wrote: > On 18 June 2014 05:08, Roland Scheidegger wrote: > > This looks ok to me though since tgsi currently doesn't have any double > > opcodes (well the docs have them...) it doesn't really apply to most > > drivers (at least I assume you don't want to add support for it for tgsi). > > I've mostly forward ported the old gallium double code, and have > written most of ARB_gpu_shader_fp64 on top, > > Though the question I did want to ask Tom is if he is just going to > expose hw that has doubles, or does > he plan on emulating doubles. > My intention was that this CAP would be enabled for drivers with either native double support or with double emulation implemented in the driver. I'm not currently planning to implement double emulation, so I would only enable this for hardware with native double support. > For a lot of GLSL4.0 GPUs from AMD fglrx emulates doubles using > massive magic shaders, I'm unsure > if we should have a lowering pass above/below the TGSI line for these > types of situations and what that > would mean for this CAP. > If someone implemented double lowering above the TGSI line, then it should only been done with drivers that report 0 for this CAP. -Tom > Dave. > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Static/shared pipe-drivers (was megadriver/pipe-loader-to-all)
On Wed, Jun 18, 2014 at 8:14 PM, Emil Velikov wrote: > On 18 June 2014 08:21, Chia-I Wu wrote: >> Hi Emil, >> >> On Fri, Jun 13, 2014 at 3:56 AM, Emil Velikov >> wrote: >>> Hi all, >>> >>> These patches add support for building (grouping) the various targets per >>> API, meaning that only one library will be created for e.g. vdpau >>> (libvdpau_gallium) with individual ones (libvdpau_r600) being a hardlink >>> to it. >>> >>> This allows us to have substantial space savings as the API(state-tracker) >>> is available only once. Additionally it adds support for shared >>> pipe-drivers via a _unstable_ interface, which saves us the duplication >>> across X APIs. >>> >>> The former method has been used by the egl-static while the latter by >>> opencl and gbm targets since they were introduced. >>> >>> By default we build with "static pipe-drivers". >>> >>> Some numbers + extra info [1] >>> >>> [Static] >>> dri:(r600|radeonsi|nouveau)_dri.so -> 6.5 MiB >>> vdpau: libvdpau_(r600|radeonsi|nouveau).so -> 3.5 MiB >>> >>> Total: 10MiB >>> >>> [Shared] >>> Libraries: >>> dri:(r600|radeonsi|nouveau)_dri.so -> 3.9 MiB >>> vdpau: libvdpau_(r600|radeonsi|nouveau).so -> 633 KiB >>> gallium-pipe: pipe_(r600|radeonsi|nouveau).so -> 5.3 MiB >>> >>> Total: 9.8MiB >>> >>> [Current] >>> dri:(r600|radeonsi|nouveau)_dri.so -> 5.0+4.5+5.3 = 14.8 >>> MiB >>> vdpau: libvdpau_(r600|radeonsi|nouveau).so -> 1.9+1.2+2.3 = 5.4 >>> MiB >>> >>> Total: 20.2MiB >>> >>> >>> The previous series can be found here [2] >>> Changes since then >>> - Convert targets individually. >>> - OMX targets now work, and the final library is now libomx-mesa.so >>> - Dropped the DRI targets for now >>> - A handfull of typos thinkos and bugs fixed. >>> >>> >>> My plan is to have these pushed in ~4 stages, with two stages per week. >>> This way I will be able to crack on with the remaining bits and have all >>> of it tested well before we branch the next release. 
>>> >>> Series is availabe at >>> https://github.com/evelikov/Mesa/tree/static-or-shared-pipe-drivers >>> >>> As always comments and suggestions are greatly appreciated. >> Thanks for working on this. This is a tough issue to tackle. I have >> a few questions/comments, which I am fine to see them resolved either >> before or after landing your series. >> >> I see this work as to define an internal API to manage pipe drivers. > Interesting, I do not see this as an attempt to define an API, but to > cleanup all the mayhem that our targets currently are: > * Cleanup the build system - drop symlinks, including the same source > files from different locations. > * Make targets less error prone by using static pipe-drivers by > default. Shared ones we lack versioning and ... are a big can of > worms. > * Minimize all the target.c duplication across each target. Makefiles > are in similar boat. The reason that the state trackers can manage statically linked pipe drivers, or the duplications in target.c can be killed is because of the introduction of an API (inline_drm_helper.h), or if you prefer, helper functions. Either way, a set of functions are defined to help manage statically linked pipe drivers. State trackers tend to do #ifdef GALLIUM_STATIC_TARGETS /* use inline_drm_helper.h */ ... #else /* use pipe_loader.h */ ... #endif IMHO, we should be able to define a single API, or a single set of helper functions, to manage pipe drivers, no matter they are statically linked or dynamically loaded. Note that inline_drm_helper.h is not stateless: dd_create_screen must be called first to initialize a static variable. It may even be possible to extend pipe loader for the statically linked case. > * Allow people to use the unstable pipe-drivers if they are really > short on size and know what they are doing. > >> The lack of such API previously led us to "targets", where each target >> knows how to load a specific driver. 
With your changes, state >> trackers that need to work with pipe drivers have a way to do so. As >> a result, files such as >> >> dri/target.c, >> xa/target.c, >> xvmc/target.c, >> vdpau/target.c, and >> omx/target.c >> >> become quite dummy and redundant. Do you see a way to get rid of >> "targets" entirely? >> > Indeed if/when an API comes around these targets may become redundant. Well, with inline_drm_helper.h and the like, there is already basically nothing in those target.c. I do not even get why we need those #include's in them :) As things are right now, for most state trackers (dri, gbm, omx, vdpau, xa, and xvmc), there is a matching target, and the only things in the target are rules to build it. I am not suggesting you should update the series to kill the targets. As I said, I believe it takes a lot of time to get things to where they are now, and I am fine to see any of this resolved later. >> In the same view, and noticing that all users of the API have thi
Re: [Mesa-dev] [PATCH 4/5] gallium: Add PIPE_SHADER_CAP_DOUBLES
Am 18.06.2014 01:54, schrieb Dave Airlie: > On 18 June 2014 05:08, Roland Scheidegger wrote: >> This looks ok to me though since tgsi currently doesn't have any double >> opcodes (well the docs have them...) it doesn't really apply to most >> drivers (at least I assume you don't want to add support for it for tgsi). > > I've mostly forward ported the old gallium double code, and have > written most of ARB_gpu_shader_fp64 on top, > > Though the question I did want to ask Tom is if he is just going to > expose hw that has doubles, or does > he plan on emulating doubles. > > For a lot of GLSL4.0 GPUs from AMD fglrx emulates doubles using > massive magic shaders, I'm unsure > if we should have a lowering pass above/below the TGSI line for these > types of situations and what that > would mean for this CAP. Oh that's interesting. I always thought drivers didn't emulate that, and if apps want doubles but the device doesn't provide them it needs to do that itself. For which chips does fglrx do that? If you'd want to emulate this, the other question is if you can do it at the tgsi level, or if this was exploiting some hw specific bits (well of course you could still do it at tgsi level, but if the hw has some bits to make this easier, then this isn't efficient). In any case I guess this could be decided later. Roland ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 00/23] Megadrivers galore
On Tue, Jun 17, 2014 at 07:38:16PM +0100, Emil Velikov wrote: > Hi all, > > As a follow up to the static/shared pipe-drivers series here is the final > series (if anyone is interested I may take a look at egl + opencl) of > refactoring the gallium dri targets into a single library/provider. > Hi Emil, One common problem I run into when using the pipe drivers is if there is an undefined symbol in the pipe_*.so then it will silently fail to load, and with OpenCL for example it won't report any devices which can be confusing for users. I would recommend adding some error handling to util_dlopen, so that it prints an error message when it fails to load a shared object. Other than that, it's hard to review a series like this, but I'll give it an: Acked-by: Tom Stellard Since I like the general approach. -Tom > In a nutshell: > - Convert one target per patch. > - Merge the drm and sw backends of our dri state-tracker. > - Adds __driDriverGetExtensions_$drivername symbol for each driver. > - Megadrivers. > - *** > - Profit. > > Things works like a charm for nouveau and swrast, and testing on other > platforms is greatly appreciated. > > The complete series can be found in the static-or-shared-pipe-drivers-v2 > branch at my github repo. > > I would like to get this reviewed/pushed over the next month, although > that depends on the number of bugs that come up with the previous batch. > > As always comments, suggestions and flame is welcome. 
> > Cheers, > Emil > > Emil Velikov (23): > targets/dri-swrast: use drm aware dricommon when building more than > swrast > st/dri: Allow separate dri-targets > st/dri/drm: Add a second libdridrm library > targets/dri-nouveau: Convert to static/shared pipe-drivers > targets/(r300|r600|radeonsi)/dri: Convert to static/shared pipe-drivers > targets/dri-freedreno: Convert to static/shared pipe-drivers > targets/dri-i915: Convert to static/shared pipe-drivers > targets/dri-ilo: Convert to static/shared pipe-driver > targets/dri-vmwgfx: Convert to static/shared pipe-drivers > st/dri: Remove the old libdridrm library > targets/dri: Add __driDriverGetExtensions_nouveau symbol > targets/dri: Add __driDriverGetExtensions_(r300|r600|radeonsi) symbols > targets/dri: Add __driDriverGetExtensions_freedreno symbol > targets/dri: Add __driDriverGetExtensions_i915 symbol > targets/dri: Add __driDriverGetExtensions_i965 symbol > targets/dri: Add __driDriverGetExtensions_vmwgfx > targets/dri: update scons build to handle > __driDriverGetExtensions_vmwgfx > targets/dri: cleanup conversion leftovers > st/dri/drm: remove __driDriverExtensions and driDriverAPI > scons: build and use a single dri_common library > targets/dri-swrast: convert to gallium megadrivers :) > st/dri: merge dri/drm and dri/sw backends > targets/dri-swrast: Convert to static/shared pipe-driver > > 61 files changed, 536 insertions(+), 1375 deletions(-) > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 80185] Please do not hardcode SELinux identifiers in the code
https://bugs.freedesktop.org/show_bug.cgi?id=80185 --- Comment #3 from rhatdan --- In Fedora the allow_execmem boolean is not set, and this boolean would only affect user space. getsebool -a | grep execmem boinc_execmem --> on cluster_use_execmem --> off cups_execmem --> off deny_execmem --> off httpd_execmem --> off virt_use_execmem --> off xserver_execmem --> off Currently we add booleans for confined services if we see them as necessary. It would be better if your code just checked if you got EPERM or EACCES for errno, and then if you want check if SELinux is enabled and continue with no error. Especially since your code works fine without WRITEABLE/EXECUTABLE memory. -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 80185] Please do not hardcode SELinux identifiers in the code
https://bugs.freedesktop.org/show_bug.cgi?id=80185 --- Comment #2 from Laurent Bigonville --- Another question that might arise is why a failure of the mmap() function would be considered an error in one case and not in the case where SELinux prevents it from succeeding? -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 80185] Please do not hardcode SELinux identifiers in the code
https://bugs.freedesktop.org/show_bug.cgi?id=80185 Laurent Bigonville changed: What|Removed |Added CC||l.bigonvi...@edpnet.be --- Comment #1 from Laurent Bigonville --- An other question might also arise is why -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 80185] New: Please do not hardcode SELinux identifiers in the code
https://bugs.freedesktop.org/show_bug.cgi?id=80185 Priority: medium Bug ID: 80185 Assignee: mesa-dev@lists.freedesktop.org Summary: Please do not hardcode SELinux identifiers in the code Severity: normal Classification: Unclassified OS: All Reporter: l.bigonvi...@edpnet.be Hardware: Other Status: NEW Version: 10.2 Component: Mesa core Product: Mesa Hello, Currently the mesa code is checking for SELinux booleans ("allow_execmem") to check whether SELinux will allow mapped memory to be executed and otherwise turns the function into a no-op. Hardcoding such booleans (and other identifiers) is probably a bad habit, since custom policies might be written and may perfectly well omit these identifiers. Fedora policy for example doesn't have this boolean anymore. You should probably try to mmap() in all cases and react accordingly in case of failure when SELinux is enabled (by checking errno maybe?) -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Static/shared pipe-drivers (was megadriver/pipe-loader-to-all)
On 18 June 2014 08:21, Chia-I Wu wrote: > Hi Emil, > > On Fri, Jun 13, 2014 at 3:56 AM, Emil Velikov > wrote: >> Hi all, >> >> These patches add support for building (grouping) the various targets per >> API, meaning that only one library will be created for e.g. vdpau >> (libvdpau_gallium) with individual ones (libvdpau_r600) being a hardlink >> to it. >> >> This allows us to have substantial space savings as the API(state-tracker) >> is available only once. Additionally it adds support for shared >> pipe-drivers via a _unstable_ interface, which saves us the duplication >> across X APIs. >> >> The former method has been used by the egl-static while the latter by >> opencl and gbm targets since they were introduced. >> >> By default we build with "static pipe-drivers". >> >> Some numbers + extra info [1] >> >> [Static] >> dri:(r600|radeonsi|nouveau)_dri.so -> 6.5 MiB >> vdpau: libvdpau_(r600|radeonsi|nouveau).so -> 3.5 MiB >> >> Total: 10MiB >> >> [Shared] >> Libraries: >> dri:(r600|radeonsi|nouveau)_dri.so -> 3.9 MiB >> vdpau: libvdpau_(r600|radeonsi|nouveau).so -> 633 KiB >> gallium-pipe: pipe_(r600|radeonsi|nouveau).so -> 5.3 MiB >> >> Total: 9.8MiB >> >> [Current] >> dri:(r600|radeonsi|nouveau)_dri.so -> 5.0+4.5+5.3 = 14.8 >> MiB >> vdpau: libvdpau_(r600|radeonsi|nouveau).so -> 1.9+1.2+2.3 = 5.4 MiB >> >> Total: 20.2MiB >> >> >> The previous series can be found here [2] >> Changes since then >> - Convert targets individually. >> - OMX targets now work, and the final library is now libomx-mesa.so >> - Dropped the DRI targets for now >> - A handfull of typos thinkos and bugs fixed. >> >> >> My plan is to have these pushed in ~4 stages, with two stages per week. >> This way I will be able to crack on with the remaining bits and have all >> of it tested well before we branch the next release. >> >> Series is availabe at >> https://github.com/evelikov/Mesa/tree/static-or-shared-pipe-drivers >> >> As always comments and suggestions are greatly appreciated. 
> Thanks for working on this. This is a tough issue to tackle. I have > a few questions/comments, which I am fine to see them resolved either > before or after landing your series. > > I see this work as to define an internal API to manage pipe drivers. Interesting, I do not see this as an attempt to define an API, but to clean up all the mayhem that our targets currently are: * Clean up the build system - drop symlinks, including the same source files from different locations. * Make targets less error prone by using static pipe-drivers by default. Shared ones lack versioning and ... are a big can of worms. * Minimize all the target.c duplication across each target. Makefiles are in a similar boat. * Allow people to use the unstable pipe-drivers if they are really short on size and know what they are doing. > The lack of such API previously led us to "targets", where each target > knows how to load a specific driver. With your changes, state > trackers that need to work with pipe drivers have a way to do so. As > a result, files such as > > dri/target.c, > xa/target.c, > xvmc/target.c, > vdpau/target.c, and > omx/target.c > > become quite dummy and redundant. Do you see a way to get rid of > "targets" entirely? > Indeed if/when an API comes around these targets may become redundant. > In the same view, and noticing that all users of the API have this snippet > > #if GALLIUM_STATIC_TARGETS >scrn->base.pscreen = dd_create_screen(fd); > #else >if (pipe_loader_drm_probe_fd(&scrn->base.dev, fd, true)) > scrn->base.pscreen = pipe_loader_create_screen(scrn->base.dev, > PIPE_SEARCH_DIR); > #endif // GALLIUM_STATIC_TARGETS > > I think it makes sense hide this difference behind the API. Another I'm not sure that is feasible, yet. Biggest obstacle is the pipe-loaders API, and mainly the software winsys' with their variation. > thing I noted is that the non-static path allows the user to auth the > fd while the static path doesn't. 
It is not clear to me how come the > static path works. > Some of the targets do not need an auth as the loader (libGL, libEGL...) already does that. I have not extensively tested the pipe-loader paths but they seem to work with simple tasks - glxgears, mplayer (vdpau). It could be that we might need to drop the auth in some cases - to be sorted out once confirmed to be an issue. > On the other hand, the implementation of the API extends itself a bit > when DRI_TARGET is defined. That is ugly from the API's point of > view. Could that be abstracted somehow so that it can be used > elsewhere or at least looks nicer? > > Finally, the API is limited to C API (i.e., inline_{drm,sw}_helper.h). > At the build system level, we also like a way to easily manage a pipe > driver. For example, we can see this snippet in many of the > Makefile.am > > if HAVE_GALLIUM_RADEONSI > STATIC_TARGET_CPPFLAGS += -DGALLIUM_R
Re: [Mesa-dev] [PATCH 1/5] st/st/omx: fix switch-case indentation in vid_enc.c
Am 12.06.2014 18:27, schrieb Leo Liu: Signed-off-by: Leo Liu I've pushed everything upstream. Thanks for the help, Christian. --- src/gallium/state_trackers/omx/vid_enc.c | 32 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/src/gallium/state_trackers/omx/vid_enc.c b/src/gallium/state_trackers/omx/vid_enc.c index d16fa01..70f63d1 100644 --- a/src/gallium/state_trackers/omx/vid_enc.c +++ b/src/gallium/state_trackers/omx/vid_enc.c @@ -626,22 +626,22 @@ static OMX_ERRORTYPE vid_enc_GetConfig(OMX_HANDLETYPE handle, OMX_INDEXTYPE idx, static enum pipe_video_profile enc_TranslateOMXProfileToPipe(unsigned omx_profile) { switch (omx_profile) { - case OMX_VIDEO_AVCProfileBaseline: - return PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE; - case OMX_VIDEO_AVCProfileMain: - return PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN; - case OMX_VIDEO_AVCProfileExtended: - return PIPE_VIDEO_PROFILE_MPEG4_AVC_EXTENDED; - case OMX_VIDEO_AVCProfileHigh: - return PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH; - case OMX_VIDEO_AVCProfileHigh10: - return PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH10; - case OMX_VIDEO_AVCProfileHigh422: - return PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH422; - case OMX_VIDEO_AVCProfileHigh444: - return PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH444; - default: - return PIPE_VIDEO_PROFILE_UNKNOWN; + case OMX_VIDEO_AVCProfileBaseline: + return PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE; + case OMX_VIDEO_AVCProfileMain: + return PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN; + case OMX_VIDEO_AVCProfileExtended: + return PIPE_VIDEO_PROFILE_MPEG4_AVC_EXTENDED; + case OMX_VIDEO_AVCProfileHigh: + return PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH; + case OMX_VIDEO_AVCProfileHigh10: + return PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH10; + case OMX_VIDEO_AVCProfileHigh422: + return PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH422; + case OMX_VIDEO_AVCProfileHigh444: + return PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH444; + default: + return PIPE_VIDEO_PROFILE_UNKNOWN; } } ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org 
http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/3] radeonsi: add sampling of 4:2:2 subsampled textures
Ok, pushed the patches. Account requests usually take a while to complete, that's nothing to worry about. Regards, Christian. Am 18.06.2014 13:14, schrieb Grigori Goronzy: On 18.06.2014 13:11, Christian König wrote: @Grigori: Should I push it or did you got your account in the meantime? No account yet. I wonder what's going on. Please push. Best regards Grigori Christian. Am 17.06.2014 22:26, schrieb Marek Olšák: This looks good to me. Reviewed-by: Marek Olšák Marek On Wed, Jun 4, 2014 at 6:54 PM, Grigori Goronzy wrote: This makes 4:2:2 video surfaces work in VDPAU. --- src/gallium/drivers/radeon/r600_texture.c | 5 +- src/gallium/drivers/radeonsi/si_blit.c| 91 ++- src/gallium/drivers/radeonsi/si_state.c | 15 + 3 files changed, 71 insertions(+), 40 deletions(-) diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c index 3a37465..a20b0c8 100644 --- a/src/gallium/drivers/radeon/r600_texture.c +++ b/src/gallium/drivers/radeon/r600_texture.c @@ -737,9 +737,8 @@ static unsigned r600_choose_tiling(struct r600_common_screen *rscreen, * Compressed textures must always be tiled. */ if (!(templ->flags & R600_RESOURCE_FLAG_FORCE_TILING) && !util_format_is_compressed(templ->format)) { - /* Tiling doesn't work with the 422 (SUBSAMPLED) formats on R600-Cayman. */ - if (rscreen->chip_class <= CAYMAN && - desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) + /* Tiling doesn't work with the 422 (SUBSAMPLED) formats on R600+. */ + if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) return RADEON_SURF_MODE_LINEAR_ALIGNED; /* Cursors are linear on SI. 
diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index e02615f..8c3e136 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -548,46 +548,63 @@ static void si_resource_copy_region(struct pipe_context *ctx, dstx = util_format_get_nblocksx(orig_info[1].format, dstx); dsty = util_format_get_nblocksy(orig_info[1].format, dsty); } else if (!util_blitter_is_copy_supported(sctx->blitter, dst, src)) { - unsigned blocksize = util_format_get_blocksize(src->format); - - switch (blocksize) { - case 1: - si_change_format(src, src_level, &orig_info[0], -PIPE_FORMAT_R8_UNORM); - si_change_format(dst, dst_level, &orig_info[1], -PIPE_FORMAT_R8_UNORM); - break; - case 2: - si_change_format(src, src_level, &orig_info[0], -PIPE_FORMAT_R8G8_UNORM); - si_change_format(dst, dst_level, &orig_info[1], -PIPE_FORMAT_R8G8_UNORM); - break; - case 4: - si_change_format(src, src_level, &orig_info[0], -PIPE_FORMAT_R8G8B8A8_UNORM); - si_change_format(dst, dst_level, &orig_info[1], -PIPE_FORMAT_R8G8B8A8_UNORM); - break; - case 8: - si_change_format(src, src_level, &orig_info[0], -PIPE_FORMAT_R16G16B16A16_UINT); - si_change_format(dst, dst_level, &orig_info[1], -PIPE_FORMAT_R16G16B16A16_UINT); - break; - case 16: + if (util_format_is_subsampled_422(src->format)) { + /* XXX untested */ si_change_format(src, src_level, &orig_info[0], -PIPE_FORMAT_R32G32B32A32_UINT); +PIPE_FORMAT_R8G8B8A8_UINT); si_change_format(dst, dst_level, &orig_info[1], -PIPE_FORMAT_R32G32B32A32_UINT); - break; - default: - fprintf(stderr, "Unhandled format %s with blocksize %u\n", - util_format_short_name(src->format), blocksize); - assert(0); +PIPE_FORMAT_R8G8B8A8_UINT); + + sbox = *src_box; + sbox.x = util_format_get_nblocksx(orig_info[0].format, src_box->x); + sbox.width = util_format_get_nblocksx(orig_info[0].format, src_box->width); + src_box = &sbox; + dstx = util_format_get_nblocksx(orig_info[1].format, dstx); + +
[Mesa-dev] [PATCH v2] glsl: Expand matrix flip optimization pass to cover more cases.
Also, as suggested by Ian Romanick, make it so we don't need a bunch of individual handles to flippable matrices, instead we register matrix/transpose_matrix pairs in a hash table for all built-in matrices using the non-transpose matrix name as key. --- src/glsl/opt_flip_matrices.cpp | 145 ++--- 1 file changed, 108 insertions(+), 37 deletions(-) diff --git a/src/glsl/opt_flip_matrices.cpp b/src/glsl/opt_flip_matrices.cpp index 9044fd6..d0b8125 100644 --- a/src/glsl/opt_flip_matrices.cpp +++ b/src/glsl/opt_flip_matrices.cpp @@ -29,44 +29,132 @@ * On some hardware, this is more efficient. * * This currently only does the conversion for built-in matrices which - * already have transposed equivalents. Namely, gl_ModelViewProjectionMatrix - * and gl_TextureMatrix. + * already have transposed equivalents. */ #include "ir.h" #include "ir_optimization.h" #include "main/macros.h" +#include "program/hash_table.h" namespace { + class matrix_flipper : public ir_hierarchical_visitor { public: + struct matrix_and_transpose { + ir_variable *matrix; + ir_variable *transpose_matrix; + }; + matrix_flipper(exec_list *instructions) { progress = false; - mvp_transpose = NULL; - texmat_transpose = NULL; + + /* Build a hash table of built-in matrices and their transposes. + * + * The key for the entries in the hash table is the non-transpose matrix + * name. This assumes that all built-in transpose matrices have the + * "Transpose" suffix. 
+ */ + ht = hash_table_ctor(0, hash_table_string_hash, + hash_table_string_compare); foreach_list(n, instructions) { ir_instruction *ir = (ir_instruction *) n; ir_variable *var = ir->as_variable(); - if (!var) + + /* Must be a matrix */ + if (!var || !var->type->is_matrix()) continue; - if (strcmp(var->name, "gl_ModelViewProjectionMatrixTranspose") == 0) -mvp_transpose = var; - if (strcmp(var->name, "gl_TextureMatrixTranspose") == 0) -texmat_transpose = var; + /* Must be a built-in */ + if (strstr(var->name, "gl_") != var->name) +continue; + + /* Create a new entry for this matrix if we don't have one yet */ + bool new_entry = false; + struct matrix_and_transpose *entry = +(struct matrix_and_transpose *) hash_table_find(ht, var->name); + if (!entry) { +new_entry = true; +entry = new struct matrix_and_transpose(); +entry->matrix = NULL; +entry->transpose_matrix = NULL; + } + + const char *transpose_ptr = strstr(var->name, "Transpose"); + if (transpose_ptr == NULL) { +entry->matrix = var; + } else { +entry->transpose_matrix = var; + } + + if (new_entry) { +char *entry_key; +if (transpose_ptr == NULL) { + entry_key = strdup(var->name); +} else { + entry_key = strndup(var->name, transpose_ptr - var->name); +} +hash_table_insert(ht, entry, entry_key); + } } } + ~matrix_flipper() + { + hash_table_dtor(ht); + } + ir_visitor_status visit_enter(ir_expression *ir); bool progress; private: - ir_variable *mvp_transpose; - ir_variable *texmat_transpose; + void transform_operands(ir_expression *ir, + ir_variable *mat_var, + ir_variable *mat_transpose); + void transform_operands_array_of_matrix(ir_expression *ir, + ir_variable *mat_var, + ir_variable *mat_transpose); + struct hash_table *ht; }; } +void +matrix_flipper::transform_operands(ir_expression *ir, + ir_variable *mat_var, + ir_variable *mat_transpose) +{ +#ifndef NDEBUG + ir_dereference_variable *deref = ir->operands[0]->as_dereference_variable(); + assert(deref && deref->var == mat_var); +#endif + + void *mem_ctx = 
ralloc_parent(ir); + ir->operands[0] = ir->operands[1]; + ir->operands[1] = new(mem_ctx) ir_dereference_variable(mat_transpose); +} + +void +matrix_flipper::transform_operands_array_of_matrix(ir_expression *ir, + ir_variable *mat_var, + ir_variable *mat_transpose) +{ + ir_dereference_array *array_ref = ir->operands[0]->as_dereference_array(); + assert(array_ref != NULL); + ir_dereference_variable *var_ref = + array_ref->array->as_dereference_variable(); + assert(var_ref && var_ref->var == mat_var); + + ir->operands[0] = ir->operands[1]; + ir->operands[1] = array_ref; + + var_ref->var = mat_transpose; + + mat_transpo
[Mesa-dev] [Bug 80183] New: [llvmpipe] triangles with vertices that map to raster positions > viewport width/height are not displayed
https://bugs.freedesktop.org/show_bug.cgi?id=80183 Priority: medium Bug ID: 80183 Assignee: mesa-dev@lists.freedesktop.org Summary: [llvmpipe] triangles with vertices that map to raster positions > viewport width/height are not displayed Severity: normal Classification: Unclassified OS: All Reporter: florianl...@gmail.com Hardware: Other Status: NEW Version: 10.2 Component: Other Product: Mesa Created attachment 101296 --> https://bugs.freedesktop.org/attachment.cgi?id=101296&action=edit Traces that reproduce the problem When using llvmpipe/64bit, we experience missing triangles when one of the vertices of the triangle has a raster position outside of the viewport top/right. It only happens when a GLSL shader is active, not when fixed function rendering is used. It only happens with llvmpipe, using softpipe all is fine (also with GLSL shader). Attached you find a apitrace that works and a apitrace that shows the problem. While MeVisLab.ok.trace (Frame 11) shows a textured polygon, MeVisLab.wrong.trace (Frame 13) is rendered black, while it should show the same textured polygon, clipped to the viewport width/height. The traces contain some MeVisLab network rendering as well, these can be ignored. -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/3] radeonsi: add sampling of 4:2:2 subsampled textures
On 18.06.2014 13:11, Christian König wrote: > @Grigori: Should I push it or did you got your account in the meantime? > No account yet. I wonder what's going on. Please push. Best regards Grigori > Christian. > > Am 17.06.2014 22:26, schrieb Marek Olšák: >> This looks good to me. >> >> Reviewed-by: Marek Olšák >> >> Marek >> >> On Wed, Jun 4, 2014 at 6:54 PM, Grigori Goronzy >> wrote: >>> This makes 4:2:2 video surfaces work in VDPAU. >>> --- >>> src/gallium/drivers/radeon/r600_texture.c | 5 +- >>> src/gallium/drivers/radeonsi/si_blit.c| 91 >>> ++- >>> src/gallium/drivers/radeonsi/si_state.c | 15 + >>> 3 files changed, 71 insertions(+), 40 deletions(-) >>> >>> diff --git a/src/gallium/drivers/radeon/r600_texture.c >>> b/src/gallium/drivers/radeon/r600_texture.c >>> index 3a37465..a20b0c8 100644 >>> --- a/src/gallium/drivers/radeon/r600_texture.c >>> +++ b/src/gallium/drivers/radeon/r600_texture.c >>> @@ -737,9 +737,8 @@ static unsigned r600_choose_tiling(struct >>> r600_common_screen *rscreen, >>> * Compressed textures must always be tiled. */ >>> if (!(templ->flags & R600_RESOURCE_FLAG_FORCE_TILING) && >>> !util_format_is_compressed(templ->format)) { >>> - /* Tiling doesn't work with the 422 (SUBSAMPLED) >>> formats on R600-Cayman. */ >>> - if (rscreen->chip_class <= CAYMAN && >>> - desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) >>> + /* Tiling doesn't work with the 422 (SUBSAMPLED) >>> formats on R600+. */ >>> + if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) >>> return RADEON_SURF_MODE_LINEAR_ALIGNED; >>> >>> /* Cursors are linear on SI. 
>>> diff --git a/src/gallium/drivers/radeonsi/si_blit.c >>> b/src/gallium/drivers/radeonsi/si_blit.c >>> index e02615f..8c3e136 100644 >>> --- a/src/gallium/drivers/radeonsi/si_blit.c >>> +++ b/src/gallium/drivers/radeonsi/si_blit.c >>> @@ -548,46 +548,63 @@ static void si_resource_copy_region(struct >>> pipe_context *ctx, >>> dstx = util_format_get_nblocksx(orig_info[1].format, >>> dstx); >>> dsty = util_format_get_nblocksy(orig_info[1].format, >>> dsty); >>> } else if (!util_blitter_is_copy_supported(sctx->blitter, >>> dst, src)) { >>> - unsigned blocksize = >>> util_format_get_blocksize(src->format); >>> - >>> - switch (blocksize) { >>> - case 1: >>> - si_change_format(src, src_level, &orig_info[0], >>> -PIPE_FORMAT_R8_UNORM); >>> - si_change_format(dst, dst_level, &orig_info[1], >>> -PIPE_FORMAT_R8_UNORM); >>> - break; >>> - case 2: >>> - si_change_format(src, src_level, &orig_info[0], >>> -PIPE_FORMAT_R8G8_UNORM); >>> - si_change_format(dst, dst_level, &orig_info[1], >>> -PIPE_FORMAT_R8G8_UNORM); >>> - break; >>> - case 4: >>> - si_change_format(src, src_level, &orig_info[0], >>> -PIPE_FORMAT_R8G8B8A8_UNORM); >>> - si_change_format(dst, dst_level, &orig_info[1], >>> -PIPE_FORMAT_R8G8B8A8_UNORM); >>> - break; >>> - case 8: >>> - si_change_format(src, src_level, &orig_info[0], >>> -PIPE_FORMAT_R16G16B16A16_UINT); >>> - si_change_format(dst, dst_level, &orig_info[1], >>> -PIPE_FORMAT_R16G16B16A16_UINT); >>> - break; >>> - case 16: >>> + if (util_format_is_subsampled_422(src->format)) { >>> + /* XXX untested */ >>> si_change_format(src, src_level, &orig_info[0], >>> -PIPE_FORMAT_R32G32B32A32_UINT); >>> +PIPE_FORMAT_R8G8B8A8_UINT); >>> si_change_format(dst, dst_level, &orig_info[1], >>> -PIPE_FORMAT_R32G32B32A32_UINT); >>> - break; >>> - default: >>> - fprintf(stderr, "Unhandled format %s with >>> blocksize %u\n", >>> - util_format_short_name(src->format), >>> blocksize); >>> - assert(0); >>> +PIPE_FORMAT_R8G8B8A8_UINT); >>> + >>> + sbox = *src_box; >>> + sbox.x = 
>>> util_format_get_nblocksx(orig_info[0].format, src_box->x); >>> + sbox.width =
Re: [Mesa-dev] [PATCH 2/3] radeonsi: add sampling of 4:2:2 subsampled textures
@Grigori: Should I push it or did you got your account in the meantime? Christian. Am 17.06.2014 22:26, schrieb Marek Olšák: This looks good to me. Reviewed-by: Marek Olšák Marek On Wed, Jun 4, 2014 at 6:54 PM, Grigori Goronzy wrote: This makes 4:2:2 video surfaces work in VDPAU. --- src/gallium/drivers/radeon/r600_texture.c | 5 +- src/gallium/drivers/radeonsi/si_blit.c| 91 ++- src/gallium/drivers/radeonsi/si_state.c | 15 + 3 files changed, 71 insertions(+), 40 deletions(-) diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c index 3a37465..a20b0c8 100644 --- a/src/gallium/drivers/radeon/r600_texture.c +++ b/src/gallium/drivers/radeon/r600_texture.c @@ -737,9 +737,8 @@ static unsigned r600_choose_tiling(struct r600_common_screen *rscreen, * Compressed textures must always be tiled. */ if (!(templ->flags & R600_RESOURCE_FLAG_FORCE_TILING) && !util_format_is_compressed(templ->format)) { - /* Tiling doesn't work with the 422 (SUBSAMPLED) formats on R600-Cayman. */ - if (rscreen->chip_class <= CAYMAN && - desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) + /* Tiling doesn't work with the 422 (SUBSAMPLED) formats on R600+. */ + if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) return RADEON_SURF_MODE_LINEAR_ALIGNED; /* Cursors are linear on SI. 
diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index e02615f..8c3e136 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -548,46 +548,63 @@ static void si_resource_copy_region(struct pipe_context *ctx, dstx = util_format_get_nblocksx(orig_info[1].format, dstx); dsty = util_format_get_nblocksy(orig_info[1].format, dsty); } else if (!util_blitter_is_copy_supported(sctx->blitter, dst, src)) { - unsigned blocksize = util_format_get_blocksize(src->format); - - switch (blocksize) { - case 1: - si_change_format(src, src_level, &orig_info[0], -PIPE_FORMAT_R8_UNORM); - si_change_format(dst, dst_level, &orig_info[1], -PIPE_FORMAT_R8_UNORM); - break; - case 2: - si_change_format(src, src_level, &orig_info[0], -PIPE_FORMAT_R8G8_UNORM); - si_change_format(dst, dst_level, &orig_info[1], -PIPE_FORMAT_R8G8_UNORM); - break; - case 4: - si_change_format(src, src_level, &orig_info[0], -PIPE_FORMAT_R8G8B8A8_UNORM); - si_change_format(dst, dst_level, &orig_info[1], -PIPE_FORMAT_R8G8B8A8_UNORM); - break; - case 8: - si_change_format(src, src_level, &orig_info[0], -PIPE_FORMAT_R16G16B16A16_UINT); - si_change_format(dst, dst_level, &orig_info[1], -PIPE_FORMAT_R16G16B16A16_UINT); - break; - case 16: + if (util_format_is_subsampled_422(src->format)) { + /* XXX untested */ si_change_format(src, src_level, &orig_info[0], -PIPE_FORMAT_R32G32B32A32_UINT); +PIPE_FORMAT_R8G8B8A8_UINT); si_change_format(dst, dst_level, &orig_info[1], -PIPE_FORMAT_R32G32B32A32_UINT); - break; - default: - fprintf(stderr, "Unhandled format %s with blocksize %u\n", - util_format_short_name(src->format), blocksize); - assert(0); +PIPE_FORMAT_R8G8B8A8_UINT); + + sbox = *src_box; + sbox.x = util_format_get_nblocksx(orig_info[0].format, src_box->x); + sbox.width = util_format_get_nblocksx(orig_info[0].format, src_box->width); + src_box = &sbox; + dstx = util_format_get_nblocksx(orig_info[1].format, dstx); + + restore_orig[0] = 
TRUE; + restore_orig[1] = TRUE; + } else { + unsigned blocksize = util_format_get_blocksize(src->format); + + switch (blocksize) { + case 1: +
[Mesa-dev] [PATCH] Remove unneeded stall calls from batches on Baytrail.
From: Greg Hunt These cause a small slowdown when we are sending a large number of small batches to the GPU. Signed-off-by: Gregory Hunt --- src/mesa/drivers/dri/i965/gen6_vs_state.c | 2 +- src/mesa/drivers/dri/i965/gen7_blorp.cpp | 2 +- src/mesa/drivers/dri/i965/gen7_gs_state.c | 2 +- src/mesa/drivers/dri/i965/gen7_sampler_state.c | 2 +- src/mesa/drivers/dri/i965/gen7_urb.c | 6 +++--- src/mesa/drivers/dri/i965/gen7_vs_state.c | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c index 9764645..a46cc48 100644 --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c @@ -100,7 +100,7 @@ gen6_upload_vs_push_constants(struct brw_context *brw) stage_state, AUB_TRACE_VS_CONSTANTS); if (brw->gen >= 7) { - if (brw->gen == 7 && !brw->is_haswell) + if (brw->gen == 7 && !brw->is_haswell && !brw->is_baytrail) gen7_emit_vs_workaround_flush(brw); gen7_upload_constant_state(brw, stage_state, true /* active */, diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp b/src/mesa/drivers/dri/i965/gen7_blorp.cpp index 448b505..a1337fe 100644 --- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp +++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp @@ -414,7 +414,7 @@ gen7_blorp_emit_gs_disable(struct brw_context *brw, * whole fixed function pipeline" means to emit a PIPE_CONTROL with the "CS * Stall" bit set. 
*/ - if (!brw->is_haswell && brw->gt == 2 && brw->gs.enabled) + if (!brw->is_haswell && !brw->is_baytrail && brw->gt == 2 && brw->gs.enabled) gen7_emit_cs_stall_flush(brw); BEGIN_BATCH(7); diff --git a/src/mesa/drivers/dri/i965/gen7_gs_state.c b/src/mesa/drivers/dri/i965/gen7_gs_state.c index 30dfa6b..786e1fb 100644 --- a/src/mesa/drivers/dri/i965/gen7_gs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_gs_state.c @@ -82,7 +82,7 @@ upload_gs_state(struct brw_context *brw) * whole fixed function pipeline" means to emit a PIPE_CONTROL with the "CS * Stall" bit set. */ - if (!brw->is_haswell && brw->gt == 2 && brw->gs.enabled != active) + if (!brw->is_haswell && !brw->is_baytrail && brw->gt == 2 && brw->gs.enabled != active) gen7_emit_cs_stall_flush(brw); if (active) { diff --git a/src/mesa/drivers/dri/i965/gen7_sampler_state.c b/src/mesa/drivers/dri/i965/gen7_sampler_state.c index 6077ff2..219a174 100644 --- a/src/mesa/drivers/dri/i965/gen7_sampler_state.c +++ b/src/mesa/drivers/dri/i965/gen7_sampler_state.c @@ -212,7 +212,7 @@ gen7_upload_sampler_state_table(struct brw_context *brw, } } - if (brw->gen == 7 && !brw->is_haswell && + if (brw->gen == 7 && !brw->is_haswell && !brw->is_baytrail && stage_state->stage == MESA_SHADER_VERTEX) { gen7_emit_vs_workaround_flush(brw); } diff --git a/src/mesa/drivers/dri/i965/gen7_urb.c b/src/mesa/drivers/dri/i965/gen7_urb.c index 2653e9c..190d6f0 100644 --- a/src/mesa/drivers/dri/i965/gen7_urb.c +++ b/src/mesa/drivers/dri/i965/gen7_urb.c @@ -121,9 +121,9 @@ gen7_emit_push_constant_state(struct brw_context *brw, unsigned vs_size, * A PIPE_CONTOL command with the CS Stall bit set must be programmed * in the ring after this instruction. * -* No such restriction exists for Haswell. +* No such restriction exists for Haswell or Baytrail. 
*/ - if (brw->gen < 8 && !brw->is_haswell) + if (brw->gen < 8 && !brw->is_haswell && !brw->is_baytrail) gen7_emit_cs_stall_flush(brw); } @@ -263,7 +263,7 @@ gen7_upload_urb(struct brw_context *brw) brw->urb.vs_start = push_constant_chunks; brw->urb.gs_start = push_constant_chunks + vs_chunks; - if (brw->gen == 7 && !brw->is_haswell) + if (brw->gen == 7 && !brw->is_haswell && !brw->is_baytrail) gen7_emit_vs_workaround_flush(brw); gen7_emit_urb_state(brw, brw->urb.nr_vs_entries, vs_size, brw->urb.vs_start, diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c index 4d99150..01be756 100644 --- a/src/mesa/drivers/dri/i965/gen7_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c @@ -72,7 +72,7 @@ upload_vs_state(struct brw_context *brw) const int max_threads_shift = brw->is_haswell ? HSW_VS_MAX_THREADS_SHIFT : GEN6_VS_MAX_THREADS_SHIFT; - if (!brw->is_haswell) + if (!brw->is_haswell && !brw->is_baytrail) gen7_emit_vs_workaround_flush(brw); /* Use ALT floating point mode for ARB vertex programs, because they -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev