[Mesa-dev] [PATCH V3 4/8] glsl: only call mark_max_array if we are assigning an array
This change does not help fix or prevent any bugs; it just seems reasonable to do. Signed-off-by: Timothy Arceri t_arc...@yahoo.com.au --- src/glsl/ast_to_hir.cpp | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp index e25cba3..be12f97 100644 --- a/src/glsl/ast_to_hir.cpp +++ b/src/glsl/ast_to_hir.cpp @@ -830,8 +830,10 @@ do_assignment(exec_list *instructions, struct _mesa_glsl_parse_state *state, rhs-type-array_size()); d-type = var-type; } - mark_whole_array_access(rhs); - mark_whole_array_access(lhs); + if (lhs-type-is_array()) { + mark_whole_array_access(rhs); + mark_whole_array_access(lhs); + } } /* Most callers of do_assignment (assign, add_assign, pre_inc/dec, -- 1.8.3.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH V3 0/8] glsl: Start implementing ARB_arrays_of_arrays
This is a first pass at implementing the ARB_arrays_of_arrays extension in Mesa. This series allows the glsl front-end to parse and compile shaders that don't require linking of arrays of arrays, e.g. multi-dimensional uniforms. I'm sending this extension in parts because my time working on this is about to decrease and I'm not sure how long it will be before I finish the second half. If this is not a good idea please let me know. Patches 2-3 need to be squashed when committed but I've split them here to make reviewing easier. Patch 4 is optional. What's missing is linking and backend support that goes with it. This includes the linking checks the spec talks about for geometry shaders. No piglit regressions with these patches applied. Compile tests have been submitted to the piglit mailing list; the extension just needs to be enabled in the driver code in order to test. V3 - rebased on Paul Berry's awesome simplify aggregate type inference patch - removed now redundant is_array - made a bunch of other changes based on Paul Berry's advice V2 - Removed patches 2 and 6 from the first version as it turns out they are not actually needed (leftovers from my first attempt at the extension). - Removed all unrelated whitespace changes, and other small whitespace cleanups - Fixed order of array dimensions in glsl_type name. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH V3 2/8] glsl: Add arrays_of_arrays to yacc definition
Signed-off-by: Timothy Arceri t_arc...@yahoo.com.au --- src/glsl/glsl_parser.yy | 128 +--- 1 file changed, 56 insertions(+), 72 deletions(-) diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy index 5451b76..2786e92 100644 --- a/src/glsl/glsl_parser.yy +++ b/src/glsl/glsl_parser.yy @@ -97,6 +97,7 @@ static bool match_layout_qualifier(const char *s1, const char *s2, ast_node *node; ast_type_specifier *type_specifier; + ast_array_specifier *array_specifier; ast_fully_specified_type *fully_specified_type; ast_function *function; ast_parameter_declarator *parameter_declarator; @@ -202,6 +203,7 @@ static bool match_layout_qualifier(const char *s1, const char *s2, %type type_qualifier interface_qualifier %type type_specifier type_specifier %type type_specifier type_specifier_nonarray +%type array_specifier array_specifier %type identifier basic_type_specifier_nonarray %type fully_specified_type fully_specified_type %type function function_prototype @@ -880,7 +882,7 @@ parameter_declarator: $$-type-specifier = $1; $$-identifier = $2; } - | type_specifier any_identifier '[' constant_expression ']' + | type_specifier any_identifier array_specifier { void *ctx = state; $$ = new(ctx) ast_parameter_declarator(); @@ -889,8 +891,7 @@ parameter_declarator: $$-type-set_location(yylloc); $$-type-specifier = $1; $$-identifier = $2; - $$-is_array = true; - $$-array_size = $4; + $$-array_specifier = $3; } ; @@ -983,40 +984,20 @@ init_declarator_list: $$-declarations.push_tail(decl-link); state-symbols-add_variable(new(state) ir_variable(NULL, $3, ir_var_auto)); } - | init_declarator_list ',' any_identifier '[' ']' + | init_declarator_list ',' any_identifier array_specifier { void *ctx = state; - ast_declaration *decl = new(ctx) ast_declaration($3, true, NULL, NULL); + ast_declaration *decl = new(ctx) ast_declaration($3, true, $4, NULL); decl-set_location(yylloc); $$ = $1; $$-declarations.push_tail(decl-link); state-symbols-add_variable(new(state) ir_variable(NULL, $3, 
ir_var_auto)); } - | init_declarator_list ',' any_identifier '[' constant_expression ']' + | init_declarator_list ',' any_identifier array_specifier '=' initializer { void *ctx = state; - ast_declaration *decl = new(ctx) ast_declaration($3, true, $5, NULL); - decl-set_location(yylloc); - - $$ = $1; - $$-declarations.push_tail(decl-link); - state-symbols-add_variable(new(state) ir_variable(NULL, $3, ir_var_auto)); - } - | init_declarator_list ',' any_identifier '[' ']' '=' initializer - { - void *ctx = state; - ast_declaration *decl = new(ctx) ast_declaration($3, true, NULL, $7); - decl-set_location(yylloc); - - $$ = $1; - $$-declarations.push_tail(decl-link); - state-symbols-add_variable(new(state) ir_variable(NULL, $3, ir_var_auto)); - } - | init_declarator_list ',' any_identifier '[' constant_expression ']' '=' initializer - { - void *ctx = state; - ast_declaration *decl = new(ctx) ast_declaration($3, true, $5, $8); + ast_declaration *decl = new(ctx) ast_declaration($3, true, $4, $6); decl-set_location(yylloc); $$ = $1; @@ -1053,37 +1034,19 @@ single_declaration: $$-set_location(yylloc); $$-declarations.push_tail(decl-link); } - | fully_specified_type any_identifier '[' ']' + | fully_specified_type any_identifier array_specifier { void *ctx = state; - ast_declaration *decl = new(ctx) ast_declaration($2, true, NULL, NULL); + ast_declaration *decl = new(ctx) ast_declaration($2, true, $3, NULL); $$ = new(ctx) ast_declarator_list($1); $$-set_location(yylloc); $$-declarations.push_tail(decl-link); } - | fully_specified_type any_identifier '[' constant_expression ']' + | fully_specified_type any_identifier array_specifier '=' initializer { void *ctx = state; - ast_declaration *decl = new(ctx) ast_declaration($2, true, $4, NULL); - - $$ = new(ctx) ast_declarator_list($1); - $$-set_location(yylloc); - $$-declarations.push_tail(decl-link); - } - | fully_specified_type any_identifier '[' ']' '=' initializer - { - void *ctx = state; - ast_declaration *decl = new(ctx) 
ast_declaration($2, true, NULL, $6); - - $$ = new(ctx) ast_declarator_list($1); - $$-set_location(yylloc); - $$-declarations.push_tail(decl-link); - } - | fully_specified_type any_identifier '[' constant_expression ']' '=' initializer - { - void *ctx = state; - ast_declaration *decl = new(ctx) ast_declaration($2, true, $4, $7); + ast_declaration *decl = new(ctx) ast_declaration($2, true, $3, $5); $$ = new(ctx) ast_declarator_list($1); $$-set_location(yylloc); @@ -1584,19
[Mesa-dev] [PATCH V3 1/8] mesa: Add ARB_arrays_of_arrays
Signed-off-by: Timothy Arceri t_arc...@yahoo.com.au Reviewed-by: Paul Berry stereotype441 at gmail.com --- src/glsl/glcpp/glcpp-parse.y | 3 +++ src/mesa/main/extensions.c | 1 + src/mesa/main/mtypes.h | 1 + 3 files changed, 5 insertions(+) diff --git a/src/glsl/glcpp/glcpp-parse.y b/src/glsl/glcpp/glcpp-parse.y index 55c4981..4df69a8 100644 --- a/src/glsl/glcpp/glcpp-parse.y +++ b/src/glsl/glcpp/glcpp-parse.y @@ -1222,6 +1222,9 @@ glcpp_parser_create (const struct gl_extensions *extensions, int api) add_builtin_define(parser, GL_EXT_texture_array, 1); } + if (extensions-ARB_arrays_of_arrays) + add_builtin_define(parser, GL_ARB_arrays_of_arrays, 1); + if (extensions-ARB_fragment_coord_conventions) add_builtin_define(parser, GL_ARB_fragment_coord_conventions, 1); diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c index 2e0ccc3..95eb7ca 100644 --- a/src/mesa/main/extensions.c +++ b/src/mesa/main/extensions.c @@ -80,6 +80,7 @@ static const struct extension extension_table[] = { /* ARB Extensions */ { GL_ARB_ES2_compatibility, o(ARB_ES2_compatibility), GL, 2009 }, { GL_ARB_ES3_compatibility, o(ARB_ES3_compatibility), GL, 2012 }, + { GL_ARB_arrays_of_arrays,o(ARB_arrays_of_arrays), GL, 2012 }, { GL_ARB_base_instance, o(ARB_base_instance), GL, 2011 }, { GL_ARB_blend_func_extended, o(ARB_blend_func_extended), GL, 2009 }, { GL_ARB_clear_buffer_object, o(dummy_true), GL, 2012 }, diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 33df682..eef67a8 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -3366,6 +3366,7 @@ struct gl_extensions GLboolean ANGLE_texture_compression_dxt; GLboolean ARB_ES2_compatibility; GLboolean ARB_ES3_compatibility; + GLboolean ARB_arrays_of_arrays; GLboolean ARB_base_instance; GLboolean ARB_blend_func_extended; GLboolean ARB_color_buffer_float; -- 1.8.3.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH V3 8/8] docs: Mark ARB_arrays_of_arrays as started
Signed-off-by: Timothy Arceri t_arc...@yahoo.com.au Reviewed-by: Paul Berry stereotype441 at gmail.com --- docs/GL3.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/GL3.txt b/docs/GL3.txt index 0672ec7..9f756e9 100644 --- a/docs/GL3.txt +++ b/docs/GL3.txt @@ -142,7 +142,7 @@ GL 4.2: GL 4.3: GLSL 4.3 not started - GL_ARB_arrays_of_arrays not started + GL_ARB_arrays_of_arrays started GL_ARB_ES3_compatibility DONE (i965) GL_ARB_clear_buffer_object not started GL_ARB_compute_shadernot started -- 1.8.3.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH V3 5/8] glsl: Allow arrays of arrays as input to vertex shader
Signed-off-by: Timothy Arceri t_arc...@yahoo.com.au Reviewed-by: Paul Berry stereotype441 at gmail.com --- src/glsl/ast_to_hir.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp index be12f97..f13ac51 100644 --- a/src/glsl/ast_to_hir.cpp +++ b/src/glsl/ast_to_hir.cpp @@ -3143,8 +3143,9 @@ ast_declarator_list::hir(exec_list *instructions, *vectors. Vertex shader inputs cannot be arrays or *structures. */ - const glsl_type *check_type = var-type-is_array() - ? var-type-fields.array : var-type; +const glsl_type *check_type = var-type; +while (check_type-is_array()) + check_type = check_type-element_type(); switch (check_type-base_type) { case GLSL_TYPE_FLOAT: -- 1.8.3.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH V3 7/8] glsl: remove remaining is_array variables
Previously the reason we needed is_array was because we used array_size == NULL to represent both non-arrays and unsized arrays. Now that we use a non-NULL array_specifier to represent an unsized array, is_array is redundant. Signed-off-by: Timothy Arceri t_arc...@yahoo.com.au --- src/glsl/ast.h | 32 +++- src/glsl/ast_to_hir.cpp | 10 +- src/glsl/ast_type.cpp | 6 ++ src/glsl/glsl_parser.yy | 30 +++--- src/glsl/glsl_parser_extras.cpp | 15 ++- 5 files changed, 35 insertions(+), 58 deletions(-) diff --git a/src/glsl/ast.h b/src/glsl/ast.h index c15a119..4660f78 100644 --- a/src/glsl/ast.h +++ b/src/glsl/ast.h @@ -371,14 +371,13 @@ public: class ast_declaration : public ast_node { public: - ast_declaration(const char *identifier, bool is_array, + ast_declaration(const char *identifier, ast_array_specifier *array_specifier, ast_expression *initializer); virtual void print(void) const; const char *identifier; - - bool is_array; + ast_array_specifier *array_specifier; ast_expression *initializer; @@ -588,10 +587,10 @@ public: * Use only if the objects are allocated from the same context and will not * be modified. Zeros the inherited ast_node's fields. 
*/ - ast_type_specifier(const ast_type_specifier *that, bool is_array, + ast_type_specifier(const ast_type_specifier *that, ast_array_specifier *array_specifier) : ast_node(), type_name(that-type_name), structure(that-structure), -is_array(is_array), array_specifier(array_specifier), +array_specifier(array_specifier), default_precision(that-default_precision) { /* empty */ @@ -599,8 +598,7 @@ public: /** Construct a type specifier from a type name */ ast_type_specifier(const char *name) - : type_name(name), structure(NULL), - is_array(false), array_specifier(NULL), + : type_name(name), structure(NULL), array_specifier(NULL), default_precision(ast_precision_none) { /* empty */ @@ -608,8 +606,7 @@ public: /** Construct a type specifier from a structure definition */ ast_type_specifier(ast_struct_specifier *s) - : type_name(s-name), structure(s), - is_array(false), array_specifier(NULL), + : type_name(s-name), structure(s), array_specifier(NULL), default_precision(ast_precision_none) { /* empty */ @@ -626,7 +623,6 @@ public: const char *type_name; ast_struct_specifier *structure; - bool is_array; ast_array_specifier *array_specifier; /** For precision statements, this is the given precision; otherwise none. 
*/ @@ -680,7 +676,6 @@ public: ast_parameter_declarator() : type(NULL), identifier(NULL), - is_array(false), array_specifier(NULL), formal_parameter(false), is_void(false) @@ -695,7 +690,6 @@ public: ast_fully_specified_type *type; const char *identifier; - bool is_array; ast_array_specifier *array_specifier; static void parameters_to_hir(exec_list *ast_parameters, @@ -943,13 +937,10 @@ class ast_interface_block : public ast_node { public: ast_interface_block(ast_type_qualifier layout, const char *instance_name, - bool is_array, ast_array_specifier *array_specifier) : layout(layout), block_name(NULL), instance_name(instance_name), - is_array(is_array), array_specifier(array_specifier) + array_specifier(array_specifier) { - if (!is_array) - assert(array_specifier == NULL); } virtual ir_rvalue *hir(exec_list *instructions, @@ -970,15 +961,6 @@ public: exec_list declarations; /** -* True if the block is declared as an array -* -* \note -* A block can only be an array if it also has an instance name. If this -* field is true, ::instance_name must also not be \c NULL. -*/ - bool is_array; - - /** * Declared array size of the block instance * * If the block is not declared as an array or if the block instance array diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp index f13ac51..c9f3b92 100644 --- a/src/glsl/ast_to_hir.cpp +++ b/src/glsl/ast_to_hir.cpp @@ -4507,7 +4507,7 @@ ast_type_specifier::hir(exec_list *instructions, return NULL; } - if (this-is_array) { + if (this-array_specifier != NULL) { _mesa_glsl_error(loc, state, default precision statements do not apply to arrays); @@ -4924,7 +4924,7 @@ ast_interface_block::hir(exec_list *instructions, _mesa_shader_stage_to_string(state-stage)); } if (this-instance_name == NULL || - strcmp(this-instance_name, gl_in) != 0 ||
[Mesa-dev] [PATCH V3 6/8] glsl: create type name for arrays of arrays
We need to insert outermost dimensions in the correct spot otherwise the dimension order will be backwards Signed-off-by: Timothy Arceri t_arc...@yahoo.com.au Reviewed-by: Paul Berry stereotype441 at gmail.com --- src/glsl/glsl_types.cpp | 16 ++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/glsl/glsl_types.cpp b/src/glsl/glsl_types.cpp index 12d4ac0..62a409a 100644 --- a/src/glsl/glsl_types.cpp +++ b/src/glsl/glsl_types.cpp @@ -300,8 +300,20 @@ glsl_type::glsl_type(const glsl_type *array, unsigned length) : if (length == 0) snprintf(n, name_length, %s[], array-name); - else - snprintf(n, name_length, %s[%u], array-name, length); + else { + /* insert outermost dimensions in the correct spot + * otherwise the dimension order will be backwards + */ + const char *pos = strchr(array-name, '['); + if (pos) { + int idx = pos - array-name; + snprintf(n, idx+1, %s, array-name); + snprintf(n + idx, name_length - idx, [%u]%s, + length, array-name + idx); + } else { + snprintf(n, name_length, %s[%u], array-name, length); + } + } this-name = n; } -- 1.8.3.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH V3 3/8] glsl: Add array specifier to ast code
Signed-off-by: Timothy Arceri t_arc...@yahoo.com.au --- src/glsl/ast.h | 66 +++ src/glsl/ast_array_index.cpp| 13 +++ src/glsl/ast_to_hir.cpp | 172 src/glsl/ast_type.cpp | 8 +- src/glsl/glsl_parser_extras.cpp | 19 ++--- src/glsl/glsl_parser_extras.h | 2 + 6 files changed, 179 insertions(+), 101 deletions(-) diff --git a/src/glsl/ast.h b/src/glsl/ast.h index b24052b..c15a119 100644 --- a/src/glsl/ast.h +++ b/src/glsl/ast.h @@ -276,6 +276,43 @@ private: bool cons; }; +class ast_array_specifier : public ast_node { +public: + /** Unsized array specifier ([]) */ + explicit ast_array_specifier(const struct YYLTYPE locp) + : dimension_count(1), is_unsized_array(true) + { + set_location(locp); + } + + /** Sized array specifier ([dim]) */ + ast_array_specifier(const struct YYLTYPE locp, ast_expression *dim) + : dimension_count(1), is_unsized_array(false) + { + set_location(locp); + array_dimensions.push_tail(dim-link); + } + + void add_dimension(ast_expression *dim) + { + array_dimensions.push_tail(dim-link); + dimension_count++; + } + + virtual void print(void) const; + + /* Count including sized and unsized dimensions */ + unsigned dimension_count; + + /* If true, this means that the array has an unsized outermost dimension. */ + bool is_unsized_array; + + /* This list contains objects of type ast_node containing the +* sized dimensions only, in outermost-to-innermost order. 
+*/ + exec_list array_dimensions; +}; + /** * C-style aggregate initialization class * @@ -334,14 +371,15 @@ public: class ast_declaration : public ast_node { public: - ast_declaration(const char *identifier, bool is_array, ast_expression *array_size, - ast_expression *initializer); + ast_declaration(const char *identifier, bool is_array, + ast_array_specifier *array_specifier, + ast_expression *initializer); virtual void print(void) const; const char *identifier; bool is_array; - ast_expression *array_size; + ast_array_specifier *array_specifier; ast_expression *initializer; }; @@ -551,9 +589,9 @@ public: * be modified. Zeros the inherited ast_node's fields. */ ast_type_specifier(const ast_type_specifier *that, bool is_array, - ast_expression *array_size) + ast_array_specifier *array_specifier) : ast_node(), type_name(that-type_name), structure(that-structure), -is_array(is_array), array_size(array_size), +is_array(is_array), array_specifier(array_specifier), default_precision(that-default_precision) { /* empty */ @@ -562,7 +600,7 @@ public: /** Construct a type specifier from a type name */ ast_type_specifier(const char *name) : type_name(name), structure(NULL), - is_array(false), array_size(NULL), + is_array(false), array_specifier(NULL), default_precision(ast_precision_none) { /* empty */ @@ -571,7 +609,7 @@ public: /** Construct a type specifier from a structure definition */ ast_type_specifier(ast_struct_specifier *s) : type_name(s-name), structure(s), - is_array(false), array_size(NULL), + is_array(false), array_specifier(NULL), default_precision(ast_precision_none) { /* empty */ @@ -589,7 +627,7 @@ public: ast_struct_specifier *structure; bool is_array; - ast_expression *array_size; + ast_array_specifier *array_specifier; /** For precision statements, this is the given precision; otherwise none. 
*/ unsigned default_precision:2; @@ -643,7 +681,7 @@ public: type(NULL), identifier(NULL), is_array(false), - array_size(NULL), + array_specifier(NULL), formal_parameter(false), is_void(false) { @@ -658,7 +696,7 @@ public: ast_fully_specified_type *type; const char *identifier; bool is_array; - ast_expression *array_size; + ast_array_specifier *array_specifier; static void parameters_to_hir(exec_list *ast_parameters, bool formal, exec_list *ir_parameters, @@ -906,12 +944,12 @@ public: ast_interface_block(ast_type_qualifier layout, const char *instance_name, bool is_array, - ast_expression *array_size) + ast_array_specifier *array_specifier) : layout(layout), block_name(NULL), instance_name(instance_name), - is_array(is_array), array_size(array_size) + is_array(is_array), array_specifier(array_specifier) { if (!is_array) - assert(array_size == NULL); + assert(array_specifier == NULL); } virtual ir_rvalue *hir(exec_list *instructions, @@ -946,7 +984,7 @@ public: * If the block is not declared as an
Re: [Mesa-dev] [PATCH] glsl: Simplify aggregate type inference to prepare for ARB_arrays_of_arrays.
On Tue, 2014-01-21 at 16:14 -0800, Paul Berry wrote: Timothy: I was inspired to write this patch by the complexities you encountered during [PATCH V2 5/8] glsl: Aggregate initializer support for arrays of array. Glad I could help spark some inspiration. Can you try rebasing your series on top of this patch to see if it simplifies things? I believe that with these changes, you should be able to drop patch 5/8 entirely. Yes, with this patch I can drop patch 5 and still pass my piglit tests, thanks. And thanks again for the code review. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] gallium/util: util_format_srgb should not return FORMAT_NONE for sRGB formats
There is only one caller, which is fixed by this patch. Marek On Wed, Jan 22, 2014 at 12:52 AM, Brian Paul bri...@vmware.com wrote: On 01/21/2014 02:21 PM, Marek Olšák wrote: From: Marek Olšák marek.ol...@amd.com This fixes a serious regression introduced in 4e549ddb500cf677b6fa16d9ebdfa67cc23da097. Cc: 9.2 10.0 mesa-sta...@lists.freedesktop.org --- src/gallium/auxiliary/util/u_format.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h index 0fbaf4c..5f86e2d 100644 --- a/src/gallium/auxiliary/util/u_format.h +++ b/src/gallium/auxiliary/util/u_format.h @@ -872,6 +872,9 @@ util_format_get_component_bits(enum pipe_format format, static INLINE enum pipe_format util_format_srgb(enum pipe_format format) { + if (util_format_is_srgb(format)) + return format; + switch (format) { case PIPE_FORMAT_L8_UNORM: return PIPE_FORMAT_L8_SRGB; Reviewed-by: Brian Paul bri...@vmware.com This looks fine for fixing things but longer-term, maybe we should change util_format_srgb() to return the format unchanged in the default case, as we do for util_format_linear(). I don't know if any callers depend on the default-PIPE_FORMAT_NONE behavior now. -Brian ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v3 1/1] clover: Don't crash on NULL global buffer objects.
Jan Vesely jan.ves...@rutgers.edu writes: On Sat, 2014-01-18 at 13:34 +0100, Francisco Jerez wrote: Jan Vesely jan.ves...@rutgers.edu writes: Specs say If the argument is a buffer object, the arg_value pointer can be NULL or point to a NULL value in which case a NULL value will be used as the value for the argument declared as a pointer to __global or __constant memory in the kernel. So don't crash when somebody does that. v2: Insert NULL into input buffer instead of buffer handle pair Fix constant_argument too Drop r600 driver changes v3: Fix inserting NULL pointer Signed-off-by: Jan Vesely jan.ves...@rutgers.edu Looks OK to me, Reviewed-by: Francisco Jerez curroje...@riseup.net Thank you, can you push it as well? I don't have commit access. Done. Thank you. Thanks. --- src/gallium/state_trackers/clover/core/kernel.cpp | 34 +++ 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/src/gallium/state_trackers/clover/core/kernel.cpp b/src/gallium/state_trackers/clover/core/kernel.cpp index 58780d6..fb826c1 100644 --- a/src/gallium/state_trackers/clover/core/kernel.cpp +++ b/src/gallium/state_trackers/clover/core/kernel.cpp @@ -327,7 +327,7 @@ kernel::global_argument::set(size_t size, const void *value) { if (size != sizeof(cl_mem)) throw error(CL_INVALID_ARG_SIZE); - buf = objbuffer(*(cl_mem *)value); + buf = pobjbuffer(value ? 
*(cl_mem *)value : NULL); _set = true; } @@ -335,8 +335,14 @@ void kernel::global_argument::bind(exec_context ctx, const module::argument marg) { align(ctx.input, marg.target_align); - ctx.g_handles.push_back(allocate(ctx.input, marg.target_size)); - ctx.g_buffers.push_back(buf-resource(*ctx.q).pipe); + + if (buf) { + ctx.g_handles.push_back(allocate(ctx.input, marg.target_size)); + ctx.g_buffers.push_back(buf-resource(*ctx.q).pipe); + } else { + //NULL pointer + allocate(ctx.input, marg.target_size); + } } void @@ -379,22 +385,28 @@ kernel::constant_argument::set(size_t size, const void *value) { if (size != sizeof(cl_mem)) throw error(CL_INVALID_ARG_SIZE); - buf = objbuffer(*(cl_mem *)value); + buf = pobjbuffer(value ? *(cl_mem *)value : NULL); _set = true; } void kernel::constant_argument::bind(exec_context ctx, const module::argument marg) { - auto v = bytes(ctx.resources.size() 24); - - extend(v, module::argument::zero_ext, marg.target_size); - byteswap(v, ctx.q-dev.endianness()); align(ctx.input, marg.target_align); - insert(ctx.input, v); - st = buf-resource(*ctx.q).bind_surface(*ctx.q, false); - ctx.resources.push_back(st); + if (buf) { + auto v = bytes(ctx.resources.size() 24); + + extend(v, module::argument::zero_ext, marg.target_size); + byteswap(v, ctx.q-dev.endianness()); + insert(ctx.input, v); + + st = buf-resource(*ctx.q).bind_surface(*ctx.q, false); + ctx.resources.push_back(st); + } else { + //NULL pointer + allocate(ctx.input, marg.target_size); + } } void -- 1.8.4.2 -- Jan Vesely jan.ves...@rutgers.edu -- Jan Vesely jan.ves...@rutgers.edu pgp8TA8dLconv.pgp Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] glsl: Add image type to the GLSL IR.
Paul Berry stereotype...@gmail.com writes: On 15 January 2014 11:42, Francisco Jerez curroje...@riseup.net wrote: v2: Reuse the glsl_sampler_dim enum for images. Reuse the glsl_type::sampler_* fields instead of creating new ones specific to image types. Reuse the same constructor as for samplers adding a new 'base_type' argument. --- Is this what you had in mind Paul? Yes, that seems reasonable. Thanks. Does your previous reviewed-by still apply for v2 of this patch? Thanks. pgpZ0ZjwnfuXL.pgp Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 73672] Half Life 2 in Wine is broken since 4e549ddb
https://bugs.freedesktop.org/show_bug.cgi?id=73672 --- Comment #21 from Benjamin Bellec b.bel...@gmail.com --- I confirm that the patch works. -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 73854] GoldSrc and Source Games Segfault on Launch
https://bugs.freedesktop.org/show_bug.cgi?id=73854 --- Comment #20 from Benjamin Bellec b.bel...@gmail.com --- (In reply to comment #18) Created attachment 92554 [details] [review] glx: link loader when building with dri3 While Keith's patch does work on the overall issue with libudev, we should not link the loader util for non dri3 builds. Here is a trivial fix that will resolve the problem. Thanks to Benjamin Bellec for the bisection. I confirm that the patch works. -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] glsl: Add image type to the GLSL IR.
On 22 January 2014 04:50, Francisco Jerez curroje...@riseup.net wrote: Paul Berry stereotype...@gmail.com writes: On 15 January 2014 11:42, Francisco Jerez curroje...@riseup.net wrote: v2: Reuse the glsl_sampler_dim enum for images. Reuse the glsl_type::sampler_* fields instead of creating new ones specific to image types. Reuse the same constructor as for samplers adding a new 'base_type' argument. --- Is this what you had in mind Paul? Yes, that seems reasonable. Thanks. Does your previous reviewed-by still apply for v2 of this patch? Thanks. Yes. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] Atomics not really atomic?
Hi, The two atomic helpers in u_atomic.h, p_atomic_set and p_atomic_read, are just passthrough macros, without the atomic guarantees. Why? I believe I saw some corruption because of this, where a 64-bit write on a 32-bit platform may not be naturally atomic. - Lauri ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/2] mesa: Ensure that transform feedback refers to the correct program.
Previous to this patch, the _mesa_{Begin,Resume}TransformFeedback functions were using ctx-Shader.CurrentProgram[MESA_SHADER_VERTEX] to find the program that would be the source of transform feedback data. This isn't correct--if there's a geometry shader present it should be ctx-Shader.CurrentProgram[MESA_SHADER_GEOMETRY]. (These might be different if separate shader objects are in use). This patch creates a function get_xfb_source(), which figures out the correct program to use based on GL state, and updates _mesa_{Begin,Resume}TransformFeedback to call it. get_xfb_source() is written in terms of the gl_shader_stage enum, so it should not need modification when we add tessellation shaders in the future. It also creates a new driver flag, NewTransformFeedbackProg, which is flagged whenever this program changes. To reduce future confusion, this patch also rewords some comments and error message text to avoid referring to vertex shaders. --- src/mesa/main/mtypes.h| 8 -- src/mesa/main/transformfeedback.c | 52 +-- 2 files changed, 45 insertions(+), 15 deletions(-) diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 3dd9678..7fd3298 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -1815,8 +1815,9 @@ struct gl_transform_feedback_object /** * The shader program active when BeginTransformFeedback() was called. -* When active and unpaused, this equals -* ctx-Shader.CurrentProgram[MESA_SHADER_VERTEX]. +* When active and unpaused, this equals ctx-Shader.CurrentProgram[stage], +* where stage is the pipeline stage that is the source of data for +* transform feedback. 
*/ struct gl_shader_program *shader_program; @@ -3779,6 +3780,9 @@ struct gl_driver_flags /** gl_context::TransformFeedback::CurrentObject */ GLbitfield NewTransformFeedback; + /** gl_context::TransformFeedback::CurrentObject::shader_program */ + GLbitfield NewTransformFeedbackProg; + /** gl_context::RasterDiscard */ GLbitfield NewRasterizerDiscard; diff --git a/src/mesa/main/transformfeedback.c b/src/mesa/main/transformfeedback.c index 74897ba..9376a9e 100644 --- a/src/mesa/main/transformfeedback.c +++ b/src/mesa/main/transformfeedback.c @@ -24,7 +24,7 @@ /* - * Vertex transform feedback support. + * Transform feedback support. * * Authors: * Brian Paul @@ -376,25 +376,48 @@ _mesa_compute_max_transform_feedback_vertices( **/ +/** + * Figure out which stage of the pipeline is the source of transform feedback + * data given the current context state, and return its gl_shader_program. + * + * If no active program can generate transform feedback data (i.e. no vertex + * shader is active), returns NULL. + */ +static struct gl_shader_program * +get_xfb_source(struct gl_context *ctx) +{ + int i; + for (i = MESA_SHADER_FRAGMENT - 1; i = MESA_SHADER_VERTEX; i--) { + if (ctx-Shader.CurrentProgram[i] != NULL) + return ctx-Shader.CurrentProgram[i]; + } + return NULL; +} + + void GLAPIENTRY _mesa_BeginTransformFeedback(GLenum mode) { struct gl_transform_feedback_object *obj; - struct gl_transform_feedback_info *info; + struct gl_transform_feedback_info *info = NULL; + struct gl_shader_program *source; GLuint i; unsigned vertices_per_prim; GET_CURRENT_CONTEXT(ctx); obj = ctx-TransformFeedback.CurrentObject; - if (ctx-Shader.CurrentProgram[MESA_SHADER_VERTEX] == NULL) { + /* Figure out what pipeline stage is the source of data for transform +* feedback. 
+*/ + source = get_xfb_source(ctx); + if (source == NULL) { _mesa_error(ctx, GL_INVALID_OPERATION, glBeginTransformFeedback(no program active)); return; } - info = - ctx-Shader.CurrentProgram[MESA_SHADER_VERTEX]-LinkedTransformFeedback; + info = source-LinkedTransformFeedback; if (info-NumOutputs == 0) { _mesa_error(ctx, GL_INVALID_OPERATION, @@ -452,7 +475,10 @@ _mesa_BeginTransformFeedback(GLenum mode) obj-GlesRemainingPrims = max_vertices / vertices_per_prim; } - obj-shader_program = ctx-Shader.CurrentProgram[MESA_SHADER_VERTEX]; + if (obj-shader_program != source) { + ctx-NewDriverState |= ctx-DriverFlags.NewTransformFeedbackProg; + obj-shader_program = source; + } assert(ctx-Driver.BeginTransformFeedback); ctx-Driver.BeginTransformFeedback(ctx, mode, obj); @@ -519,7 +545,7 @@ bind_buffer_range(struct gl_context *ctx, GLuint index, /** - * Specify a buffer object to receive vertex shader results. Plus, + * Specify a buffer object to receive transform feedback results. Plus, * specify the starting offset to place the results, and max size. * Called from the glBindBufferRange() function. */ @@ -563,7 +589,7 @@ _mesa_bind_buffer_range_transform_feedback(struct gl_context *ctx, /** - * Specify a buffer object to receive vertex shader results. + * Specify a
[Mesa-dev] [PATCH 2/2] i965/gen7: Use the correct program when uploading transform feedback state.
Transform feedback may come from either the geometry shader or the vertex shader, so we can't use ctx-Shader.CurrentProgram[MESA_SHADER_VERTEX] to find the current post-link transform feedback information. Fortunately we can use ctx-TransformFeedback.CurrentObject-shader_program. --- src/mesa/drivers/dri/i965/brw_state_upload.c | 1 + src/mesa/drivers/dri/i965/gen7_sol_state.c | 15 +-- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index af554eb..ca256c2 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -305,6 +305,7 @@ void brw_init_state( struct brw_context *brw ) STATIC_ASSERT(BRW_NUM_STATE_BITS = 8 * sizeof(brw-state.dirty.brw)); ctx-DriverFlags.NewTransformFeedback = BRW_NEW_TRANSFORM_FEEDBACK; + ctx-DriverFlags.NewTransformFeedbackProg = BRW_NEW_TRANSFORM_FEEDBACK; ctx-DriverFlags.NewRasterizerDiscard = BRW_NEW_RASTERIZER_DISCARD; ctx-DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER; ctx-DriverFlags.NewAtomicBuffer = BRW_NEW_ATOMIC_BUFFER; diff --git a/src/mesa/drivers/dri/i965/gen7_sol_state.c b/src/mesa/drivers/dri/i965/gen7_sol_state.c index 1ca2b74..5d2f243 100644 --- a/src/mesa/drivers/dri/i965/gen7_sol_state.c +++ b/src/mesa/drivers/dri/i965/gen7_sol_state.c @@ -39,14 +39,11 @@ static void upload_3dstate_so_buffers(struct brw_context *brw) { struct gl_context *ctx = brw-ctx; - /* BRW_NEW_VERTEX_PROGRAM */ - const struct gl_shader_program *vs_prog = - ctx-Shader.CurrentProgram[MESA_SHADER_VERTEX]; - const struct gl_transform_feedback_info *linked_xfb_info = - vs_prog-LinkedTransformFeedback; /* BRW_NEW_TRANSFORM_FEEDBACK */ struct gl_transform_feedback_object *xfb_obj = ctx-TransformFeedback.CurrentObject; + const struct gl_transform_feedback_info *linked_xfb_info = + xfb_obj-shader_program-LinkedTransformFeedback; int i; /* Set up the up to 4 output buffers. 
These are the ranges defined in the @@ -102,12 +99,11 @@ gen7_upload_3dstate_so_decl_list(struct brw_context *brw, const struct brw_vue_map *vue_map) { struct gl_context *ctx = brw-ctx; - /* BRW_NEW_VERTEX_PROGRAM */ - const struct gl_shader_program *vs_prog = - ctx-Shader.CurrentProgram[MESA_SHADER_VERTEX]; /* BRW_NEW_TRANSFORM_FEEDBACK */ + struct gl_transform_feedback_object *xfb_obj = + ctx-TransformFeedback.CurrentObject; const struct gl_transform_feedback_info *linked_xfb_info = - vs_prog-LinkedTransformFeedback; + xfb_obj-shader_program-LinkedTransformFeedback; uint16_t so_decl[128]; int buffer_mask = 0; int next_offset[4] = {0, 0, 0, 0}; @@ -260,7 +256,6 @@ const struct brw_tracked_state gen7_sol_state = { .dirty = { .mesa = (_NEW_LIGHT), .brw = (BRW_NEW_BATCH | - BRW_NEW_VERTEX_PROGRAM | BRW_NEW_VUE_MAP_GEOM_OUT | BRW_NEW_TRANSFORM_FEEDBACK) }, -- 1.8.5.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Atomics not really atomic?
op 22-01-14 14:44, Lauri Kasanen schreef: Hi, The two atomic helpers in u_atomic.h, p_atomic_set and p_atomic_read, are just passthrough macros, without the atomic guarantees. Why? I believe I saw some corruption because of this, where a 64-bit write on a 32-bit platform may not be naturally atomic. From what I can tell, p_atomic only supports 32-bit atomics. Atomics don't imply barriers, so the code is probably correct for 32 bits. There is no 64-bit support in that code, so I don't see how that would have protected a 64-bit write. ~Maarten ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 73934] New: Function roundf undeclared in textparam.c when building with MSVC11
https://bugs.freedesktop.org/show_bug.cgi?id=73934 Priority: medium Bug ID: 73934 Assignee: mesa-dev@lists.freedesktop.org Summary: Function roundf undeclared in textparam.c when building with MSVC11 Severity: normal Classification: Unclassified OS: Windows (All) Reporter: ztgr...@live.com Hardware: x86 (IA32) Status: NEW Version: 10.0 Component: Mesa core Product: Mesa I am trying to build Mesa for x86 using Scons on Windows 8.1 x64 with MSVC11, and I am getting a message stating that 'roundf' is undeclared in the file textparam.c (in the function _mesa_GetTexParameteriv). I tried adding math.h to the included headers for that file, but that didn't work. I don't do much native development (I am building Mesa for use in a Game Development Studio I am working on using SharpGL) so I have no other idea on what could be causing this error. -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 73934] Function roundf undeclared in textparam.c when building with MSVC11
https://bugs.freedesktop.org/show_bug.cgi?id=73934 --- Comment #1 from Grigori Goronzy g...@chown.ath.cx --- MSVC has bad C99 support. roundf was introduced by C99. You can easily implement roundf yourself, though. Calling floorf or ceilf according to the sign of the number should do it. -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 05/18] glapi: Add infrastructure for ARB_multi_bind
On 01/21/2014 03:35 PM, Fredrik Höglund wrote: --- src/mapi/glapi/gen/ARB_multi_bind.xml | 53 +++ src/mapi/glapi/gen/Makefile.am |1 + src/mapi/glapi/gen/gl_API.xml |4 +++ src/mesa/main/bufferobj.c | 13 src/mesa/main/bufferobj.h |7 src/mesa/main/samplerobj.c |6 src/mesa/main/samplerobj.h |2 ++ src/mesa/main/shaderimage.c |5 +++ src/mesa/main/shaderimage.h |3 ++ src/mesa/main/tests/dispatch_sanity.cpp |8 + src/mesa/main/texobj.c |6 src/mesa/main/texobj.h |4 +++ src/mesa/main/varray.c |7 src/mesa/main/varray.h |4 +++ 14 files changed, 123 insertions(+) create mode 100644 src/mapi/glapi/gen/ARB_multi_bind.xml diff --git a/src/mapi/glapi/gen/ARB_multi_bind.xml b/src/mapi/glapi/gen/ARB_multi_bind.xml new file mode 100644 index 000..4a59d8c --- /dev/null +++ b/src/mapi/glapi/gen/ARB_multi_bind.xml @@ -0,0 +1,53 @@ +?xml version=1.0? +!DOCTYPE OpenGLAPI SYSTEM gl_API.dtd + +!-- Note: no GLX protocol info yet. -- + +OpenGLAPI + +category name=GL_ARB_multi_bind number=147 + +function name=BindBuffersBase offset=assign +param name=target type=GLenum/ +param name=first type=GLuint/ +param name=count type=GLsizei/ +param name=buffers type=const GLuint*/ I don't recall if the glapi python code parses these types, but it might be safer to add a space before '*' as we do elsewhere. So: param name=buffers type=const GLuint */ Same thing below. 
+/function + +function name=BindBuffersRange offset=assign +param name=target type=GLenum/ +param name=first type=GLuint/ +param name=count type=GLsizei/ +param name=buffers type=const GLuint */ +param name=offsets type=const GLintptr */ +param name=sizes type=const GLsizeiptr */ +/function + +function name=BindTextures offset=assign +param name=first type=GLuint/ +param name=count type=GLsizei/ +param name=textures type=const GLuint*/ +/function + +function name=BindSamplers offset=assign +param name=first type=GLuint/ +param name=count type=GLsizei/ +param name=samplers type=const GLuint*/ +/function + +function name=BindImageTextures offset=assign +param name=first type=GLuint/ +param name=count type=GLsizei/ +param name=textures type=const GLuint*/ +/function + +function name=BindVertexBuffers offset=assign +param name=first type=GLuint/ +param name=count type=GLsizei/ +param name=buffers type=const GLuint*/ +param name=offsets type=const GLintptr*/ +param name=strides type=const GLsizei*/ +/function + [...] ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 04/18] mesa: Add functions for doing unlocked hash table lookups
On 01/21/2014 03:35 PM, Fredrik Höglund wrote: This patch adds functions for locking/unlocking the mutex, along with _mesa_HashLookupWithoutLocking() and _mesa_HashInsertWithoutLocking() that do lookups and insertions without locking the mutex. These functions will be used by the ARB_multi_bind entry points to avoid locking/unlocking the mutex for each binding point. --- src/mesa/main/hash.c | 90 -- src/mesa/main/hash.h |9 + 2 files changed, 89 insertions(+), 10 deletions(-) diff --git a/src/mesa/main/hash.c b/src/mesa/main/hash.c index b31fd48..263397e 100644 --- a/src/mesa/main/hash.c +++ b/src/mesa/main/hash.c @@ -150,7 +150,6 @@ _mesa_DeleteHashTable(struct _mesa_HashTable *table) } - /** * Lookup an entry in the hash table, without locking. * \sa _mesa_HashLookup @@ -195,15 +194,55 @@ _mesa_HashLookup(struct _mesa_HashTable *table, GLuint key) /** - * Insert a key/pointer pair into the hash table. - * If an entry with this key already exists we'll replace the existing entry. - * + * Lookup an entry in the hash table without locking the mutex. + * + * The hash table mutex must be locked manually by calling + * _mesa_HashLockMutex() before calling this function. + * + * \param table the hash table. + * \param key the key. + * + * \return pointer to user's data or NULL if key not in table + */ +void * +_mesa_HashLookupWithoutLocking(struct _mesa_HashTable *table, GLuint key) +{ + return _mesa_HashLookup_unlocked(table, key); +} + + +/** + * Lock the hash table mutex. + * + * This function should be used when multiple objects need + * to be looked up in the hash table, to avoid having to lock + * and unlock the mutex each time. + * * \param table the hash table. - * \param key the key (not zero). - * \param data pointer to user data. */ void -_mesa_HashInsert(struct _mesa_HashTable *table, GLuint key, void *data) +_mesa_HashLockMutex(struct _mesa_HashTable *table) +{ + assert(table); + _glthread_LOCK_MUTEX(table-Mutex); +} + + +/** + * Unlock the hash table mutex. 
+ * + * \param table the hash table. + */ +void +_mesa_HashUnlockMutex(struct _mesa_HashTable *table) +{ + assert(table); + _glthread_UNLOCK_MUTEX(table-Mutex); +} + + +static inline void +_mesa_HashInsert_unlocked(struct _mesa_HashTable *table, GLuint key, void *data) { uint32_t hash = uint_hash(key); struct hash_entry *entry; @@ -211,8 +250,6 @@ _mesa_HashInsert(struct _mesa_HashTable *table, GLuint key, void *data) assert(table); assert(key); - _glthread_LOCK_MUTEX(table-Mutex); - if (key table-MaxKey) table-MaxKey = key; @@ -226,11 +263,44 @@ _mesa_HashInsert(struct _mesa_HashTable *table, GLuint key, void *data) _mesa_hash_table_insert(table-ht, hash, uint_key(key), data); } } +} - _glthread_UNLOCK_MUTEX(table-Mutex); + +/** + * Insert a key/pointer pair into the hash table without locking the mutex. + * If an entry with this key already exists we'll replace the existing entry. + * + * The hash table mutex must be locked manually by calling + * _mesa_HashLockMutex() before calling this function. + * + * \param table the hash table. + * \param key the key (not zero). + * \param data pointer to user data. + */ +void +_mesa_HashInsertWithoutLocking(struct _mesa_HashTable *table, GLuint key, void *data) +{ + _mesa_HashInsert_unlocked(table, key, data); } +/** + * Insert a key/pointer pair into the hash table. + * If an entry with this key already exists we'll replace the existing entry. + * + * \param table the hash table. + * \param key the key (not zero). + * \param data pointer to user data. + */ +void +_mesa_HashInsert(struct _mesa_HashTable *table, GLuint key, void *data) +{ + assert(table); + _glthread_LOCK_MUTEX(table-Mutex); + _mesa_HashInsert_unlocked(table, key, data); + _glthread_UNLOCK_MUTEX(table-Mutex); +} + /** * Remove an entry from the hash table. 
diff --git a/src/mesa/main/hash.h b/src/mesa/main/hash.h index b34f328..7ef8186 100644 --- a/src/mesa/main/hash.h +++ b/src/mesa/main/hash.h @@ -45,6 +45,15 @@ extern void _mesa_HashInsert(struct _mesa_HashTable *table, GLuint key, void *da extern void _mesa_HashRemove(struct _mesa_HashTable *table, GLuint key); +extern void _mesa_HashLockMutex(struct _mesa_HashTable *table); + +extern void _mesa_HashUnlockMutex(struct _mesa_HashTable *table); + +extern void *_mesa_HashLookupWithoutLocking(struct _mesa_HashTable *table, GLuint key); + +extern void _mesa_HashInsertWithoutLocking(struct _mesa_HashTable *table, + GLuint key, void *data); + extern void _mesa_HashDeleteAll(struct _mesa_HashTable *table, void (*callback)(GLuint key, void *data, void *userData), In other parts of Mesa/gallium I believe we use function names such as FooLocked() to indicate that Foo should be called when the caller already holds the lock.
Re: [Mesa-dev] [PATCH 08/18] mesa: Implement glBindTextures()
On 01/21/2014 03:35 PM, Fredrik Höglund wrote: --- src/mesa/main/texobj.c | 121 1 file changed, 121 insertions(+) diff --git a/src/mesa/main/texobj.c b/src/mesa/main/texobj.c index c9fe11d..2ff3f9d 100644 --- a/src/mesa/main/texobj.c +++ b/src/mesa/main/texobj.c @@ -1140,6 +1140,28 @@ unbind_texobj_from_imgunits(struct gl_context *ctx, } } +/** + * Unbinds all textures bound to the given texture image unit. + */ +static void +unbind_textures_from_unit(struct gl_context *ctx, GLuint unit) +{ + struct gl_texture_unit *texUnit = ctx-Texture.Unit[unit]; + + while (texUnit-_BoundTextures) { + const GLuint index = ffs(texUnit-_BoundTextures) - 1; + struct gl_texture_object *texObj = ctx-Shared-DefaultTex[index]; + + _mesa_reference_texobj(texUnit-CurrentTex[index], texObj); + + /* Pass BindTexture call to device driver */ + if (ctx-Driver.BindTexture) + ctx-Driver.BindTexture(ctx, 0, texObj); + + texUnit-_BoundTextures ^= (1 index); texUnit-_BoundTextures = ~(1 index); + ctx-NewState |= _NEW_TEXTURE; + } +} /** * Delete named textures. @@ -1384,6 +1406,105 @@ _mesa_BindTexture( GLenum target, GLuint texName ) void GLAPIENTRY _mesa_BindTextures(GLuint first, GLsizei count, const GLuint *textures) { + GET_CURRENT_CONTEXT(ctx); + GLuint i; GLint/GLsizei i; + + /* The ARB_multi_bind spec says: +* +* An INVALID_OPERATION error is generated if first + count +* is greater than the number of texture image units supported +* by the implementation. +*/ + if (first + count ctx-Const.MaxCombinedTextureImageUnits) { + _mesa_error(ctx, GL_INVALID_OPERATION, + glBindTextures(first=%u + count=%u the value of + GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS=%u), + first, count, ctx-Const.MaxCombinedTextureImageUnits); + return; + } + + /* Flush before changing bindings */ + FLUSH_VERTICES(ctx, 0); + + if (textures) { + /* Note that the error semantics for multi-bind commands differ from + * those of other GL commands. 
+ * + * The issues section in the ARB_multi_bind spec says: + * + *(11) Typically, OpenGL specifies that if an error is generated by + * a command, that command has no effect. This is somewhat + * unfortunate for multi-bind commands, because it would require + * a first pass to scan the entire list of bound objects for + * errors and then a second pass to actually perform the + * bindings. Should we have different error semantics? + * + * RESOLVED: Yes. In this specification, when the parameters for + * one of the count binding points are invalid, that binding + * point is not updated and an error will be generated. However, + * other binding points in the same command will be updated if + * their parameters are valid and no other error occurs. + */ + + _mesa_begin_texture_lookups(ctx); + + for (i = 0; i count; i++) { + if (textures[i] != 0) { +struct gl_texture_unit *texUnit = ctx-Texture.Unit[first + i]; +struct gl_texture_object *current = texUnit-_Current; +struct gl_texture_object *texObj; +GLenum target; +GLuint targetIndex; gl_texture_index targetIndex; + +if (current current-Name == textures[i]) + texObj = current; +else + texObj = _mesa_lookup_texture_without_locking(ctx, textures[i]); + +/* The ARB_multi_bind spec says: + * + * An INVALID_OPERATION error is generated if any value + * in textures is not zero or the name of an existing + * texture object (per binding). + */ +if (!texObj || texObj-Target == 0) { + _mesa_error(ctx, GL_INVALID_OPERATION, + glBindTextures(textures[%u]=%u is not zero + or the name of an existing texture object), + i, textures[i]); + continue; +} + +target = texObj-Target; +targetIndex = texObj-TargetIndex; + +if (texUnit-CurrentTex[targetIndex] == texObj) + continue; + +/* Do the actual binding. The refcount on the previously bound + * texture object will be decremented. It will be deleted if the + * count hits zero. 
+ */ +_mesa_reference_texobj(texUnit-CurrentTex[targetIndex], texObj); +ASSERT(texUnit-CurrentTex[targetIndex]); + +texUnit-_BoundTextures |= (1 targetIndex); +ctx-NewState |= _NEW_TEXTURE; + +/* Pass BindTexture call to device
Re: [Mesa-dev] [PATCH] rtasm: deal with size overflows by casting to ptrdiff_t
On 01/21/2014 06:37 PM, Ilia Mirkin wrote: This was discovered as a result of the draw-elements-base-vertex-neg piglit test, which passes very negative offsets in, followed up by large indices. The nouveau code correctly adjusts the pointer, but the transfer code needs to do the proper inverse correction. Similarly fix up the SSE code to do a 64-bit multiply to compute the proper offset. Signed-off-by: Ilia Mirkin imir...@alum.mit.edu --- With this change, nouveau passes for the draw-elements-base-vertex-neg piglit test with user_varrays, on a 64-bit setup both with and without GALLIUM_NOSSE=1. I'm pretty sure that the change should be minimal to a non-x86 setup since the rexw will be a no-op. I guess there will be an extra register use for the mov, but it shouldn't be too expensive, esp on anything remotely current. src/gallium/auxiliary/translate/translate_generic.c | 2 +- src/gallium/auxiliary/translate/translate_sse.c | 8 ++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c index 5bf97db..5ffce32 100644 --- a/src/gallium/auxiliary/translate/translate_generic.c +++ b/src/gallium/auxiliary/translate/translate_generic.c @@ -638,7 +638,7 @@ static ALWAYS_INLINE void PIPE_CDECL generic_run_one( struct translate_generic * } src = tg-attrib[attr].input_ptr + - tg-attrib[attr].input_stride * index; + (ptrdiff_t)tg-attrib[attr].input_stride * index; copy_size = tg-attrib[attr].copy_size; if(likely(copy_size = 0)) diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c index a78ea91..a72454a 100644 --- a/src/gallium/auxiliary/translate/translate_sse.c +++ b/src/gallium/auxiliary/translate/translate_sse.c @@ -1121,7 +1121,9 @@ static boolean init_inputs( struct translate_sse *p, x86_cmovcc(p-func, tmp_EAX, buf_max_index, cc_AE); } - x86_imul(p-func, tmp_EAX, buf_stride); + x86_mov(p-func, p-tmp2_EDX, 
buf_stride); + x64_rexw(p-func); + x86_imul(p-func, tmp_EAX, p-tmp2_EDX); x64_rexw(p-func); x86_add(p-func, tmp_EAX, buf_base_ptr); @@ -1207,7 +1209,9 @@ static struct x86_reg get_buffer_ptr( struct translate_sse *p, x86_cmp(p-func, ptr, buf_max_index); x86_cmovcc(p-func, ptr, buf_max_index, cc_AE); - x86_imul(p-func, ptr, buf_stride); + x86_mov(p-func, p-tmp2_EDX, buf_stride); + x64_rexw(p-func); + x86_imul(p-func, ptr, p-tmp2_EDX); x64_rexw(p-func); x86_add(p-func, ptr, buf_base_ptr); return ptr; I'm no x86 expert, but this looks OK to me, and if it works for you... Reviewed-by: Brian Paul bri...@vmware.com Should this go into the 10.0.x branch too? ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 00/18] Implement GL_ARB_multi_bind
On 01/21/2014 03:35 PM, Fredrik Höglund wrote: So here is my take on GL_ARB_multi_bind. I tried to come up with names for the new hash table functions that don't suggest that they should be used to do unlocked insertions/lookups. I'm not entirely happy with the ones I came up with though, so I'm hoping someone will have better suggestions. When binding 32 textures glBindTextures() seems to be about three times faster than calling glActiveTexture() + glBindTexture() in a loop. When binding 4 textures it's about twice as fast. I hope to land this series this week if there are no major issues. Looks good, Fredrik! I just have a bunch of little nit picks. I only have time to review 1-11 for today. Hopefully someone else can review the later ones. Esp. areas like xform-fb, glBindImageTextures, UBOs, glBindBuffersRange(), etc. -Brian ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 03/18] mesa: Optimize unbind_texobj_from_texunits()
On 01/21/2014 03:35 PM, Fredrik Höglund wrote: The texture can only be bound to the index that corresponds to its target, so there is no need to loop over all possible indices for every unit and checking if the texture is bound to it. --- src/mesa/main/texobj.c | 20 ++-- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/mesa/main/texobj.c b/src/mesa/main/texobj.c index fbbc577..5a3cfb2 100644 --- a/src/mesa/main/texobj.c +++ b/src/mesa/main/texobj.c @@ -1084,19 +1084,19 @@ static void unbind_texobj_from_texunits(struct gl_context *ctx, struct gl_texture_object *texObj) { - GLuint u, tex; + const GLuint index = texObj-TargetIndex; + GLuint u; for (u = 0; u Elements(ctx-Texture.Unit); u++) { struct gl_texture_unit *unit = ctx-Texture.Unit[u]; - for (tex = 0; tex NUM_TEXTURE_TARGETS; tex++) { - if (texObj == unit-CurrentTex[tex]) { -_mesa_reference_texobj(unit-CurrentTex[tex], - ctx-Shared-DefaultTex[tex]); -ASSERT(unit-CurrentTex[tex]); -unit-_BoundTextures ^= (1 tex); -break; - } - } + + if (texObj != unit-CurrentTex[index]) + continue; + + _mesa_reference_texobj(unit-CurrentTex[index], + ctx-Shared-DefaultTex[index]); + ASSERT(unit-CurrentTex[index]); + unit-_BoundTextures ^= (1 index); } } I guess I'd like to see the last part written as: if (texObj == unit-CurrentTex[index]) { /* Bind the default texture for this unit/target */ _mesa_reference_texobj(unit-CurrentTex[index], ctx-Shared-DefaultTex[index]); unit-_BoundTextures = ~(1 index); } I try to only use continue when really needed. I don't think the assertion is needed since we wouldn't have created the context if we failed to create any DefaultTex. -Brian ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 01/18] mesa: Store the target index in gl_texture_object
On 01/21/2014 03:35 PM, Fredrik Höglund wrote: This will be used by glBindTextures() so we don't have to look it up for each texture. --- src/mesa/main/mtypes.h |1 + src/mesa/main/texobj.c |2 ++ 2 files changed, 3 insertions(+) diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index c396609..2fe47c4 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -1194,6 +1194,7 @@ struct gl_texture_object GLuint Name;/** the user-visible texture object ID */ GLchar *Label; /** GL_KHR_debug */ GLenum Target; /** GL_TEXTURE_1D, GL_TEXTURE_2D, etc. */ + GLuint TargetIndex; /** The gl_texture_unit::CurrentTex index */ Could this be gl_texture_index TargetIndex;? struct gl_sampler_object Sampler; diff --git a/src/mesa/main/texobj.c b/src/mesa/main/texobj.c index 3c64c437..4d97275 100644 --- a/src/mesa/main/texobj.c +++ b/src/mesa/main/texobj.c @@ -259,6 +259,7 @@ _mesa_copy_texture_object( struct gl_texture_object *dest, const struct gl_texture_object *src ) { dest-Target = src-Target; + dest-TargetIndex = src-TargetIndex; dest-Name = src-Name; dest-Priority = src-Priority; dest-Sampler.BorderColor.f[0] = src-Sampler.BorderColor.f[0]; @@ -1318,6 +1319,7 @@ _mesa_BindTexture( GLenum target, GLuint texName ) _glthread_UNLOCK_MUTEX(ctx-Shared-Mutex); } newTexObj-Target = target; + newTexObj-TargetIndex = targetIndex; } assert(valid_texture_object(newTexObj)); So the memset() in _mesa_initialize_texture_object() would implicitly set TargetIndex = TEXTURE_2D_MULTISAMPLE_INDEX (zero). Is that going to cause any trouble? -Brian ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 02/18] mesa: Add a _BoundTextures field in gl_texture_unit
On 01/21/2014 03:35 PM, Fredrik Höglund wrote: This will be used by glBindTextures() when unbinding textures, to avoid having to loop over all the targets. --- src/mesa/main/mtypes.h |3 +++ src/mesa/main/texobj.c |6 ++ src/mesa/main/texstate.c |3 +++ 3 files changed, 12 insertions(+) diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 2fe47c4..c1a17b8 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -1368,6 +1368,9 @@ struct gl_texture_unit /** Points to highest priority, complete and enabled texture object */ struct gl_texture_object *_Current; + + /** Texture targets that have a non-default texture bound */ + GLbitfield _BoundTextures; }; diff --git a/src/mesa/main/texobj.c b/src/mesa/main/texobj.c index 4d97275..fbbc577 100644 --- a/src/mesa/main/texobj.c +++ b/src/mesa/main/texobj.c @@ -1093,6 +1093,7 @@ unbind_texobj_from_texunits(struct gl_context *ctx, _mesa_reference_texobj(unit-CurrentTex[tex], ctx-Shared-DefaultTex[tex]); ASSERT(unit-CurrentTex[tex]); +unit-_BoundTextures ^= (1 tex); I'd prefer: unit-_BoundTextures = ~(1 tex); since it tells the reader that we want to clear the bit, not just toggle it. 
break; } } @@ -1348,6 +1349,11 @@ _mesa_BindTexture( GLenum target, GLuint texName ) _mesa_reference_texobj(texUnit-CurrentTex[targetIndex], newTexObj); ASSERT(texUnit-CurrentTex[targetIndex]); + if (texName != 0) + texUnit-_BoundTextures |= (1 targetIndex); + else + texUnit-_BoundTextures = ~(1 targetIndex); + /* Pass BindTexture call to device driver */ if (ctx-Driver.BindTexture) ctx-Driver.BindTexture(ctx, target, newTexObj); diff --git a/src/mesa/main/texstate.c b/src/mesa/main/texstate.c index 7720965..c86324f 100644 --- a/src/mesa/main/texstate.c +++ b/src/mesa/main/texstate.c @@ -109,6 +109,7 @@ _mesa_copy_texture_state( const struct gl_context *src, struct gl_context *dst ) _mesa_reference_texobj(dst-Texture.Unit[u].CurrentTex[tex], src-Texture.Unit[u].CurrentTex[tex]); } + dst-Texture.Unit[u]._BoundTextures = src-Texture.Unit[u]._BoundTextures; _mesa_unlock_context_textures(dst); } } @@ -798,6 +799,8 @@ init_texture_unit( struct gl_context *ctx, GLuint unit ) _mesa_reference_texobj(texUnit-CurrentTex[tex], ctx-Shared-DefaultTex[tex]); } + + texUnit-_BoundTextures = 0; } ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 11/18] mesa: Refactor set_ubo_binding()
On 01/21/2014 03:35 PM, Fredrik Höglund wrote: Make set_ubo_binding() just update the binding, and move the code that does validation, flushes the vertices etc. into a new bind_uniform_buffer() function. --- src/mesa/main/bufferobj.c | 50 - 1 file changed, 31 insertions(+), 19 deletions(-) diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c index 81344ac..9ef9dd0 100644 --- a/src/mesa/main/bufferobj.c +++ b/src/mesa/main/bufferobj.c @@ -2456,15 +2456,30 @@ _mesa_GetObjectParameterivAPPLE(GLenum objectType, GLuint name, GLenum pname, static void set_ubo_binding(struct gl_context *ctx, - int index, - struct gl_buffer_object *bufObj, - GLintptr offset, - GLsizeiptr size, - GLboolean autoSize) +struct gl_uniform_buffer_binding *binding, +struct gl_buffer_object *bufObj, +GLintptr offset, +GLsizeiptr size, +GLboolean autoSize) { - struct gl_uniform_buffer_binding *binding; + _mesa_reference_buffer_object(ctx, binding-BufferObject, bufObj); + + binding-Offset = offset; + binding-Size = size; + binding-AutomaticSize = autoSize; +} + +static void +bind_uniform_buffer(struct gl_context *ctx, +GLuint index, +struct gl_buffer_object *bufObj, +GLintptr offset, +GLsizeiptr size, +GLboolean autoSize) I think we need comments on set_ubo_binding() and bind_uniform_buffer() to explain how/why they're different. 
+{ + struct gl_uniform_buffer_binding *binding = + ctx-UniformBufferBindings[index]; - binding = ctx-UniformBufferBindings[index]; if (binding-BufferObject == bufObj binding-Offset == offset binding-Size == size @@ -2475,10 +2490,7 @@ set_ubo_binding(struct gl_context *ctx, FLUSH_VERTICES(ctx, 0); ctx-NewDriverState |= ctx-DriverFlags.NewUniformBuffer; - _mesa_reference_buffer_object(ctx, binding-BufferObject, bufObj); - binding-Offset = offset; - binding-Size = size; - binding-AutomaticSize = autoSize; + set_ubo_binding(ctx, binding, bufObj, offset, size, autoSize); } /** @@ -2507,13 +2519,12 @@ bind_buffer_range_uniform_buffer(struct gl_context *ctx, return; } - if (bufObj == ctx-Shared-NullBufferObj) { - offset = -1; - size = -1; - } - _mesa_reference_buffer_object(ctx, ctx-UniformBuffer, bufObj); - set_ubo_binding(ctx, index, bufObj, offset, size, GL_FALSE); + + if (bufObj == ctx-Shared-NullBufferObj) + bind_uniform_buffer(ctx, index, bufObj, -1, -1, GL_TRUE); + else + bind_uniform_buffer(ctx, index, bufObj, offset, size, GL_FALSE); } @@ -2532,10 +2543,11 @@ bind_buffer_base_uniform_buffer(struct gl_context *ctx, } _mesa_reference_buffer_object(ctx, ctx-UniformBuffer, bufObj); + if (bufObj == ctx-Shared-NullBufferObj) - set_ubo_binding(ctx, index, bufObj, -1, -1, GL_TRUE); + bind_uniform_buffer(ctx, index, bufObj, -1, -1, GL_TRUE); else - set_ubo_binding(ctx, index, bufObj, 0, 0, GL_TRUE); + bind_uniform_buffer(ctx, index, bufObj, 0, 0, GL_TRUE); } static void ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] mesa: Ensure that transform feedback refers to the correct program.
On 01/22/2014 06:07 AM, Paul Berry wrote: Previous to this patch, the _mesa_{Begin,Resume}TransformFeedback functions were using ctx-Shader.CurrentProgram[MESA_SHADER_VERTEX] to find the program that would be the source of transform feedback data. This isn't correct--if there's a geometry shader present it should be ctx-Shader.CurrentProgram[MESA_SHADER_GEOMETRY]. (These might be different if separate shader objects are in use). This patch creates a function get_xfb_source(), which figures out the correct program to use based on GL state, and updates _mesa_{Begin,Resume}TransformFeedback to call it. get_xfb_source() is written in terms of the gl_shader_stage enum, so it should not need modification when we add tessellation shaders in the future. It also creates a new driver flag, NewTransformFeedbackProg, which is flagged whenever this program changes. To reduce future confusion, this patch also rewords some comments and error message text to avoid referring to vertex shaders. --- src/mesa/main/mtypes.h| 8 -- src/mesa/main/transformfeedback.c | 52 +-- 2 files changed, 45 insertions(+), 15 deletions(-) diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 3dd9678..7fd3298 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -1815,8 +1815,9 @@ struct gl_transform_feedback_object /** * The shader program active when BeginTransformFeedback() was called. -* When active and unpaused, this equals -* ctx-Shader.CurrentProgram[MESA_SHADER_VERTEX]. +* When active and unpaused, this equals ctx-Shader.CurrentProgram[stage], +* where stage is the pipeline stage that is the source of data for +* transform feedback. 
*/ struct gl_shader_program *shader_program; @@ -3779,6 +3780,9 @@ struct gl_driver_flags /** gl_context::TransformFeedback::CurrentObject */ GLbitfield NewTransformFeedback; + /** gl_context::TransformFeedback::CurrentObject::shader_program */ + GLbitfield NewTransformFeedbackProg; + /** gl_context::RasterDiscard */ GLbitfield NewRasterizerDiscard; diff --git a/src/mesa/main/transformfeedback.c b/src/mesa/main/transformfeedback.c index 74897ba..9376a9e 100644 --- a/src/mesa/main/transformfeedback.c +++ b/src/mesa/main/transformfeedback.c @@ -24,7 +24,7 @@ /* - * Vertex transform feedback support. + * Transform feedback support. * * Authors: * Brian Paul @@ -376,25 +376,48 @@ _mesa_compute_max_transform_feedback_vertices( **/ +/** + * Figure out which stage of the pipeline is the source of transform feedback + * data given the current context state, and return its gl_shader_program. + * + * If no active program can generate transform feedback data (i.e. no vertex + * shader is active), returns NULL. + */ +static struct gl_shader_program * +get_xfb_source(struct gl_context *ctx) +{ + int i; + for (i = MESA_SHADER_FRAGMENT - 1; i >= MESA_SHADER_VERTEX; i--) { I think this would be clearer as: for (i = MESA_SHADER_GEOMETRY; i >= MESA_SHADER_VERTEX; i--) { ... } Note that the pipeline ordering is: Vertex -> Tess. Control -> Tess. Eval -> Geometry -> Transform Feedback (http://www.opengl.org/wiki/Rendering_Pipeline_Overview) So either implementation would work even with tessellation shaders. Either way, this series is: Reviewed-by: Kenneth Graunke kenn...@whitecape.org Cc: 10.0 mesa-sta...@lists.freedesktop.org signature.asc Description: OpenPGP digital signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] mesa: Set gl_constants::MinMapBufferAlignment
From: Ian Romanick ian.d.roman...@intel.com Leaving it set to zero isn't really correct since every allocation has at least an alignment of 1 byte. It also caused a problem in the i965 driver after I removed the MAX(64, ...) from the alignment calculation. That's what I get for changing a patch without retesting it. :( Signed-off-by: Ian Romanick ian.d.roman...@intel.com Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=73907 Cc: Kenneth Graunke kenn...@whitecape.org Cc: Lu Ha huax...@intel.com --- src/mesa/main/context.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c index 7c3b2d7..b7cd568 100644 --- a/src/mesa/main/context.c +++ b/src/mesa/main/context.c @@ -587,6 +587,7 @@ _mesa_init_constants(struct gl_context *ctx) ctx->Const.MaxSpotExponent = 128.0; ctx->Const.MaxViewportWidth = MAX_VIEWPORT_WIDTH; ctx->Const.MaxViewportHeight = MAX_VIEWPORT_HEIGHT; + ctx->Const.MinMapBufferAlignment = 1; /* Driver must override these values if ARB_viewport_array is supported. */ ctx->Const.MaxViewports = 1; -- 1.8.1.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] mesa: Set gl_constants::MinMapBufferAlignment
On 01/22/2014 08:31 AM, Ian Romanick wrote: From: Ian Romanick ian.d.roman...@intel.com Leaving it set to zero isn't really correct since every allocation has at least an alignment of 1 byte. It also caused a problem in the i965 driver after I removed the MAX(64, ...) from the alignment calculation. That's what I get for changing a patch without retesting it. :( Signed-off-by: Ian Romanick ian.d.roman...@intel.com Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=73907 Cc: Kenneth Graunke kenn...@whitecape.org Cc: Lu Ha huax...@intel.com --- src/mesa/main/context.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c index 7c3b2d7..b7cd568 100644 --- a/src/mesa/main/context.c +++ b/src/mesa/main/context.c @@ -587,6 +587,7 @@ _mesa_init_constants(struct gl_context *ctx) ctx->Const.MaxSpotExponent = 128.0; ctx->Const.MaxViewportWidth = MAX_VIEWPORT_WIDTH; ctx->Const.MaxViewportHeight = MAX_VIEWPORT_HEIGHT; + ctx->Const.MinMapBufferAlignment = 1; /* Driver must override these values if ARB_viewport_array is supported. */ ctx->Const.MaxViewports = 1; Ah, thanks for fixing this! Reviewed-by: Kenneth Graunke kenn...@whitecape.org signature.asc Description: OpenPGP digital signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] rtasm: deal with size overflows by casting to ptrdiff_t
On Wed, Jan 22, 2014 at 12:03 PM, Brian Paul bri...@vmware.com wrote: On 01/21/2014 06:37 PM, Ilia Mirkin wrote: This was discovered as a result of the draw-elements-base-vertex-neg piglit test, which passes very negative offsets in, followed up by large indices. The nouveau code correctly adjusts the pointer, but the transfer code needs to do the proper inverse correction. Similarly fix up the SSE code to do a 64-bit multiply to compute the proper offset. Signed-off-by: Ilia Mirkin imir...@alum.mit.edu --- With this change, nouveau passes for the draw-elements-base-vertex-neg piglit test with user_varrays, on a 64-bit setup both with and without GALLIUM_NOSSE=1. I'm pretty sure that the change should be minimal to a non-x86 setup since the rexw will be a no-op. I guess there will be an extra register use for the mov, but it shouldn't be too expensive, esp on anything remotely current. src/gallium/auxiliary/translate/translate_generic.c | 2 +- src/gallium/auxiliary/translate/translate_sse.c | 8 ++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c index 5bf97db..5ffce32 100644 --- a/src/gallium/auxiliary/translate/translate_generic.c +++ b/src/gallium/auxiliary/translate/translate_generic.c @@ -638,7 +638,7 @@ static ALWAYS_INLINE void PIPE_CDECL generic_run_one( struct translate_generic * } src = tg->attrib[attr].input_ptr + - tg->attrib[attr].input_stride * index; + (ptrdiff_t)tg->attrib[attr].input_stride * index; copy_size = tg->attrib[attr].copy_size; if(likely(copy_size >= 0)) diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c index a78ea91..a72454a 100644 --- a/src/gallium/auxiliary/translate/translate_sse.c +++ b/src/gallium/auxiliary/translate/translate_sse.c @@ -1121,7 +1121,9 @@ static boolean init_inputs( struct translate_sse *p, x86_cmovcc(p->func, tmp_EAX, buf_max_index, cc_AE); } 
- x86_imul(p->func, tmp_EAX, buf_stride); + x86_mov(p->func, p->tmp2_EDX, buf_stride); + x64_rexw(p->func); + x86_imul(p->func, tmp_EAX, p->tmp2_EDX); x64_rexw(p->func); x86_add(p->func, tmp_EAX, buf_base_ptr); @@ -1207,7 +1209,9 @@ static struct x86_reg get_buffer_ptr( struct translate_sse *p, x86_cmp(p->func, ptr, buf_max_index); x86_cmovcc(p->func, ptr, buf_max_index, cc_AE); - x86_imul(p->func, ptr, buf_stride); + x86_mov(p->func, p->tmp2_EDX, buf_stride); + x64_rexw(p->func); + x86_imul(p->func, ptr, p->tmp2_EDX); x64_rexw(p->func); x86_add(p->func, ptr, buf_base_ptr); return ptr; I'm no x86 expert, but this looks OK to me, and if it works for you... I'm no expert either, but perhaps this will put your mind at ease (as it did mine): void *func(void *a, int b, int c) { return a + (ptrdiff_t)b * c; } gets compiled into, by gcc -O2 -S, movslq %edx, %rdx movslq %esi, %rsi imulq %rdx, %rsi leaq (%rdi,%rsi), %rax Which, to be honest, is pretty clever, using leaq that way. But it's basically an add (but if it were, e.g., int *, it would be leaq (%rdi,%rsi,2), %rax saving on a shl). (Note that this is att/gas syntax, so all backwards.) The new code generated with the modifications I made is: 0020 8b977004 mov edx, [rdi+0x470] 0026 480fafca imul rcx, rdx 002a 48038f6804 add rcx, [rdi+0x468] So the difference is that I'm first moving the value into edx, rather than using the [rdi+bla] directly in the imul, and then making the imul use 64-bit registers (by using the REX.W prefix). By moving the 32-bit value into edx, the upper rdx bits are cleared. The only potential problem is that I'm using EDX where it wasn't used before, but looking at the call-site of get_buffer_ptr, that seems like it should be OK. Reviewed-by: Brian Paul bri...@vmware.com Thanks! Would you be able to commit this as well? I noticed that I messed up the patch subject and a word in the description... s/rtasm/translate, s/transfer/translate. 
You can see these fixed at https://github.com/imirkin/mesa/commit/3bc3081b2106ab0544adc483768ab2a1321565ff Should this go into the 10.0.x branch too? Ehh... I don't know if it qualifies. The only legit way that this can happen (at least for nouveau) is if an application allocates a 4GB VBO. Seems unlikely. (vs the non-0 chance that I messed something up in there.) Up to you though. -ilia ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 73934] Function roundf undeclared in textparam.c when building with MSVC11
https://bugs.freedesktop.org/show_bug.cgi?id=73934 --- Comment #2 from Roland Scheidegger srol...@vmware.com --- The stable branches and hence the tarballs are missing commit bba8f10598866776ae198b363b3752c2e3bbb126 from master at the moment which fixes this. You can just manually replace *params = (GLint) roundf(obj->Sampler.LodBias); with *params = IROUND(obj->Sampler.LodBias); to fix this. -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [v2 03/23] i965: rename tex_ms to tex_cms
Prepares for the introduction of non-compressed multi-sampled lookup used in the blorp programs. v2: now also taking into account gen8 Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com Reviewed-by: Paul Berry stereotype...@gmail.com (v1) --- src/mesa/drivers/dri/i965/brw_defines.h | 2 +- src/mesa/drivers/dri/i965/brw_fs.cpp | 2 +- src/mesa/drivers/dri/i965/brw_fs_generator.cpp| 4 ++-- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 4 ++-- src/mesa/drivers/dri/i965/brw_shader.cpp | 6 +++--- src/mesa/drivers/dri/i965/brw_vec4.cpp| 2 +- src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 4 ++-- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp| 2 +- src/mesa/drivers/dri/i965/gen8_fs_generator.cpp | 4 ++-- src/mesa/drivers/dri/i965/gen8_vec4_generator.cpp | 4 ++-- 10 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 0faad66..12f7e40 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -770,7 +770,7 @@ enum opcode { SHADER_OPCODE_TXL, SHADER_OPCODE_TXS, FS_OPCODE_TXB, - SHADER_OPCODE_TXF_MS, + SHADER_OPCODE_TXF_CMS, SHADER_OPCODE_TXF_MCS, SHADER_OPCODE_LOD, SHADER_OPCODE_TG4, diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index a0e4830..1e6c3e0 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -766,7 +766,7 @@ fs_visitor::implied_mrf_writes(fs_inst *inst) case FS_OPCODE_TXB: case SHADER_OPCODE_TXD: case SHADER_OPCODE_TXF: - case SHADER_OPCODE_TXF_MS: + case SHADER_OPCODE_TXF_CMS: case SHADER_OPCODE_TXF_MCS: case SHADER_OPCODE_TG4: case SHADER_OPCODE_TG4_OFFSET: diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index e701fc5..d257748 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -425,7 +425,7 @@ 
fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src case SHADER_OPCODE_TXF: msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD; break; - case SHADER_OPCODE_TXF_MS: + case SHADER_OPCODE_TXF_CMS: if (brw->gen >= 7) msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS; else @@ -1657,7 +1657,7 @@ fs_generator::generate_code(exec_list *instructions) case FS_OPCODE_TXB: case SHADER_OPCODE_TXD: case SHADER_OPCODE_TXF: - case SHADER_OPCODE_TXF_MS: + case SHADER_OPCODE_TXF_CMS: case SHADER_OPCODE_TXF_MCS: case SHADER_OPCODE_TXL: case SHADER_OPCODE_TXS: diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 69ca940..4180377 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -1214,7 +1214,7 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, /* sample index */ emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), sample_index)); mlen += reg_width; - inst = emit(SHADER_OPCODE_TXF_MS, dst); + inst = emit(SHADER_OPCODE_TXF_CMS, dst); break; case ir_lod: inst = emit(SHADER_OPCODE_LOD, dst); @@ -1405,7 +1405,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, case ir_txl: inst = emit(SHADER_OPCODE_TXL, dst, payload); break; case ir_txd: inst = emit(SHADER_OPCODE_TXD, dst, payload); break; case ir_txf: inst = emit(SHADER_OPCODE_TXF, dst, payload); break; - case ir_txf_ms: inst = emit(SHADER_OPCODE_TXF_MS, dst, payload); break; + case ir_txf_ms: inst = emit(SHADER_OPCODE_TXF_CMS, dst, payload); break; case ir_txs: inst = emit(SHADER_OPCODE_TXS, dst, payload); break; case ir_query_levels: inst = emit(SHADER_OPCODE_TXS, dst, payload); break; case ir_lod: inst = emit(SHADER_OPCODE_LOD, dst, payload); break; diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 446c917..b74d6e8 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ 
b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -440,8 +440,8 @@ brw_instruction_name(enum opcode op) return "txs"; case FS_OPCODE_TXB: return "txb"; - case SHADER_OPCODE_TXF_MS: - return "txf_ms"; + case SHADER_OPCODE_TXF_CMS: + return "txf_cms"; case SHADER_OPCODE_TXF_MCS: return "txf_mcs"; case SHADER_OPCODE_TG4: @@ -538,7 +538,7 @@ backend_instruction::is_tex() opcode == FS_OPCODE_TXB || opcode == SHADER_OPCODE_TXD || opcode == SHADER_OPCODE_TXF || - opcode == SHADER_OPCODE_TXF_MS || + opcode == SHADER_OPCODE_TXF_CMS || opcode == SHADER_OPCODE_TXF_MCS || opcode ==
[Mesa-dev] [v2 14/23] i965/blorp: wrap SHL (/brw_SHL(func, /emit_shl(/)
Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com Reviewed-by: Paul Berry stereotype...@gmail.com --- src/mesa/drivers/dri/i965/brw_blorp_blit.cpp | 24 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h | 7 +++ 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp index 715c716..432c11c 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp @@ -1166,12 +1166,12 @@ brw_blorp_blit_program::translate_tiling(bool old_tiled_w, bool new_tiled_w) emit_and(t1, X, brw_imm_uw(0xfff4)); /* X ~0b1011 */ emit_shr(t1, t1, brw_imm_uw(1)); /* (X ~0b1011) 1 */ emit_and(t2, Y, brw_imm_uw(1)); /* Y 0b1 */ - brw_SHL(func, t2, t2, brw_imm_uw(2)); /* (Y 0b1) 2 */ + emit_shl(t2, t2, brw_imm_uw(2)); /* (Y 0b1) 2 */ brw_OR(func, t1, t1, t2); /* (X ~0b1011) 1 | (Y 0b1) 2 */ emit_and(t2, X, brw_imm_uw(1)); /* X 0b1 */ brw_OR(func, Xp, t1, t2); emit_and(t1, Y, brw_imm_uw(0xfffe)); /* Y ~0b1 */ - brw_SHL(func, t1, t1, brw_imm_uw(1)); /* (Y ~0b1) 1 */ + emit_shl(t1, t1, brw_imm_uw(1)); /* (Y ~0b1) 1 */ emit_and(t2, X, brw_imm_uw(8)); /* X 0b1000 */ emit_shr(t2, t2, brw_imm_uw(2)); /* (X 0b1000) 2 */ brw_OR(func, t1, t1, t2); /* (Y ~0b1) 1 | (X 0b1000) 2 */ @@ -1187,12 +1187,12 @@ brw_blorp_blit_program::translate_tiling(bool old_tiled_w, bool new_tiled_w) * Y' = (Y ~0b11) 1 | (X 0b100) 2 */ emit_and(t1, X, brw_imm_uw(0xfffa)); /* X ~0b101 */ - brw_SHL(func, t1, t1, brw_imm_uw(1)); /* (X ~0b101) 1 */ + emit_shl(t1, t1, brw_imm_uw(1)); /* (X ~0b101) 1 */ emit_and(t2, Y, brw_imm_uw(2)); /* Y 0b10 */ - brw_SHL(func, t2, t2, brw_imm_uw(2)); /* (Y 0b10) 2 */ + emit_shl(t2, t2, brw_imm_uw(2)); /* (Y 0b10) 2 */ brw_OR(func, t1, t1, t2); /* (X ~0b101) 1 | (Y 0b10) 2 */ emit_and(t2, Y, brw_imm_uw(1)); /* Y 0b1 */ - brw_SHL(func, t2, t2, brw_imm_uw(1)); /* (Y 0b1) 1 */ + emit_shl(t2, t2, brw_imm_uw(1)); /* (Y 0b1) 1 */ brw_OR(func, t1, t1, t2); /* 
(X ~0b101) 1 | (Y 0b10) 2 | (Y 0b1) 1 */ emit_and(t2, X, brw_imm_uw(1)); /* X 0b1 */ @@ -1246,12 +1246,12 @@ brw_blorp_blit_program::encode_msaa(unsigned num_samples, emit_and(t2, S, brw_imm_uw(1)); /* S 0b1 */ brw_OR(func, t1, t1, t2); /* (X ~0b1) | (S 0b1) */ } - brw_SHL(func, t1, t1, brw_imm_uw(1)); /* (X ~0b1) 1 + emit_shl(t1, t1, brw_imm_uw(1)); /* (X ~0b1) 1 | (S 0b1) 1 */ emit_and(t2, X, brw_imm_uw(1)); /* X 0b1 */ brw_OR(func, Xp, t1, t2); emit_and(t1, Y, brw_imm_uw(0xfffe)); /* Y ~0b1 */ - brw_SHL(func, t1, t1, brw_imm_uw(1)); /* (Y ~0b1) 1 */ + emit_shl(t1, t1, brw_imm_uw(1)); /* (Y ~0b1) 1 */ if (!s_is_zero) { emit_and(t2, S, brw_imm_uw(2)); /* S 0b10 */ brw_OR(func, t1, t1, t2); /* (Y ~0b1) 1 | (S 0b10) */ @@ -1266,19 +1266,19 @@ brw_blorp_blit_program::encode_msaa(unsigned num_samples, * Y' = (Y ~0b1) 1 | (S 0b10) | (Y 0b1) */ emit_and(t1, X, brw_imm_uw(0xfffe)); /* X ~0b1 */ - brw_SHL(func, t1, t1, brw_imm_uw(2)); /* (X ~0b1) 2 */ + emit_shl(t1, t1, brw_imm_uw(2)); /* (X ~0b1) 2 */ if (!s_is_zero) { emit_and(t2, S, brw_imm_uw(4)); /* S 0b100 */ brw_OR(func, t1, t1, t2); /* (X ~0b1) 2 | (S 0b100) */ emit_and(t2, S, brw_imm_uw(1)); /* S 0b1 */ -brw_SHL(func, t2, t2, brw_imm_uw(1)); /* (S 0b1) 1 */ +emit_shl(t2, t2, brw_imm_uw(1)); /* (S 0b1) 1 */ brw_OR(func, t1, t1, t2); /* (X ~0b1) 2 | (S 0b100) | (S 0b1) 1 */ } emit_and(t2, X, brw_imm_uw(1)); /* X 0b1 */ brw_OR(func, Xp, t1, t2); emit_and(t1, Y, brw_imm_uw(0xfffe)); /* Y ~0b1 */ - brw_SHL(func, t1, t1, brw_imm_uw(1)); /* (Y ~0b1) 1 */ + emit_shl(t1, t1, brw_imm_uw(1)); /* (Y ~0b1) 1 */ if (!s_is_zero) { emit_and(t2, S, brw_imm_uw(2)); /* S 0b10 */ brw_OR(func, t1, t1, t2); /* (Y ~0b1) 1 | (S 0b10) */ @@ -1459,8 +1459,8 @@ brw_blorp_blit_program::single_to_blend() * that maxe up a pixel). So we need to multiply our X and Y coordinates * each by 2 and then add 1. 
*/ - brw_SHL(func, t1, X, brw_imm_w(1)); - brw_SHL(func, t2, Y, brw_imm_w(1)); + emit_shl(t1, X, brw_imm_w(1)); + emit_shl(t2, Y, brw_imm_w(1)); emit_add(Xp, t1, brw_imm_w(1)); emit_add(Yp, t2, brw_imm_w(1)); SWAP_XY_AND_XPYP(); diff --git
[Mesa-dev] [v2 06/23] i965/blorp: move emission of rt-write into eu-emitter
Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com Reviewed-by: Paul Berry stereotype...@gmail.com --- src/mesa/drivers/dri/i965/brw_blorp_blit.cpp| 15 +-- src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp | 18 ++ src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h | 5 + 3 files changed, 28 insertions(+), 10 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp index 034a82b..4bbdf3d 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp @@ -1954,16 +1954,11 @@ brw_blorp_blit_program::render_target_write() } /* Now write to the render target and terminate the thread */ - brw_fb_WRITE(func, -16 /* dispatch_width */, -base_mrf /* msg_reg_nr */, -mrf_rt_write /* src0 */, -BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, -BRW_BLORP_RENDERBUFFER_BINDING_TABLE_INDEX, -mrf_offset /* msg_length. TODO: Should be smaller for non-RGBA formats. */, -0 /* response_length */, -true /* eot */, -use_header); + emit_render_target_write( + mrf_rt_write, + base_mrf, + mrf_offset /* msg_length. TODO: Should be smaller for non-RGBA formats. 
*/, + use_header); } diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp index ff45b6a..df8d63d 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp @@ -134,3 +134,21 @@ brw_blorp_eu_emitter::emit_texture_lookup(const struct brw_reg dst, BRW_SAMPLER_SIMD_MODE_SIMD16, BRW_SAMPLER_RETURN_FORMAT_FLOAT32); } + +void +brw_blorp_eu_emitter::emit_render_target_write(const struct brw_reg src0, + unsigned msg_reg_nr, + unsigned msg_length, + bool use_header) +{ + brw_fb_WRITE(func, +16 /* dispatch_width */, +msg_reg_nr, +src0, +BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, +BRW_BLORP_RENDERBUFFER_BINDING_TABLE_INDEX, +msg_length, +0 /* response_length */, +true /* eot */, +use_header); +} diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h index 9e7c43f..5f0c8cf 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h @@ -47,6 +47,11 @@ protected: unsigned base_mrf, unsigned msg_length); + void emit_render_target_write(const struct brw_reg src0, + unsigned msg_reg_nr, + unsigned msg_length, + bool use_header); + void *mem_ctx; struct brw_compile func; }; -- 1.8.3.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [v2 10/23] i965/blorp: wrap MOV (/brw_MOV(func, /emit_mov(/)
In addition, the two special cases requiring explicit execution size control are wrapped manually. Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com Reviewed-by: Paul Berry stereotype...@gmail.com --- src/mesa/drivers/dri/i965/brw_blorp_blit.cpp | 68 +-- src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h | 12 + 2 files changed, 45 insertions(+), 35 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp index da10cf0..ff32e25 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp @@ -1046,8 +1046,8 @@ brw_blorp_blit_program::compute_frag_coords() stride(suboffset(R1, 5), 2, 4, 0), brw_imm_v(0x11001100)); /* Move the coordinates to UD registers. */ - brw_MOV(func, vec16(Xp), retype(X, BRW_REGISTER_TYPE_UW)); - brw_MOV(func, vec16(Yp), retype(Y, BRW_REGISTER_TYPE_UW)); + emit_mov(vec16(Xp), retype(X, BRW_REGISTER_TYPE_UW)); + emit_mov(vec16(Yp), retype(Y, BRW_REGISTER_TYPE_UW)); SWAP_XY_AND_XPYP(); if (key-persample_msaa_dispatch) { @@ -1063,12 +1063,10 @@ brw_blorp_blit_program::compute_frag_coords() * then copy from it using vstride=1, width=4, hstride=0. */ struct brw_reg t1_uw1 = retype(t1, BRW_REGISTER_TYPE_UW); - brw_MOV(func, vec16(t1_uw1), brw_imm_v(0x3210)); + emit_mov(vec16(t1_uw1), brw_imm_v(0x3210)); /* Move to UD sample_index register. 
*/ - brw_set_compression_control(func, BRW_COMPRESSION_NONE); - brw_MOV(func, S, stride(t1_uw1, 1, 4, 0)); - brw_MOV(func, offset(S, 1), suboffset(stride(t1_uw1, 1, 4, 0), 2)); - brw_set_compression_control(func, BRW_COMPRESSION_COMPRESSED); + emit_mov_8(S, stride(t1_uw1, 1, 4, 0)); + emit_mov_8(offset(S, 1), suboffset(stride(t1_uw1, 1, 4, 0), 2)); break; } case 8: { @@ -1090,7 +1088,7 @@ brw_blorp_blit_program::compute_frag_coords() struct brw_reg r0_ud1 = vec1(retype(R0, BRW_REGISTER_TYPE_UD)); brw_AND(func, t1_ud1, r0_ud1, brw_imm_ud(0xc0)); brw_SHR(func, t1_ud1, t1_ud1, brw_imm_ud(5)); - brw_MOV(func, vec16(t2_uw1), brw_imm_v(0x3210)); + emit_mov(vec16(t2_uw1), brw_imm_v(0x3210)); brw_ADD(func, vec16(S), retype(t1_ud1, BRW_REGISTER_TYPE_UW), stride(t2_uw1, 1, 4, 0)); brw_set_compression_control(func, BRW_COMPRESSION_NONE); @@ -1388,8 +1386,8 @@ brw_blorp_blit_program::translate_dst_to_src() struct brw_reg Yp_f = retype(Yp, BRW_REGISTER_TYPE_F); /* Move the UD coordinates to float registers. */ - brw_MOV(func, Xp_f, X); - brw_MOV(func, Yp_f, Y); + emit_mov(Xp_f, X); + emit_mov(Yp_f, Y); /* Scale and offset */ brw_MUL(func, X_f, Xp_f, x_transform.multiplier); brw_MUL(func, Y_f, Yp_f, y_transform.multiplier); @@ -1430,8 +1428,8 @@ brw_blorp_blit_program::translate_dst_to_src() /* Round the float coordinates down to nearest integer by moving to * UD registers. 
*/ - brw_MOV(func, Xp, X_f); - brw_MOV(func, Yp, Y_f); + emit_mov(Xp, X_f); + emit_mov(Yp, Y_f); SWAP_XY_AND_XPYP(); } } @@ -1533,7 +1531,7 @@ brw_blorp_blit_program::manual_blend_average(unsigned num_samples) s_is_zero = true; } else { s_is_zero = false; - brw_MOV(func, vec16(S), brw_imm_ud(i)); + emit_mov(vec16(S), brw_imm_ud(i)); } texel_fetch(texture_data[stack_depth++]); @@ -1633,8 +1631,8 @@ brw_blorp_blit_program::manual_blend_bilinear(unsigned num_samples) brw_imm_f((float)(i 0x1) * (1.0 / key-x_scale))); brw_ADD(func, vec16(y_sample_coords), Yp_f, brw_imm_f((float)((i 1) 0x1) * (1.0 / key-y_scale))); - brw_MOV(func, vec16(X), x_sample_coords); - brw_MOV(func, vec16(Y), y_sample_coords); + emit_mov(vec16(X), x_sample_coords); + emit_mov(vec16(Y), y_sample_coords); /* The MCS value we fetch has to match up with the pixel that we're * sampling from. Since we sample from different pixels in each @@ -1673,7 +1671,7 @@ brw_blorp_blit_program::manual_blend_bilinear(unsigned num_samples) brw_MUL(func, vec16(t1_f), t1_f, brw_imm_f(key-x_scale)); brw_MUL(func, vec16(t2_f), t2_f, brw_imm_f(key-x_scale * key-y_scale)); brw_ADD(func, vec16(t1_f), t1_f, t2_f); - brw_MOV(func, vec16(S), t1_f); + emit_mov(vec16(S), t1_f); if (num_samples == 8) { /* Map the sample index to a sample number */ @@ -1681,20 +1679,20 @@ brw_blorp_blit_program::manual_blend_bilinear(unsigned num_samples) S, brw_imm_d(4)); brw_IF(func, BRW_EXECUTE_16); { -brw_MOV(func, vec16(t2), brw_imm_d(5)); +emit_mov(vec16(t2), brw_imm_d(5)); emit_if_eq_mov(S, 1, vec16(t2), 2); emit_if_eq_mov(S,
[Mesa-dev] [v2 12/23] i965/blorp: wrap ADD (/brw_ADD(func, /emit_add(/)
In addition, the special case requiring explicit execution size control is wrapped manually. Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com Reviewed-by: Paul Berry stereotype...@gmail.com --- src/mesa/drivers/dri/i965/brw_blorp_blit.cpp | 34 +-- src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h | 16 + 2 files changed, 32 insertions(+), 18 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp index 5833d83..392e7a5 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp @@ -1031,7 +1031,7 @@ brw_blorp_blit_program::compute_frag_coords() * Then, we need to add the repeating sequence (0, 1, 0, 1, ...) to the * result, since pixels n+1 and n+3 are in the right half of the subspan. */ - brw_ADD(func, vec16(retype(X, BRW_REGISTER_TYPE_UW)), + emit_add(vec16(retype(X, BRW_REGISTER_TYPE_UW)), stride(suboffset(R1, 4), 2, 4, 0), brw_imm_v(0x10101010)); /* Similarly, Y coordinates for subspans come from R1.2[31:16] through @@ -1042,7 +1042,7 @@ brw_blorp_blit_program::compute_frag_coords() * And we need to add the repeating sequence (0, 0, 1, 1, ...), since * pixels n+2 and n+3 are in the bottom half of the subspan. */ - brw_ADD(func, vec16(retype(Y, BRW_REGISTER_TYPE_UW)), + emit_add(vec16(retype(Y, BRW_REGISTER_TYPE_UW)), stride(suboffset(R1, 5), 2, 4, 0), brw_imm_v(0x11001100)); /* Move the coordinates to UD registers. 
*/ @@ -1089,13 +1089,11 @@ brw_blorp_blit_program::compute_frag_coords() emit_and(t1_ud1, r0_ud1, brw_imm_ud(0xc0)); brw_SHR(func, t1_ud1, t1_ud1, brw_imm_ud(5)); emit_mov(vec16(t2_uw1), brw_imm_v(0x3210)); - brw_ADD(func, vec16(S), retype(t1_ud1, BRW_REGISTER_TYPE_UW), - stride(t2_uw1, 1, 4, 0)); - brw_set_compression_control(func, BRW_COMPRESSION_NONE); - brw_ADD(func, offset(S, 1), - retype(t1_ud1, BRW_REGISTER_TYPE_UW), - suboffset(stride(t2_uw1, 1, 4, 0), 2)); - brw_set_compression_control(func, BRW_COMPRESSION_COMPRESSED); + emit_add(vec16(S), retype(t1_ud1, BRW_REGISTER_TYPE_UW), + stride(t2_uw1, 1, 4, 0)); + emit_add_8(offset(S, 1), +retype(t1_ud1, BRW_REGISTER_TYPE_UW), +suboffset(stride(t2_uw1, 1, 4, 0), 2)); break; } default: @@ -1391,8 +1389,8 @@ brw_blorp_blit_program::translate_dst_to_src() /* Scale and offset */ brw_MUL(func, X_f, Xp_f, x_transform.multiplier); brw_MUL(func, Y_f, Yp_f, y_transform.multiplier); - brw_ADD(func, X_f, X_f, x_transform.offset); - brw_ADD(func, Y_f, Y_f, y_transform.offset); + emit_add(X_f, X_f, x_transform.offset); + emit_add(Y_f, Y_f, y_transform.offset); if (key-blit_scaled key-blend) { /* Translate coordinates to lay out the samples in a rectangular grid * roughly corresponding to sample locations. @@ -1402,8 +1400,8 @@ brw_blorp_blit_program::translate_dst_to_src() /* Adjust coordinates so that integers represent pixel centers rather * than pixel edges. */ - brw_ADD(func, X_f, X_f, brw_imm_f(-0.5)); - brw_ADD(func, Y_f, Y_f, brw_imm_f(-0.5)); + emit_add(X_f, X_f, brw_imm_f(-0.5)); + emit_add(Y_f, Y_f, brw_imm_f(-0.5)); /* Clamp the X, Y texture coordinates to properly handle the sampling of * texels on texture edges. 
@@ -1463,8 +1461,8 @@ brw_blorp_blit_program::single_to_blend() */ brw_SHL(func, t1, X, brw_imm_w(1)); brw_SHL(func, t2, Y, brw_imm_w(1)); - brw_ADD(func, Xp, t1, brw_imm_w(1)); - brw_ADD(func, Yp, t2, brw_imm_w(1)); + emit_add(Xp, t1, brw_imm_w(1)); + emit_add(Yp, t2, brw_imm_w(1)); SWAP_XY_AND_XPYP(); } @@ -1627,9 +1625,9 @@ brw_blorp_blit_program::manual_blend_bilinear(unsigned num_samples) s_is_zero = false; /* Compute pixel coordinates */ - brw_ADD(func, vec16(x_sample_coords), Xp_f, + emit_add(vec16(x_sample_coords), Xp_f, brw_imm_f((float)(i 0x1) * (1.0 / key-x_scale))); - brw_ADD(func, vec16(y_sample_coords), Yp_f, + emit_add(vec16(y_sample_coords), Yp_f, brw_imm_f((float)((i 1) 0x1) * (1.0 / key-y_scale))); emit_mov(vec16(X), x_sample_coords); emit_mov(vec16(Y), y_sample_coords); @@ -1670,7 +1668,7 @@ brw_blorp_blit_program::manual_blend_bilinear(unsigned num_samples) brw_FRC(func, vec16(t2_f), y_sample_coords); brw_MUL(func, vec16(t1_f), t1_f, brw_imm_f(key-x_scale)); brw_MUL(func, vec16(t2_f), t2_f, brw_imm_f(key-x_scale * key-y_scale)); - brw_ADD(func, vec16(t1_f), t1_f, t2_f); + emit_add(vec16(t1_f), t1_f, t2_f); emit_mov(vec16(S), t1_f); if (num_samples == 8) { diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
[Mesa-dev] [v2 16/23] i965/blorp: wrap MUL (/brw_MUL(func, /emit_mul(/)
Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com Reviewed-by: Paul Berry stereotype...@gmail.com --- src/mesa/drivers/dri/i965/brw_blorp_blit.cpp | 18 +- src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h | 7 +++ 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp index f9d1079..2b9224b 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp @@ -1387,16 +1387,16 @@ brw_blorp_blit_program::translate_dst_to_src() emit_mov(Xp_f, X); emit_mov(Yp_f, Y); /* Scale and offset */ - brw_MUL(func, X_f, Xp_f, x_transform.multiplier); - brw_MUL(func, Y_f, Yp_f, y_transform.multiplier); + emit_mul(X_f, Xp_f, x_transform.multiplier); + emit_mul(Y_f, Yp_f, y_transform.multiplier); emit_add(X_f, X_f, x_transform.offset); emit_add(Y_f, Y_f, y_transform.offset); if (key-blit_scaled key-blend) { /* Translate coordinates to lay out the samples in a rectangular grid * roughly corresponding to sample locations. */ - brw_MUL(func, X_f, X_f, brw_imm_f(key-x_scale)); - brw_MUL(func, Y_f, Y_f, brw_imm_f(key-y_scale)); + emit_mul(X_f, X_f, brw_imm_f(key-x_scale)); + emit_mul(Y_f, Y_f, brw_imm_f(key-y_scale)); /* Adjust coordinates so that integers represent pixel centers rather * than pixel edges. 
*/ @@ -1419,8 +1419,8 @@ brw_blorp_blit_program::translate_dst_to_src() /* Round the float coordinates down to nearest integer */ brw_RNDD(func, Xp_f, X_f); brw_RNDD(func, Yp_f, Y_f); - brw_MUL(func, X_f, Xp_f, brw_imm_f(1 / key-x_scale)); - brw_MUL(func, Y_f, Yp_f, brw_imm_f(1 / key-y_scale)); + emit_mul(X_f, Xp_f, brw_imm_f(1 / key-x_scale)); + emit_mul(Y_f, Yp_f, brw_imm_f(1 / key-y_scale)); SWAP_XY_AND_XPYP(); } else if (!key-bilinear_filter) { /* Round the float coordinates down to nearest integer by moving to @@ -1576,7 +1576,7 @@ brw_blorp_blit_program::manual_blend_average(unsigned num_samples) /* Scale the result down by a factor of num_samples */ /* TODO: should use a smaller loop bound for non-RGBA formats */ for (int j = 0; j 4; ++j) { - brw_MUL(func, offset(texture_data[0], 2*j), + emit_mul(offset(texture_data[0], 2*j), offset(vec8(texture_data[0]), 2*j), brw_imm_f(1.0/num_samples)); } @@ -1666,8 +1666,8 @@ brw_blorp_blit_program::manual_blend_bilinear(unsigned num_samples) */ brw_FRC(func, vec16(t1_f), x_sample_coords); brw_FRC(func, vec16(t2_f), y_sample_coords); - brw_MUL(func, vec16(t1_f), t1_f, brw_imm_f(key-x_scale)); - brw_MUL(func, vec16(t2_f), t2_f, brw_imm_f(key-x_scale * key-y_scale)); + emit_mul(vec16(t1_f), t1_f, brw_imm_f(key-x_scale)); + emit_mul(vec16(t2_f), t2_f, brw_imm_f(key-x_scale * key-y_scale)); emit_add(vec16(t1_f), t1_f, t2_f); emit_mov(vec16(S), t1_f); diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h index 1100789..c083ad8 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h @@ -109,6 +109,13 @@ protected: brw_set_compression_control(func, BRW_COMPRESSION_COMPRESSED); } + inline void emit_mul(const struct brw_reg dst, +const struct brw_reg src1, +const struct brw_reg src2) + { + brw_MUL(func, dst, src1, src2); + } + inline void emit_shr(const struct brw_reg dst, const struct brw_reg src1, const struct brw_reg 
src2) -- 1.8.3.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [v2 17/23] i965/blorp: wrap FRC (/brw_FRC(func, /emit_frc(/)
Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com Reviewed-by: Paul Berry stereotype...@gmail.com --- src/mesa/drivers/dri/i965/brw_blorp_blit.cpp | 8 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h | 6 ++ 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp index 2b9224b..4d0b882 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp @@ -1413,8 +1413,8 @@ brw_blorp_blit_program::translate_dst_to_src() /* Store the fractional parts to be used as bilinear interpolation * coefficients. */ - brw_FRC(func, x_frac, X_f); - brw_FRC(func, y_frac, Y_f); + emit_frc(x_frac, X_f); + emit_frc(y_frac, Y_f); /* Round the float coordinates down to nearest integer */ brw_RNDD(func, Xp_f, X_f); @@ -1664,8 +1664,8 @@ brw_blorp_blit_program::manual_blend_bilinear(unsigned num_samples) *| 6 | 7 || 7 | 1 | *-- */ - brw_FRC(func, vec16(t1_f), x_sample_coords); - brw_FRC(func, vec16(t2_f), y_sample_coords); + emit_frc(vec16(t1_f), x_sample_coords); + emit_frc(vec16(t2_f), y_sample_coords); emit_mul(vec16(t1_f), t1_f, brw_imm_f(key-x_scale)); emit_mul(vec16(t2_f), t2_f, brw_imm_f(key-x_scale * key-y_scale)); emit_add(vec16(t1_f), t1_f, t2_f); diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h index c083ad8..f22207d 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h @@ -137,6 +137,12 @@ protected: brw_OR(func, dst, src1, src2); } + inline void emit_frc(const struct brw_reg dst, +const struct brw_reg src) + { + brw_FRC(func, dst, src); + } + void *mem_ctx; struct brw_compile func; }; -- 1.8.3.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [v2 05/23] i965/blorp: move emission of texture lookup into eu-emitter
Resolving of the hardware message type is moved into the emitter also in preparation for switching to use fs_generator. The generator wants to translate the high level op-code into the message type and hence the emitter needs to know the original op-code. Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com Reviewed-by: Paul Berry stereotype...@gmail.com --- src/mesa/drivers/dri/i965/brw_blorp_blit.cpp| 34 +++ src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp | 43 + src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h | 5 +++ 3 files changed, 60 insertions(+), 22 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp index 03fabd6..034a82b 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp @@ -650,7 +650,7 @@ private: void sample(struct brw_reg dst); void texel_fetch(struct brw_reg dst); void mcs_fetch(); - void texture_lookup(struct brw_reg dst, GLuint msg_type, + void texture_lookup(struct brw_reg dst, enum opcode op, const sampler_message_arg *args, int num_args); void render_target_write(); @@ -1765,8 +1765,7 @@ brw_blorp_blit_program::sample(struct brw_reg dst) SAMPLER_MESSAGE_ARG_V_FLOAT }; - texture_lookup(dst, GEN5_SAMPLER_MESSAGE_SAMPLE, args, - ARRAY_SIZE(args)); + texture_lookup(dst, SHADER_OPCODE_TEX, args, ARRAY_SIZE(args)); } /** @@ -1802,8 +1801,7 @@ brw_blorp_blit_program::texel_fetch(struct brw_reg dst) switch (brw-gen) { case 6: - texture_lookup(dst, GEN5_SAMPLER_MESSAGE_SAMPLE_LD, gen6_args, - s_is_zero ? 2 : 5); + texture_lookup(dst, SHADER_OPCODE_TXF, gen6_args, s_is_zero ? 2 : 5); break; case 7: switch (key-tex_layout) { @@ -1819,16 +1817,16 @@ brw_blorp_blit_program::texel_fetch(struct brw_reg dst) * INTEL_MSAA_LAYOUT_CMS. 
*/ case INTEL_MSAA_LAYOUT_CMS: - texture_lookup(dst, GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS, + texture_lookup(dst, SHADER_OPCODE_TXF_CMS, gen7_ld2dms_args, ARRAY_SIZE(gen7_ld2dms_args)); break; case INTEL_MSAA_LAYOUT_UMS: - texture_lookup(dst, GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DSS, + texture_lookup(dst, SHADER_OPCODE_TXF_UMS, gen7_ld2dss_args, ARRAY_SIZE(gen7_ld2dss_args)); break; case INTEL_MSAA_LAYOUT_NONE: assert(s_is_zero); - texture_lookup(dst, GEN5_SAMPLER_MESSAGE_SAMPLE_LD, gen7_ld_args, + texture_lookup(dst, SHADER_OPCODE_TXF, gen7_ld_args, ARRAY_SIZE(gen7_ld_args)); break; } @@ -1846,13 +1844,13 @@ brw_blorp_blit_program::mcs_fetch() SAMPLER_MESSAGE_ARG_U_INT, SAMPLER_MESSAGE_ARG_V_INT }; - texture_lookup(vec16(mcs_data), GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS, + texture_lookup(vec16(mcs_data), SHADER_OPCODE_TXF_MCS, gen7_ld_mcs_args, ARRAY_SIZE(gen7_ld_mcs_args)); } void brw_blorp_blit_program::texture_lookup(struct brw_reg dst, - GLuint msg_type, + enum opcode op, const sampler_message_arg *args, int num_args) { @@ -1916,18 +1914,10 @@ brw_blorp_blit_program::texture_lookup(struct brw_reg dst, mrf.nr += 2; } - brw_SAMPLE(func, - retype(dst, BRW_REGISTER_TYPE_UW) /* dest */, - base_mrf /* msg_reg_nr */, - brw_message_reg(base_mrf) /* src0 */, - BRW_BLORP_TEXTURE_BINDING_TABLE_INDEX, - 0 /* sampler */, - msg_type, - 8 /* response_length. TODO: should be smaller for non-RGBA formats? 
*/, - mrf.nr - base_mrf /* msg_length */, - 0 /* header_present */, - BRW_SAMPLER_SIMD_MODE_SIMD16, - BRW_SAMPLER_RETURN_FORMAT_FLOAT32); + emit_texture_lookup(retype(dst, BRW_REGISTER_TYPE_UW) /* dest */, + op, + base_mrf, + mrf.nr - base_mrf /* msg_length */); } #undef X diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp index 161c679..ff45b6a 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp @@ -23,6 +23,7 @@ #include glsl/ralloc.h #include brw_blorp_blit_eu.h +#include brw_blorp.h brw_blorp_eu_emitter::brw_blorp_eu_emitter(struct brw_context *brw) : mem_ctx(ralloc_context(NULL)) @@ -91,3 +92,45 @@ brw_blorp_eu_emitter::emit_kill_if_outside_rect(const struct brw_reg x, struct brw_instruction *inst = brw_AND(func, g1, f0, g1); inst-header.mask_control = BRW_MASK_DISABLE; } + +void
[Mesa-dev] [v2] Blorp blit compiler to use FS LIR
Here are the remaining patches rebased on top of the two small fixes submitted earlier. Even though I included the entire remaining series, I have revised only patches 1, 3, 4, 7, 19, 20, 21 and 23. These consist of manual changes due to the aforementioned fixes, similar patching of gen8 as gen6/7 (earlier the gen8 generators were not upstreamed yet) and finally the fixes and improvements suggested by Paul. Topi Pohjolainen (23): i965/blorp: introduce separate eu-emitter for blit compiler i965/blorp: move emission of pixel kill into eu-emitter i965: rename tex_ms to tex_cms i965/fs: introduce non-compressed equivalent of tex_cms i965/blorp: move emission of texture lookup into eu-emitter i965/blorp: move emission of rt-write into eu-emitter i965/blorp: move emission of sample combining into eu-emitter i965/blorp: wrap emission of conditional assignment i965/blorp: wrap emission of if-equal-assignment i965/blorp: wrap MOV (/brw_MOV(func, /emit_mov(/) i965/blorp: wrap AND (/brw_AND(func, /emit_and(/) i965/blorp: wrap ADD (/brw_ADD(func, /emit_add(/) i965/blorp: wrap SHR (/brw_SHR(func, /emit_shr(/) i965/blorp: wrap SHL (/brw_SHL(func, /emit_shl(/) i965/blorp: wrap OR (/brw_OR(func, /emit_or(/) i965/blorp: wrap MUL (/brw_MUL(func, /emit_mul(/) i965/blorp: wrap FRC (/brw_FRC(func, /emit_frc(/) i965/blorp: wrap RNDD (/brw_RNDD(func, /emit_rndd(/) i965/blorp: wrap brw_IF/ELSE/ENDIF() into eu-emitter i965/fs: allow unit tests to dump the final patched assembly i965/fs: introduce blorp specific rt-write for fs_generator i965/fs: add support for BRW_OPCODE_AVG in fs_generator i965/blorp: switch eu-emitter to use FS IR and fs_generator src/mesa/drivers/dri/i965/Makefile.sources| 1 + src/mesa/drivers/dri/i965/brw_blorp_blit.cpp | 523 -- src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp | 136 ++ src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h | 190 src/mesa/drivers/dri/i965/brw_defines.h | 4 +- src/mesa/drivers/dri/i965/brw_fs.cpp | 2 +- src/mesa/drivers/dri/i965/brw_fs.h| 6 +- 
src/mesa/drivers/dri/i965/brw_fs_generator.cpp| 44 +- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 4 +- src/mesa/drivers/dri/i965/brw_shader.cpp | 11 +- src/mesa/drivers/dri/i965/brw_vec4.cpp| 2 +- src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 4 +- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp| 2 +- src/mesa/drivers/dri/i965/gen8_fs_generator.cpp | 8 +- src/mesa/drivers/dri/i965/gen8_vec4_generator.cpp | 4 +- 15 files changed, 589 insertions(+), 352 deletions(-) create mode 100644 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp create mode 100644 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h -- 1.8.3.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [v2 13/23] i965/blorp: wrap SHR (/brw_SHR(func, /emit_shr(/)
Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com Reviewed-by: Paul Berry stereotype...@gmail.com --- src/mesa/drivers/dri/i965/brw_blorp_blit.cpp | 24 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h | 7 +++ 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp index 392e7a5..715c716 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp @@ -1087,7 +1087,7 @@ brw_blorp_blit_program::compute_frag_coords() struct brw_reg t2_uw1 = retype(t2, BRW_REGISTER_TYPE_UW); struct brw_reg r0_ud1 = vec1(retype(R0, BRW_REGISTER_TYPE_UD)); emit_and(t1_ud1, r0_ud1, brw_imm_ud(0xc0)); - brw_SHR(func, t1_ud1, t1_ud1, brw_imm_ud(5)); + emit_shr(t1_ud1, t1_ud1, brw_imm_ud(5)); emit_mov(vec16(t2_uw1), brw_imm_v(0x3210)); emit_add(vec16(S), retype(t1_ud1, BRW_REGISTER_TYPE_UW), stride(t2_uw1, 1, 4, 0)); @@ -1164,7 +1164,7 @@ brw_blorp_blit_program::translate_tiling(bool old_tiled_w, bool new_tiled_w) * Y' = (Y ~0b1) 1 | (X 0b1000) 2 | (X 0b10) 1 */ emit_and(t1, X, brw_imm_uw(0xfff4)); /* X ~0b1011 */ - brw_SHR(func, t1, t1, brw_imm_uw(1)); /* (X ~0b1011) 1 */ + emit_shr(t1, t1, brw_imm_uw(1)); /* (X ~0b1011) 1 */ emit_and(t2, Y, brw_imm_uw(1)); /* Y 0b1 */ brw_SHL(func, t2, t2, brw_imm_uw(2)); /* (Y 0b1) 2 */ brw_OR(func, t1, t1, t2); /* (X ~0b1011) 1 | (Y 0b1) 2 */ @@ -1173,10 +1173,10 @@ brw_blorp_blit_program::translate_tiling(bool old_tiled_w, bool new_tiled_w) emit_and(t1, Y, brw_imm_uw(0xfffe)); /* Y ~0b1 */ brw_SHL(func, t1, t1, brw_imm_uw(1)); /* (Y ~0b1) 1 */ emit_and(t2, X, brw_imm_uw(8)); /* X 0b1000 */ - brw_SHR(func, t2, t2, brw_imm_uw(2)); /* (X 0b1000) 2 */ + emit_shr(t2, t2, brw_imm_uw(2)); /* (X 0b1000) 2 */ brw_OR(func, t1, t1, t2); /* (Y ~0b1) 1 | (X 0b1000) 2 */ emit_and(t2, X, brw_imm_uw(2)); /* X 0b10 */ - brw_SHR(func, t2, t2, brw_imm_uw(1)); /* (X 0b10) 1 */ + emit_shr(t2, t2, brw_imm_uw(1)); /* (X 0b10) 
1 */ brw_OR(func, Yp, t1, t2); SWAP_XY_AND_XPYP(); } else { @@ -1198,9 +1198,9 @@ brw_blorp_blit_program::translate_tiling(bool old_tiled_w, bool new_tiled_w) emit_and(t2, X, brw_imm_uw(1)); /* X 0b1 */ brw_OR(func, Xp, t1, t2); emit_and(t1, Y, brw_imm_uw(0xfffc)); /* Y ~0b11 */ - brw_SHR(func, t1, t1, brw_imm_uw(1)); /* (Y ~0b11) 1 */ + emit_shr(t1, t1, brw_imm_uw(1)); /* (Y ~0b11) 1 */ emit_and(t2, X, brw_imm_uw(4)); /* X 0b100 */ - brw_SHR(func, t2, t2, brw_imm_uw(2)); /* (X 0b100) 2 */ + emit_shr(t2, t2, brw_imm_uw(2)); /* (X 0b100) 2 */ brw_OR(func, Yp, t1, t2); SWAP_XY_AND_XPYP(); } @@ -1331,16 +1331,16 @@ brw_blorp_blit_program::decode_msaa(unsigned num_samples, * S = (Y 0b10) | (X 0b10) 1 */ emit_and(t1, X, brw_imm_uw(0xfffc)); /* X ~0b11 */ - brw_SHR(func, t1, t1, brw_imm_uw(1)); /* (X ~0b11) 1 */ + emit_shr(t1, t1, brw_imm_uw(1)); /* (X ~0b11) 1 */ emit_and(t2, X, brw_imm_uw(1)); /* X 0b1 */ brw_OR(func, Xp, t1, t2); emit_and(t1, Y, brw_imm_uw(0xfffc)); /* Y ~0b11 */ - brw_SHR(func, t1, t1, brw_imm_uw(1)); /* (Y ~0b11) 1 */ + emit_shr(t1, t1, brw_imm_uw(1)); /* (Y ~0b11) 1 */ emit_and(t2, Y, brw_imm_uw(1)); /* Y 0b1 */ brw_OR(func, Yp, t1, t2); emit_and(t1, Y, brw_imm_uw(2)); /* Y 0b10 */ emit_and(t2, X, brw_imm_uw(2)); /* X 0b10 */ - brw_SHR(func, t2, t2, brw_imm_uw(1)); /* (X 0b10) 1 */ + emit_shr(t2, t2, brw_imm_uw(1)); /* (X 0b10) 1 */ brw_OR(func, S, t1, t2); break; case 8: @@ -1350,18 +1350,18 @@ brw_blorp_blit_program::decode_msaa(unsigned num_samples, * S = (X 0b100) | (Y 0b10) | (X 0b10) 1 */ emit_and(t1, X, brw_imm_uw(0xfff8)); /* X ~0b111 */ - brw_SHR(func, t1, t1, brw_imm_uw(2)); /* (X ~0b111) 2 */ + emit_shr(t1, t1, brw_imm_uw(2)); /* (X ~0b111) 2 */ emit_and(t2, X, brw_imm_uw(1)); /* X 0b1 */ brw_OR(func, Xp, t1, t2); emit_and(t1, Y, brw_imm_uw(0xfffc)); /* Y ~0b11 */ - brw_SHR(func, t1, t1, brw_imm_uw(1)); /* (Y ~0b11) 1 */ + emit_shr(t1, t1, brw_imm_uw(1)); /* (Y ~0b11) 1 */ emit_and(t2, Y, brw_imm_uw(1)); /* Y 0b1 */ brw_OR(func, Yp, t1, 
t2); emit_and(t1, X, brw_imm_uw(4)); /* X 0b100 */ emit_and(t2, Y, brw_imm_uw(2)); /* Y 0b10 */ brw_OR(func, t1, t1, t2); /* (X 0b100) | (Y 0b10) */ emit_and(t2, X, brw_imm_uw(2)); /* X 0b10 */ - brw_SHR(func, t2, t2, brw_imm_uw(1));
[Mesa-dev] [v2 22/23] i965/fs: add support for BRW_OPCODE_AVG in fs_generator
Needed for compiling blorp blit programs. Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com Reviewed-by: Paul Berry stereotype...@gmail.com --- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 29050c9..9d647fb 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -1421,6 +1421,9 @@ fs_generator::generate_code(exec_list *instructions, FILE *dump_file) case BRW_OPCODE_MUL: brw_MUL(p, dst, src[0], src[1]); break; + case BRW_OPCODE_AVG: +brw_AVG(p, dst, src[0], src[1]); +break; case BRW_OPCODE_MACH: brw_set_acc_write_control(p, 1); brw_MACH(p, dst, src[0], src[1]); -- 1.8.3.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [v2 11/23] i965/blorp: wrap AND (/brw_AND(func, /emit_and(/)
Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com Reviewed-by: Paul Berry stereotype...@gmail.com --- src/mesa/drivers/dri/i965/brw_blorp_blit.cpp | 78 +-- src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h | 7 +++ 2 files changed, 46 insertions(+), 39 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp index ff32e25..5833d83 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp @@ -1086,7 +1086,7 @@ brw_blorp_blit_program::compute_frag_coords() struct brw_reg t1_ud1 = vec1(retype(t1, BRW_REGISTER_TYPE_UD)); struct brw_reg t2_uw1 = retype(t2, BRW_REGISTER_TYPE_UW); struct brw_reg r0_ud1 = vec1(retype(R0, BRW_REGISTER_TYPE_UD)); - brw_AND(func, t1_ud1, r0_ud1, brw_imm_ud(0xc0)); + emit_and(t1_ud1, r0_ud1, brw_imm_ud(0xc0)); brw_SHR(func, t1_ud1, t1_ud1, brw_imm_ud(5)); emit_mov(vec16(t2_uw1), brw_imm_v(0x3210)); brw_ADD(func, vec16(S), retype(t1_ud1, BRW_REGISTER_TYPE_UW), @@ -1165,19 +1165,19 @@ brw_blorp_blit_program::translate_tiling(bool old_tiled_w, bool new_tiled_w) * X' = (X ~0b1011) 1 | (Y 0b1) 2 | X 0b1 (4) * Y' = (Y ~0b1) 1 | (X 0b1000) 2 | (X 0b10) 1 */ - brw_AND(func, t1, X, brw_imm_uw(0xfff4)); /* X ~0b1011 */ + emit_and(t1, X, brw_imm_uw(0xfff4)); /* X ~0b1011 */ brw_SHR(func, t1, t1, brw_imm_uw(1)); /* (X ~0b1011) 1 */ - brw_AND(func, t2, Y, brw_imm_uw(1)); /* Y 0b1 */ + emit_and(t2, Y, brw_imm_uw(1)); /* Y 0b1 */ brw_SHL(func, t2, t2, brw_imm_uw(2)); /* (Y 0b1) 2 */ brw_OR(func, t1, t1, t2); /* (X ~0b1011) 1 | (Y 0b1) 2 */ - brw_AND(func, t2, X, brw_imm_uw(1)); /* X 0b1 */ + emit_and(t2, X, brw_imm_uw(1)); /* X 0b1 */ brw_OR(func, Xp, t1, t2); - brw_AND(func, t1, Y, brw_imm_uw(0xfffe)); /* Y ~0b1 */ + emit_and(t1, Y, brw_imm_uw(0xfffe)); /* Y ~0b1 */ brw_SHL(func, t1, t1, brw_imm_uw(1)); /* (Y ~0b1) 1 */ - brw_AND(func, t2, X, brw_imm_uw(8)); /* X 0b1000 */ + emit_and(t2, X, brw_imm_uw(8)); /* X 0b1000 */ brw_SHR(func, t2, t2, 
brw_imm_uw(2)); /* (X 0b1000) 2 */ brw_OR(func, t1, t1, t2); /* (Y ~0b1) 1 | (X 0b1000) 2 */ - brw_AND(func, t2, X, brw_imm_uw(2)); /* X 0b10 */ + emit_and(t2, X, brw_imm_uw(2)); /* X 0b10 */ brw_SHR(func, t2, t2, brw_imm_uw(1)); /* (X 0b10) 1 */ brw_OR(func, Yp, t1, t2); SWAP_XY_AND_XPYP(); @@ -1188,20 +1188,20 @@ brw_blorp_blit_program::translate_tiling(bool old_tiled_w, bool new_tiled_w) * X' = (X ~0b101) 1 | (Y 0b10) 2 | (Y 0b1) 1 | X 0b1 * Y' = (Y ~0b11) 1 | (X 0b100) 2 */ - brw_AND(func, t1, X, brw_imm_uw(0xfffa)); /* X ~0b101 */ + emit_and(t1, X, brw_imm_uw(0xfffa)); /* X ~0b101 */ brw_SHL(func, t1, t1, brw_imm_uw(1)); /* (X ~0b101) 1 */ - brw_AND(func, t2, Y, brw_imm_uw(2)); /* Y 0b10 */ + emit_and(t2, Y, brw_imm_uw(2)); /* Y 0b10 */ brw_SHL(func, t2, t2, brw_imm_uw(2)); /* (Y 0b10) 2 */ brw_OR(func, t1, t1, t2); /* (X ~0b101) 1 | (Y 0b10) 2 */ - brw_AND(func, t2, Y, brw_imm_uw(1)); /* Y 0b1 */ + emit_and(t2, Y, brw_imm_uw(1)); /* Y 0b1 */ brw_SHL(func, t2, t2, brw_imm_uw(1)); /* (Y 0b1) 1 */ brw_OR(func, t1, t1, t2); /* (X ~0b101) 1 | (Y 0b10) 2 | (Y 0b1) 1 */ - brw_AND(func, t2, X, brw_imm_uw(1)); /* X 0b1 */ + emit_and(t2, X, brw_imm_uw(1)); /* X 0b1 */ brw_OR(func, Xp, t1, t2); - brw_AND(func, t1, Y, brw_imm_uw(0xfffc)); /* Y ~0b11 */ + emit_and(t1, Y, brw_imm_uw(0xfffc)); /* Y ~0b11 */ brw_SHR(func, t1, t1, brw_imm_uw(1)); /* (Y ~0b11) 1 */ - brw_AND(func, t2, X, brw_imm_uw(4)); /* X 0b100 */ + emit_and(t2, X, brw_imm_uw(4)); /* X 0b100 */ brw_SHR(func, t2, t2, brw_imm_uw(2)); /* (X 0b100) 2 */ brw_OR(func, Yp, t1, t2); SWAP_XY_AND_XPYP(); @@ -1243,22 +1243,22 @@ brw_blorp_blit_program::encode_msaa(unsigned num_samples, * where X' = (X ~0b1) 1 | (S 0b1) 1 | (X 0b1) * Y' = (Y ~0b1) 1 | (S 0b10) | (Y 0b1) */ - brw_AND(func, t1, X, brw_imm_uw(0xfffe)); /* X ~0b1 */ + emit_and(t1, X, brw_imm_uw(0xfffe)); /* X ~0b1 */ if (!s_is_zero) { -brw_AND(func, t2, S, brw_imm_uw(1)); /* S 0b1 */ +emit_and(t2, S, brw_imm_uw(1)); /* S 0b1 */ brw_OR(func, t1, t1, t2); /* 
(X ~0b1) | (S 0b1) */ } brw_SHL(func, t1, t1, brw_imm_uw(1)); /* (X ~0b1) 1 | (S 0b1) 1 */ - brw_AND(func, t2, X, brw_imm_uw(1)); /* X 0b1 */ + emit_and(t2, X, brw_imm_uw(1)); /* X 0b1 */ brw_OR(func, Xp, t1, t2); -
[Mesa-dev] [PATCH RFC 04/11] glsl: add dead branch analysis
Dead branch analysis determines when the then or else branches of an if statement will always terminate in a loop jump or return statement, and hence once we enter that branch we will never get to the statements after the if. This is useful for determining the dominance tree, which is needed for the conversion to SSA, as well as various other SSA-based optimizations. --- src/glsl/Makefile.sources | 1 + src/glsl/ir_dead_branches.cpp | 226 ++ src/glsl/ir_dead_branches.h | 78 +++ 3 files changed, 305 insertions(+) create mode 100644 src/glsl/ir_dead_branches.cpp create mode 100644 src/glsl/ir_dead_branches.h diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources index e69c1ac..a43bfa7 100644 --- a/src/glsl/Makefile.sources +++ b/src/glsl/Makefile.sources @@ -33,6 +33,7 @@ LIBGLSL_FILES = \ $(GLSL_SRCDIR)/ir_clone.cpp \ $(GLSL_SRCDIR)/ir_constant_expression.cpp \ $(GLSL_SRCDIR)/ir.cpp \ + $(GLSL_SRCDIR)/ir_dead_branches.cpp \ $(GLSL_SRCDIR)/ir_equals.cpp \ $(GLSL_SRCDIR)/ir_expression_flattening.cpp \ $(GLSL_SRCDIR)/ir_function_can_inline.cpp \ diff --git a/src/glsl/ir_dead_branches.cpp b/src/glsl/ir_dead_branches.cpp new file mode 100644 index 000..f86f009 --- /dev/null +++ b/src/glsl/ir_dead_branches.cpp @@ -0,0 +1,226 @@ +/* + * Copyright © 2013 Connor Abbott (con...@abbott.cx) + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the Software), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include ir.h +#include ir_visitor.h +#include ir_dead_branches.h +#include main/hash_table.h + +/** + * \file ir_dead_branches.h + * + * Provides a visitor which determines, for each if instruction, whether + * control will never flow from the then-block or else-block + * to the next instruction due to jump statements (break, continue, return, + * discard). + */ + +/* + * Note that we keep track of whether a given branch is dead due to a return- + * like statement (return or discard) or due to a loop jump. For example, + * imagine you have a control flow like the following: + * + * if (...) { + *while (...) { + * if (...) { + * ... + * continue; + * } else { + * ... + * return; + * } + *} + * } + * + * After processing the inner if statement, we see that both branches are dead; + * normally, this would result in declaring the then-branch of the outer if + * statement dead, but in this case, there is a loop in between the inner and + * outer if statement, so the branch can in fact be taken. However, if the + * continue statement were a discard or return instead, then control would + * always leave the function as soon as the while loop was reached, so in this + * case the dead branch must skip across the loop. 
So we keep track of whether + * the immediately enclosing control statement is a loop (in_loop), and if we + * are, then after processing an if statement, we only propagate the dead branch + * through the loop if both branches of the inner if statement are dead due to + * a return or discard statement (then_dead_return and else_dead_return). + */ + +ir_dead_branches_visitor::ir_dead_branches_visitor() +{ + this-ht = _mesa_hash_table_create(NULL, _mesa_key_pointer_equal); + this-in_loop = false; + this-outer_if = NULL; + this-in_then = false; +} + +static void +free_entry(struct hash_entry *entry) +{ + ir_dead_branches *dead_branches = (ir_dead_branches *) entry-data; + delete dead_branches; +} + +ir_dead_branches_visitor::~ir_dead_branches_visitor() +{ + _mesa_hash_table_destroy(this-ht, free_entry); +} + +ir_dead_branches::ir_dead_branches(ir_if *ir) +{ + this-ir = ir; + this-then_dead = false; + this-else_dead = false; + this-then_dead_return = false; + this-else_dead_return = false; +} + +ir_dead_branches * +ir_dead_branches_visitor::get_dead_branches(ir_if *ir) +{ + assert(ir); + + struct
[Mesa-dev] [v2 19/23] i965/blorp: wrap brw_IF/ELSE/ENDIF() into eu-emitter
v2 (Paul): renamed emit_if() to emit_cmp_if() Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com Reviewed-by: Paul Berry stereotype...@gmail.com --- src/mesa/drivers/dri/i965/brw_blorp_blit.cpp | 14 +- src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h | 18 ++ 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp index aae0704..6454d2a 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp @@ -1548,9 +1548,7 @@ brw_blorp_blit_program::manual_blend_average(unsigned num_samples) * Since we have already sampled from sample 0, all we need to do is * skip the remaining fetches and averaging if MCS is zero. */ - brw_CMP(func, vec16(brw_null_reg()), BRW_CONDITIONAL_NZ, - mcs_data, brw_imm_ud(0)); - brw_IF(func, BRW_EXECUTE_16); + emit_cmp_if(BRW_CONDITIONAL_NZ, mcs_data, brw_imm_ud(0)); } /* Do count_trailing_one_bits(i) times */ @@ -1583,7 +1581,7 @@ brw_blorp_blit_program::manual_blend_average(unsigned num_samples) } if (key-tex_layout == INTEL_MSAA_LAYOUT_CMS) - brw_ENDIF(func); + emit_endif(); } void @@ -1673,23 +1671,21 @@ brw_blorp_blit_program::manual_blend_bilinear(unsigned num_samples) if (num_samples == 8) { /* Map the sample index to a sample number */ - brw_CMP(func, vec16(brw_null_reg()), BRW_CONDITIONAL_L, - S, brw_imm_d(4)); - brw_IF(func, BRW_EXECUTE_16); + emit_cmp_if(BRW_CONDITIONAL_L, S, brw_imm_d(4)); { emit_mov(vec16(t2), brw_imm_d(5)); emit_if_eq_mov(S, 1, vec16(t2), 2); emit_if_eq_mov(S, 2, vec16(t2), 4); emit_if_eq_mov(S, 3, vec16(t2), 6); } - brw_ELSE(func); + emit_else(); { emit_mov(vec16(t2), brw_imm_d(0)); emit_if_eq_mov(S, 5, vec16(t2), 3); emit_if_eq_mov(S, 6, vec16(t2), 7); emit_if_eq_mov(S, 7, vec16(t2), 1); } - brw_ENDIF(func); + emit_endif(); emit_mov(vec16(S), t2); } texel_fetch(texture_data[i]); diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h 
b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h index 07c96b0..736f5b0 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h @@ -149,6 +149,24 @@ protected: brw_RNDD(func, dst, src); } + inline void emit_cmp_if(int op, + const struct brw_reg x, + const struct brw_reg y) + { + brw_CMP(func, vec16(brw_null_reg()), op, x, y); + brw_IF(func, BRW_EXECUTE_16); + } + + inline void emit_else(void) + { + brw_ELSE(func); + } + + inline void emit_endif(void) + { + brw_ENDIF(func); + } + void *mem_ctx; struct brw_compile func; }; -- 1.8.3.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH RFC 05/11] glsl: add loop jump visitor
This visitor will allow us to determine all the loop jumps that correspond to each loop. In SSA form, each input to a phi node is associated with a predecessor basic block. In the case of phi nodes at the beginning and end of loops, these predecessor blocks will include all blocks that end with a loop_jump (break or continue), and so in order to insert phi nodes we must know all the loop_jump instructions that correspond to each loop. --- src/glsl/Makefile.sources | 1 + src/glsl/ir_loop_jumps.cpp | 129 + src/glsl/ir_loop_jumps.h | 71 + 3 files changed, 201 insertions(+) create mode 100644 src/glsl/ir_loop_jumps.cpp create mode 100644 src/glsl/ir_loop_jumps.h diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources index a43bfa7..869158a 100644 --- a/src/glsl/Makefile.sources +++ b/src/glsl/Makefile.sources @@ -42,6 +42,7 @@ LIBGLSL_FILES = \ $(GLSL_SRCDIR)/ir_hierarchical_visitor.cpp \ $(GLSL_SRCDIR)/ir_hv_accept.cpp \ $(GLSL_SRCDIR)/ir_import_prototypes.cpp \ + $(GLSL_SRCDIR)/ir_loop_jumps.cpp \ $(GLSL_SRCDIR)/ir_print_visitor.cpp \ $(GLSL_SRCDIR)/ir_reader.cpp \ $(GLSL_SRCDIR)/ir_rvalue_visitor.cpp \ diff --git a/src/glsl/ir_loop_jumps.cpp b/src/glsl/ir_loop_jumps.cpp new file mode 100644 index 000..1386340 --- /dev/null +++ b/src/glsl/ir_loop_jumps.cpp @@ -0,0 +1,129 @@ +/* + * Copyright © 2013 Connor Abbott (con...@abbott.cx) + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the Software), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include ir.h +#include ir_visitor.h +#include ir_loop_jumps.h +#include main/hash_table.h + +/** + * \file ir_loop_jumps.h + * + * Provides a visitor that collects all the continue and break statements for + * each loop. + */ + +ir_loop_jumps::ir_loop_jumps(ir_loop *loop) : loop(loop) +{ + this-mem_ctx = ralloc_context(NULL); +} + +ir_loop_jumps::~ir_loop_jumps() +{ + ralloc_free(this-mem_ctx); +} + +void +ir_loop_jumps::add_continue(ir_loop_jump *ir) +{ + ir_loop_jump_entry *entry = new(this-mem_ctx) ir_loop_jump_entry(); + entry-ir = ir; + this-continues.push_tail(entry); +} + +void +ir_loop_jumps::add_break(ir_loop_jump *ir) +{ + ir_loop_jump_entry *entry = new(this-mem_ctx) ir_loop_jump_entry(); + entry-ir = ir; + this-breaks.push_tail(entry); +} + +ir_loop_jumps_visitor::ir_loop_jumps_visitor() +{ + this-ht = _mesa_hash_table_create(NULL, _mesa_key_pointer_equal); + this-outer_loop = NULL; +} + +static void +free_entry(struct hash_entry *entry) +{ + ir_loop_jumps *loop_jumps = (ir_loop_jumps *) entry-data; + delete loop_jumps; +} + +ir_loop_jumps_visitor::~ir_loop_jumps_visitor() +{ + _mesa_hash_table_destroy(this-ht, free_entry); +} + +ir_visitor_status +ir_loop_jumps_visitor::visit_enter(ir_loop *ir) +{ + ir_loop_jumps *loop_jumps = new ir_loop_jumps(ir); + _mesa_hash_table_insert(this-ht, _mesa_hash_pointer(ir), ir, loop_jumps); + + ir_loop *old_outer_loop = this-outer_loop; + this-outer_loop = ir; + + visit_list_elements(this, ir-body_instructions); + + 
this-outer_loop = old_outer_loop; + return visit_continue_with_parent; +} + +ir_visitor_status +ir_loop_jumps_visitor::visit(ir_loop_jump *ir) +{ + ir_loop_jumps *loop_jumps = this-get_loop_jumps(this-outer_loop); + switch (ir-mode) { + case ir_loop_jump::jump_break: +loop_jumps-add_break(ir); +break; + + case ir_loop_jump::jump_continue: +loop_jumps-add_continue(ir); +break; + + default: +assert(!unknown loop jump mode); +break; + } + + return visit_continue; +} + +ir_loop_jumps * +ir_loop_jumps_visitor::get_loop_jumps(ir_loop *ir) +{ + assert(ir); + + struct hash_entry *e = _mesa_hash_table_search(this-ht, + _mesa_hash_pointer(ir), + ir); + if (e) +
[Mesa-dev] [PATCH RFC 09/11] glsl: add pass to convert GLSL IR to SSA form
opt_to_ssa will convert temporaries and local variables to SSA form, although for now it can't handle array and record dereferences. --- src/glsl/Makefile.sources |1 + src/glsl/ir_optimization.h |2 + src/glsl/opt_to_ssa.cpp| 1155 3 files changed, 1158 insertions(+) create mode 100644 src/glsl/opt_to_ssa.cpp diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources index 869158a..961784b 100644 --- a/src/glsl/Makefile.sources +++ b/src/glsl/Makefile.sources @@ -100,6 +100,7 @@ LIBGLSL_FILES = \ $(GLSL_SRCDIR)/opt_redundant_jumps.cpp \ $(GLSL_SRCDIR)/opt_structure_splitting.cpp \ $(GLSL_SRCDIR)/opt_swizzle_swizzle.cpp \ + $(GLSL_SRCDIR)/opt_to_ssa.cpp \ $(GLSL_SRCDIR)/opt_tree_grafting.cpp \ $(GLSL_SRCDIR)/opt_vectorize.cpp \ $(GLSL_SRCDIR)/s_expression.cpp \ diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h index 055d655..92c8b57 100644 --- a/src/glsl/ir_optimization.h +++ b/src/glsl/ir_optimization.h @@ -65,6 +65,8 @@ enum lower_packing_builtins_op { LOWER_UNPACK_UNORM_4x8 = 0x0800 }; +void convert_to_ssa(exec_list *instructions); + bool do_common_optimization(exec_list *ir, bool linked, bool uniform_locations_assigned, unsigned max_unroll_iterations, diff --git a/src/glsl/opt_to_ssa.cpp b/src/glsl/opt_to_ssa.cpp new file mode 100644 index 000..c1044f6 --- /dev/null +++ b/src/glsl/opt_to_ssa.cpp @@ -0,0 +1,1155 @@ +/* + * Copyright © 2013 Connor Abbott (con...@abbott.cx) + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the Software), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in 
all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include ir.h +#include ir_optimization.h +#include ir_hierarchical_visitor.h +#include ir_dead_branches.h +#include ir_loop_jumps.h +#include ir_builder.h +#include ralloc.h +#include glsl_types.h +#include main/hash_table.h + +/** + * \file opt_to_ssa.cpp + * + * This pass will convert all temporaries and local variables to SSA + * temporaries, except for variables which are derefenced as an array or + * structure (which we cannot support in SSA form). The algorithm is loosely + * based on Efficiently Computing Static Single Assignment Form and the + * Control Dependence Graph by Cytron et. al., although there are a number of + * differences caused by the fact that we are operating on a hierachical tree + * of if's and loops instead of the graph of basic blocks that Cytron et. al. + * assume. In particular, instead of explicitly constructing the dominance tree, + * we use an approximation simple enough that all the information we need can + * be found on the fly. The approximation we use is this: + * + * - The instruction before an if statement dominates the then and else branches + * as well as the instructions after the branch, unless one of the branches is + * dead. 
If, for example, the then branch is dead, then the instruction before + * the if statement dominates the then branch and the else branch, and the else + * branch dominates the instruction after the if statement because if we get + * past the branch then we know we must have gone through the else branch. + * + * - The instruction before the loop dominates the instructions inside the loop + * as well as the instructions after the loop. Here is where the approximation + * lies: really, since the loop is guaranteed to execute at least once, the + * instructions after the loop can potentially be dominated by an instruction + * inside the loop. Computing that instruction, though, would be complicated, + * and in the end it doesn't hurt much if we ignore that detail. In the end, we + * may have some phi nodes where all the sources are the same, but these can + * easily be optimized away. + * + * The iterated dominance frontier of an instruction can then be calculated by + * walking up the stack of control flow elements (if's and loops) that
[Mesa-dev] [v2 09/23] i965/blorp: wrap emission of if-equal-assignment
Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com Reviewed-by: Paul Berry stereotype...@gmail.com --- src/mesa/drivers/dri/i965/brw_blorp_blit.cpp | 30 ++- src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h | 6 ++ 2 files changed, 12 insertions(+), 24 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp index 356bb92..da10cf0 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp @@ -1682,34 +1682,16 @@ brw_blorp_blit_program::manual_blend_bilinear(unsigned num_samples) brw_IF(func, BRW_EXECUTE_16); { brw_MOV(func, vec16(t2), brw_imm_d(5)); -brw_CMP(func, vec16(brw_null_reg()), BRW_CONDITIONAL_EQ, -S, brw_imm_d(1)); -brw_MOV(func, vec16(t2), brw_imm_d(2)); -brw_set_predicate_control(func, BRW_PREDICATE_NONE); -brw_CMP(func, vec16(brw_null_reg()), BRW_CONDITIONAL_EQ, -S, brw_imm_d(2)); -brw_MOV(func, vec16(t2), brw_imm_d(4)); -brw_set_predicate_control(func, BRW_PREDICATE_NONE); -brw_CMP(func, vec16(brw_null_reg()), BRW_CONDITIONAL_EQ, -S, brw_imm_d(3)); -brw_MOV(func, vec16(t2), brw_imm_d(6)); -brw_set_predicate_control(func, BRW_PREDICATE_NONE); +emit_if_eq_mov(S, 1, vec16(t2), 2); +emit_if_eq_mov(S, 2, vec16(t2), 4); +emit_if_eq_mov(S, 3, vec16(t2), 6); } brw_ELSE(func); { brw_MOV(func, vec16(t2), brw_imm_d(0)); -brw_CMP(func, vec16(brw_null_reg()), BRW_CONDITIONAL_EQ, -S, brw_imm_d(5)); -brw_MOV(func, vec16(t2), brw_imm_d(3)); -brw_set_predicate_control(func, BRW_PREDICATE_NONE); -brw_CMP(func, vec16(brw_null_reg()), BRW_CONDITIONAL_EQ, -S, brw_imm_d(6)); -brw_MOV(func, vec16(t2), brw_imm_d(7)); -brw_set_predicate_control(func, BRW_PREDICATE_NONE); -brw_CMP(func, vec16(brw_null_reg()), BRW_CONDITIONAL_EQ, -S, brw_imm_d(7)); -brw_MOV(func, vec16(t2), brw_imm_d(1)); -brw_set_predicate_control(func, BRW_PREDICATE_NONE); +emit_if_eq_mov(S, 5, vec16(t2), 3); +emit_if_eq_mov(S, 6, vec16(t2), 7); +emit_if_eq_mov(S, 7, vec16(t2), 1); } 
brw_ENDIF(func); brw_MOV(func, vec16(S), t2); diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h index 555b6d3..34e8da9 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h @@ -68,6 +68,12 @@ protected: brw_set_predicate_control(func, BRW_PREDICATE_NONE); } + inline void emit_if_eq_mov(const struct brw_reg x, unsigned y, + const struct brw_reg dst, unsigned src) + { + emit_cond_mov(x, brw_imm_d(y), BRW_CONDITIONAL_EQ, dst, brw_imm_d(src)); + } + void *mem_ctx; struct brw_compile func; }; -- 1.8.3.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [v2 04/23] i965/fs: introduce non-compressed equivalent of tex_cms
v2: introduces 'SHADER_OPCODE_TXF_UMS' also for gen8 Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com Reviewed-by: Paul Berry stereotype...@gmail.com (v1) --- src/mesa/drivers/dri/i965/brw_defines.h | 1 + src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 5 + src/mesa/drivers/dri/i965/brw_shader.cpp| 3 +++ src/mesa/drivers/dri/i965/gen8_fs_generator.cpp | 4 4 files changed, 13 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 12f7e40..7beda72 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -771,6 +771,7 @@ enum opcode { SHADER_OPCODE_TXS, FS_OPCODE_TXB, SHADER_OPCODE_TXF_CMS, + SHADER_OPCODE_TXF_UMS, SHADER_OPCODE_TXF_MCS, SHADER_OPCODE_LOD, SHADER_OPCODE_TG4, diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index d257748..a92b8ba 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -431,6 +431,10 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src else msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD; break; + case SHADER_OPCODE_TXF_UMS: + assert(brw-gen = 7); + msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DSS; + break; case SHADER_OPCODE_TXF_MCS: assert(brw-gen = 7); msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS; @@ -1658,6 +1662,7 @@ fs_generator::generate_code(exec_list *instructions) case SHADER_OPCODE_TXD: case SHADER_OPCODE_TXF: case SHADER_OPCODE_TXF_CMS: + case SHADER_OPCODE_TXF_UMS: case SHADER_OPCODE_TXF_MCS: case SHADER_OPCODE_TXL: case SHADER_OPCODE_TXS: diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index b74d6e8..b38032e 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -442,6 +442,8 @@ brw_instruction_name(enum opcode op) return txb; case SHADER_OPCODE_TXF_CMS: return txf_cms; + 
case SHADER_OPCODE_TXF_UMS: + return txf_ums; case SHADER_OPCODE_TXF_MCS: return txf_mcs; case SHADER_OPCODE_TG4: @@ -539,6 +541,7 @@ backend_instruction::is_tex() opcode == SHADER_OPCODE_TXD || opcode == SHADER_OPCODE_TXF || opcode == SHADER_OPCODE_TXF_CMS || + opcode == SHADER_OPCODE_TXF_UMS || opcode == SHADER_OPCODE_TXF_MCS || opcode == SHADER_OPCODE_TXL || opcode == SHADER_OPCODE_TXS || diff --git a/src/mesa/drivers/dri/i965/gen8_fs_generator.cpp b/src/mesa/drivers/dri/i965/gen8_fs_generator.cpp index 4e70534..6c710bc 100644 --- a/src/mesa/drivers/dri/i965/gen8_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/gen8_fs_generator.cpp @@ -197,6 +197,9 @@ gen8_fs_generator::generate_tex(fs_inst *ir, case SHADER_OPCODE_TXF_CMS: msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS; break; + case SHADER_OPCODE_TXF_UMS: + msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DSS; + break; case SHADER_OPCODE_TXF_MCS: msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS; break; @@ -864,6 +867,7 @@ gen8_fs_generator::generate_code(exec_list *instructions) case SHADER_OPCODE_TXD: case SHADER_OPCODE_TXF: case SHADER_OPCODE_TXF_CMS: + case SHADER_OPCODE_TXF_UMS: case SHADER_OPCODE_TXF_MCS: case SHADER_OPCODE_TXL: case SHADER_OPCODE_TXS: -- 1.8.3.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [v2 02/23] i965/blorp: move emission of pixel kill into eu-emitter
The combination of four separate comparison operations and the masked "and" requires special treatment when moving to FS LIR. Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com Reviewed-by: Paul Berry stereotype...@gmail.com --- src/mesa/drivers/dri/i965/brw_blorp_blit.cpp| 28 +++-- src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp | 28 + src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h | 7 +++ 3 files changed, 38 insertions(+), 25 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp index f9c355b..03fabd6 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp @@ -640,7 +640,6 @@ private: void translate_tiling(bool old_tiled_w, bool new_tiled_w); void encode_msaa(unsigned num_samples, intel_msaa_layout layout); void decode_msaa(unsigned num_samples, intel_msaa_layout layout); - void kill_if_outside_dst_rect(); void translate_dst_to_src(); void clamp_tex_coords(struct brw_reg regX, struct brw_reg regY, struct brw_reg clampX0, struct brw_reg clampY0, @@ -833,7 +832,9 @@ brw_blorp_blit_program::compile(struct brw_context *brw, */ if (key-use_kill) - kill_if_outside_dst_rect(); + emit_kill_if_outside_rect(x_coords[xy_coord_index], +y_coords[xy_coord_index], +dst_x0, dst_x1, dst_y0, dst_y1); /* Next, apply a translation to obtain coordinates in the source image. */ translate_dst_to_src(); @@ -1375,29 +1376,6 @@ brw_blorp_blit_program::decode_msaa(unsigned num_samples, } /** - * Emit code that kills pixels whose X and Y coordinates are outside the - * boundary of the rectangle defined by the push constants (dst_x0, dst_y0, - * dst_x1, dst_y1). 
- */ -void -brw_blorp_blit_program::kill_if_outside_dst_rect() -{ - struct brw_reg f0 = brw_flag_reg(0, 0); - struct brw_reg g1 = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW); - struct brw_reg null32 = vec16(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD)); - - brw_CMP(func, null32, BRW_CONDITIONAL_GE, X, dst_x0); - brw_CMP(func, null32, BRW_CONDITIONAL_GE, Y, dst_y0); - brw_CMP(func, null32, BRW_CONDITIONAL_L, X, dst_x1); - brw_CMP(func, null32, BRW_CONDITIONAL_L, Y, dst_y1); - - brw_set_predicate_control(func, BRW_PREDICATE_NONE); - - struct brw_instruction *inst = brw_AND(func, g1, f0, g1); - inst-header.mask_control = BRW_MASK_DISABLE; -} - -/** * Emit code to translate from destination (X, Y) coordinates to source (X, Y) * coordinates. */ diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp index 8d723d6..161c679 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp @@ -63,3 +63,31 @@ brw_blorp_eu_emitter::get_program(unsigned *program_size, FILE *dump_file) return brw_get_program(func, program_size); } + +/** + * Emit code that kills pixels whose X and Y coordinates are outside the + * boundary of the rectangle defined by the push constants (dst_x0, dst_y0, + * dst_x1, dst_y1). 
+ */ +void +brw_blorp_eu_emitter::emit_kill_if_outside_rect(const struct brw_reg x, +const struct brw_reg y, +const struct brw_reg dst_x0, +const struct brw_reg dst_x1, +const struct brw_reg dst_y0, +const struct brw_reg dst_y1) +{ + struct brw_reg f0 = brw_flag_reg(0, 0); + struct brw_reg g1 = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW); + struct brw_reg null32 = vec16(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD)); + + brw_CMP(func, null32, BRW_CONDITIONAL_GE, x, dst_x0); + brw_CMP(func, null32, BRW_CONDITIONAL_GE, y, dst_y0); + brw_CMP(func, null32, BRW_CONDITIONAL_L, x, dst_x1); + brw_CMP(func, null32, BRW_CONDITIONAL_L, y, dst_y1); + + brw_set_predicate_control(func, BRW_PREDICATE_NONE); + + struct brw_instruction *inst = brw_AND(func, g1, f0, g1); + inst-header.mask_control = BRW_MASK_DISABLE; +} diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h index 1bcb0d9..3f74e0e 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h @@ -35,6 +35,13 @@ protected: const unsigned *get_program(unsigned *program_size, FILE *dump_file); + void emit_kill_if_outside_rect(const struct brw_reg x, + const struct brw_reg y, + const struct brw_reg dst_x0, + const struct brw_reg dst_x1, + const struct brw_reg dst_y0, +
[Mesa-dev] [v2 07/23] i965/blorp: move emission of sample combining into eu-emitter
v2 (Paul): pass the combining opcode as an argument to emit_combine(). This keeps manual_blend_average() selfcontained documentation wise. Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com Reviewed-by: Paul Berry stereotype...@gmail.com (v1) --- src/mesa/drivers/dri/i965/brw_blorp_blit.cpp| 14 +- src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp | 14 ++ src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h | 5 + 3 files changed, 24 insertions(+), 9 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp index 4bbdf3d..b5f1907 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp @@ -1534,12 +1534,6 @@ brw_blorp_blit_program::manual_blend_average(unsigned num_samples) * For integer formats, we replace the add operations with average * operations and skip the final division. */ - typedef struct brw_instruction *(*brw_op2_ptr)(struct brw_compile *, - struct brw_reg, - struct brw_reg, - struct brw_reg); - brw_op2_ptr combine_op = - key-texture_data_type == BRW_REGISTER_TYPE_F ? brw_ADD : brw_AVG; unsigned stack_depth = 0; for (unsigned i = 0; i num_samples; ++i) { assert(stack_depth == _mesa_bitcount(i)); /* Loop invariant */ @@ -1581,9 +1575,11 @@ brw_blorp_blit_program::manual_blend_average(unsigned num_samples) /* TODO: should use a smaller loop bound for non_RGBA formats */ for (int k = 0; k 4; ++k) { -combine_op(func, offset(texture_data[stack_depth - 1], 2*k), - offset(vec8(texture_data[stack_depth - 1]), 2*k), - offset(vec8(texture_data[stack_depth]), 2*k)); +emit_combine(key-texture_data_type == BRW_REGISTER_TYPE_F ? 
+BRW_OPCODE_ADD : BRW_OPCODE_AVG, + offset(texture_data[stack_depth - 1], 2*k), + offset(vec8(texture_data[stack_depth - 1]), 2*k), + offset(vec8(texture_data[stack_depth]), 2*k)); } } } diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp index df8d63d..9b63458 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp @@ -152,3 +152,17 @@ brw_blorp_eu_emitter::emit_render_target_write(const struct brw_reg src0, true /* eot */, use_header); } + +void +brw_blorp_eu_emitter::emit_combine(enum opcode combine_opcode, + const struct brw_reg dst, + const struct brw_reg src_1, + const struct brw_reg src_2) +{ + assert(combine_opcode == BRW_OPCODE_ADD || combine_opcode == BRW_OPCODE_AVG); + + if (combine_opcode == BRW_OPCODE_ADD) + brw_ADD(func, dst, src_1, src_2); + else + brw_AVG(func, dst, src_1, src_2); +} diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h index 5f0c8cf..55e05f7 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h @@ -52,6 +52,11 @@ protected: unsigned msg_length, bool use_header); + void emit_combine(enum opcode combine_opcode, + const struct brw_reg dst, + const struct brw_reg src_1, + const struct brw_reg src_2); + void *mem_ctx; struct brw_compile func; }; -- 1.8.3.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [v2 18/23] i965/blorp: wrap RNDD (/brw_RNDD(func, /emit_rndd(/)
Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com Reviewed-by: Paul Berry stereotype...@gmail.com --- src/mesa/drivers/dri/i965/brw_blorp_blit.cpp | 4 ++-- src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h | 6 ++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp index 4d0b882..aae0704 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp @@ -1417,8 +1417,8 @@ brw_blorp_blit_program::translate_dst_to_src() emit_frc(y_frac, Y_f); /* Round the float coordinates down to nearest integer */ - brw_RNDD(func, Xp_f, X_f); - brw_RNDD(func, Yp_f, Y_f); + emit_rndd(Xp_f, X_f); + emit_rndd(Yp_f, Y_f); emit_mul(X_f, Xp_f, brw_imm_f(1 / key-x_scale)); emit_mul(Y_f, Yp_f, brw_imm_f(1 / key-y_scale)); SWAP_XY_AND_XPYP(); diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h index f22207d..07c96b0 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h @@ -143,6 +143,12 @@ protected: brw_FRC(func, dst, src); } + inline void emit_rndd(const struct brw_reg dst, + const struct brw_reg src) + { + brw_RNDD(func, dst, src); + } + void *mem_ctx; struct brw_compile func; }; -- 1.8.3.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH RFC 01/11] glsl: fix handling of quadop_vector constant expression
We forgot to handle the case where the base type was a boolean. --- src/glsl/ir_constant_expression.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/glsl/ir_constant_expression.cpp b/src/glsl/ir_constant_expression.cpp index f811fd1..9edc378 100644 --- a/src/glsl/ir_constant_expression.cpp +++ b/src/glsl/ir_constant_expression.cpp @@ -1501,6 +1501,9 @@ ir_expression::constant_expression_value(struct hash_table *variable_context) case GLSL_TYPE_FLOAT: data.f[c] = op[c]-value.f[0]; break; +case GLSL_TYPE_BOOL: + data.b[c] = op[c]-value.b[0]; + break; default: assert(0); } -- 1.8.3.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [v2 15/23] i965/blorp: wrap OR (/brw_OR(func, /emit_or(/)
Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com Reviewed-by: Paul Berry stereotype...@gmail.com --- src/mesa/drivers/dri/i965/brw_blorp_blit.cpp | 48 +-- src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h | 7 2 files changed, 31 insertions(+), 24 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp index 432c11c..f9d1079 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp @@ -1167,17 +1167,17 @@ brw_blorp_blit_program::translate_tiling(bool old_tiled_w, bool new_tiled_w) emit_shr(t1, t1, brw_imm_uw(1)); /* (X ~0b1011) 1 */ emit_and(t2, Y, brw_imm_uw(1)); /* Y 0b1 */ emit_shl(t2, t2, brw_imm_uw(2)); /* (Y 0b1) 2 */ - brw_OR(func, t1, t1, t2); /* (X ~0b1011) 1 | (Y 0b1) 2 */ + emit_or(t1, t1, t2); /* (X ~0b1011) 1 | (Y 0b1) 2 */ emit_and(t2, X, brw_imm_uw(1)); /* X 0b1 */ - brw_OR(func, Xp, t1, t2); + emit_or(Xp, t1, t2); emit_and(t1, Y, brw_imm_uw(0xfffe)); /* Y ~0b1 */ emit_shl(t1, t1, brw_imm_uw(1)); /* (Y ~0b1) 1 */ emit_and(t2, X, brw_imm_uw(8)); /* X 0b1000 */ emit_shr(t2, t2, brw_imm_uw(2)); /* (X 0b1000) 2 */ - brw_OR(func, t1, t1, t2); /* (Y ~0b1) 1 | (X 0b1000) 2 */ + emit_or(t1, t1, t2); /* (Y ~0b1) 1 | (X 0b1000) 2 */ emit_and(t2, X, brw_imm_uw(2)); /* X 0b10 */ emit_shr(t2, t2, brw_imm_uw(1)); /* (X 0b10) 1 */ - brw_OR(func, Yp, t1, t2); + emit_or(Yp, t1, t2); SWAP_XY_AND_XPYP(); } else { /* Applying the same logic as above, but in reverse, we obtain the @@ -1190,18 +1190,18 @@ brw_blorp_blit_program::translate_tiling(bool old_tiled_w, bool new_tiled_w) emit_shl(t1, t1, brw_imm_uw(1)); /* (X ~0b101) 1 */ emit_and(t2, Y, brw_imm_uw(2)); /* Y 0b10 */ emit_shl(t2, t2, brw_imm_uw(2)); /* (Y 0b10) 2 */ - brw_OR(func, t1, t1, t2); /* (X ~0b101) 1 | (Y 0b10) 2 */ + emit_or(t1, t1, t2); /* (X ~0b101) 1 | (Y 0b10) 2 */ emit_and(t2, Y, brw_imm_uw(1)); /* Y 0b1 */ emit_shl(t2, t2, brw_imm_uw(1)); /* (Y 0b1) 1 */ - brw_OR(func, t1, t1, t2); 
/* (X ~0b101) 1 | (Y 0b10) 2 + emit_or(t1, t1, t2); /* (X ~0b101) 1 | (Y 0b10) 2 | (Y 0b1) 1 */ emit_and(t2, X, brw_imm_uw(1)); /* X 0b1 */ - brw_OR(func, Xp, t1, t2); + emit_or(Xp, t1, t2); emit_and(t1, Y, brw_imm_uw(0xfffc)); /* Y ~0b11 */ emit_shr(t1, t1, brw_imm_uw(1)); /* (Y ~0b11) 1 */ emit_and(t2, X, brw_imm_uw(4)); /* X 0b100 */ emit_shr(t2, t2, brw_imm_uw(2)); /* (X 0b100) 2 */ - brw_OR(func, Yp, t1, t2); + emit_or(Yp, t1, t2); SWAP_XY_AND_XPYP(); } } @@ -1244,20 +1244,20 @@ brw_blorp_blit_program::encode_msaa(unsigned num_samples, emit_and(t1, X, brw_imm_uw(0xfffe)); /* X ~0b1 */ if (!s_is_zero) { emit_and(t2, S, brw_imm_uw(1)); /* S 0b1 */ -brw_OR(func, t1, t1, t2); /* (X ~0b1) | (S 0b1) */ +emit_or(t1, t1, t2); /* (X ~0b1) | (S 0b1) */ } emit_shl(t1, t1, brw_imm_uw(1)); /* (X ~0b1) 1 | (S 0b1) 1 */ emit_and(t2, X, brw_imm_uw(1)); /* X 0b1 */ - brw_OR(func, Xp, t1, t2); + emit_or(Xp, t1, t2); emit_and(t1, Y, brw_imm_uw(0xfffe)); /* Y ~0b1 */ emit_shl(t1, t1, brw_imm_uw(1)); /* (Y ~0b1) 1 */ if (!s_is_zero) { emit_and(t2, S, brw_imm_uw(2)); /* S 0b10 */ -brw_OR(func, t1, t1, t2); /* (Y ~0b1) 1 | (S 0b10) */ +emit_or(t1, t1, t2); /* (Y ~0b1) 1 | (S 0b10) */ } emit_and(t2, Y, brw_imm_uw(1)); /* Y 0b1 */ - brw_OR(func, Yp, t1, t2); + emit_or(Yp, t1, t2); break; case 8: /* encode_msaa(8, IMS, X, Y, S) = (X', Y', 0) @@ -1269,22 +1269,22 @@ brw_blorp_blit_program::encode_msaa(unsigned num_samples, emit_shl(t1, t1, brw_imm_uw(2)); /* (X ~0b1) 2 */ if (!s_is_zero) { emit_and(t2, S, brw_imm_uw(4)); /* S 0b100 */ -brw_OR(func, t1, t1, t2); /* (X ~0b1) 2 | (S 0b100) */ +emit_or(t1, t1, t2); /* (X ~0b1) 2 | (S 0b100) */ emit_and(t2, S, brw_imm_uw(1)); /* S 0b1 */ emit_shl(t2, t2, brw_imm_uw(1)); /* (S 0b1) 1 */ -brw_OR(func, t1, t1, t2); /* (X ~0b1) 2 | (S 0b100) +emit_or(t1, t1, t2); /* (X ~0b1) 2 | (S 0b100) | (S 0b1) 1 */ } emit_and(t2, X, brw_imm_uw(1)); /* X 0b1 */ - brw_OR(func, Xp, t1, t2); + emit_or(Xp, t1, t2); emit_and(t1, Y, brw_imm_uw(0xfffe)); /* Y ~0b1 
*/ emit_shl(t1, t1, brw_imm_uw(1)); /* (Y ~0b1) 1 */ if
[Mesa-dev] [PATCH RFC 10/11] glsl: add a pass to convert out of SSA form
Right now we are being basically as naive as possible, and inserting more copies than necessary. It is possible to implement a more sophisticated algorithm later, although extending the current copy propagation pass to support loops better and/or relying on backends to do copy propagation may make this unnecessary. --- src/glsl/Makefile.sources | 1 + src/glsl/ir_optimization.h | 1 + src/glsl/opt_from_ssa.cpp | 198 + 3 files changed, 200 insertions(+) create mode 100644 src/glsl/opt_from_ssa.cpp diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources index 961784b..55859ed 100644 --- a/src/glsl/Makefile.sources +++ b/src/glsl/Makefile.sources @@ -94,6 +94,7 @@ LIBGLSL_FILES = \ $(GLSL_SRCDIR)/opt_dead_functions.cpp \ $(GLSL_SRCDIR)/opt_flatten_nested_if_blocks.cpp \ $(GLSL_SRCDIR)/opt_flip_matrices.cpp \ + $(GLSL_SRCDIR)/opt_from_ssa.cpp \ $(GLSL_SRCDIR)/opt_function_inlining.cpp \ $(GLSL_SRCDIR)/opt_if_simplification.cpp \ $(GLSL_SRCDIR)/opt_noop_swizzle.cpp \ diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h index 92c8b57..9c0ff31 100644 --- a/src/glsl/ir_optimization.h +++ b/src/glsl/ir_optimization.h @@ -66,6 +66,7 @@ enum lower_packing_builtins_op { }; void convert_to_ssa(exec_list *instructions); +void convert_from_ssa(exec_list *instructions); bool do_common_optimization(exec_list *ir, bool linked, bool uniform_locations_assigned, diff --git a/src/glsl/opt_from_ssa.cpp b/src/glsl/opt_from_ssa.cpp new file mode 100644 index 000..6071c45 --- /dev/null +++ b/src/glsl/opt_from_ssa.cpp @@ -0,0 +1,198 @@ +/* + * Copyright © 2013 Connor Abbott (con...@abbott.cx) + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the Software), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom 
the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include ir.h +#include ir_builder.h + +/** + * \file opt_from_ssa.cpp + * + * This file removes all the SSA temporaries and phi nodes from a program. It + * immplements Method I of the paper Translating out of Single Static + * Assignment Form by Sreedhar et. al., a naive method that inserts many more + * copies than necessary; it is assumed that later copy propagation passes will + * clean up the result of this pass. + */ + +using namespace ir_builder; + +static ir_variable * +insert_decl(exec_list *instrs, const glsl_type *type, void *mem_ctx) +{ + ir_variable *var = new(mem_ctx) ir_variable(type, phi_temp, + ir_var_temporary); + instrs-push_head(var); + return var; +} + +static void +eliminate_phi_if(ir_phi_if *phi, ir_if *ir, exec_list *instrs) +{ + ir_variable *var = insert_decl(instrs, phi-dest-type, ralloc_parent(ir)); + + /* +* This converts the destination of the phi node into a non-SSA variable, +* which ir_from_ssa_visitor::visit(ir_dereference_variable *) would normally +* do. We need to do this here because otherwise, the assignment we're +* inserting here will get skipped by the list visitor macro and it won't +* get converted. 
+*/ + + ir-insert_after(phi-dest); + phi-dest-insert_after(assign(phi-dest, var)); + phi-dest-data.mode = ir_var_temporary; + + if (phi-if_src != NULL) + ir-then_instructions.push_tail(assign(var, phi-if_src)); + + if (phi-else_src != NULL) + ir-else_instructions.push_tail(assign(var, phi-else_src)); + + phi-remove(); +} + +static void +eliminate_phi_loop_begin(ir_phi_loop_begin *phi, ir_loop *ir, exec_list *instrs) +{ + ir_variable *var = insert_decl(instrs, phi-dest-type, ralloc_parent(ir)); + ir-body_instructions.push_head(phi-dest); + phi-dest-insert_after(assign(phi-dest, var)); + phi-dest-data.mode = ir_var_temporary; + + if (phi-enter_src != NULL) + ir-insert_before(assign(var, phi-enter_src)); + + if (phi-repeat_src != NULL) + ir-body_instructions.push_tail(assign(var, phi-repeat_src)); + +
[Mesa-dev] [PATCH RFC 07/11] glsl: add SSA infrastructure
This patch introduces all the changes to the IR that are necessary for representing programs in the SSA form. This consists of a new variable mode, the SSA temporary, which is guaranteed to be written to exactly once, and classes to represent phi nodes in the IR. In the current code, variables are first declared using an ir_variable instruction inserted into the instruction stream, and then every dereference will point to the ir_variable declared earlier. SSA temporaries, however, do not work this way. Instead, the variable is declared when it is assigned. That is, the variable is owned by the one and only instruction where it is defined. In SSA, phi nodes may exist at the beginning of any join nodes, or basic blocks with more than one predecessor. In our IR, this can happen in one of three places: - After an if statement, where the then branch and the else branch converge. - At the beginning of a loop, which can be reached from either before the loop (on the first iteration), the end of the loop (when we get to the end of the loop and jump back to the beginning), or any continue statement. - At the end of a loop, which can be reached from any break statement within the loop. Accordingly, there are three different types of phi nodes: if phi nodes, phi nodes at the beginning of a loop, and phi nodes at the end of a loop, all of which derive from the ir_phi base class. 
--- src/glsl/ir.cpp| 56 +++ src/glsl/ir.h | 196 - src/glsl/ir_clone.cpp | 147 --- src/glsl/ir_hierarchical_visitor.cpp | 36 + src/glsl/ir_hierarchical_visitor.h | 11 ++ src/glsl/ir_hv_accept.cpp | 55 ++- src/glsl/ir_print_visitor.cpp | 196 - src/glsl/ir_print_visitor.h| 15 ++ src/glsl/ir_validate.cpp | 158 +++- src/glsl/ir_visitor.h | 8 + src/mesa/drivers/dri/i965/brw_fs.h | 4 + src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 28 src/mesa/drivers/dri/i965/brw_vec4.h | 4 + src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 24 +++ src/mesa/program/ir_to_mesa.cpp| 28 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 29 16 files changed, 956 insertions(+), 39 deletions(-) diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp index 1a36bd6..f1ded80 100644 --- a/src/glsl/ir.cpp +++ b/src/glsl/ir.cpp @@ -1229,6 +1229,37 @@ ir_loop::ir_loop() } +ir_phi::ir_phi() +{ + this-dest = NULL; +} + + +ir_phi_if::ir_phi_if(ir_variable *dest, ir_variable *if_src, +ir_variable *else_src) + : if_src(if_src), else_src(else_src) +{ + this-ir_type = ir_type_phi_if; + this-dest = dest; +} + + +ir_phi_loop_begin::ir_phi_loop_begin(ir_variable* dest, ir_variable* enter_src, +ir_variable* repeat_src) + : enter_src(enter_src), repeat_src(repeat_src) +{ + this-ir_type = ir_type_phi_loop_begin; + this-dest = dest; +} + + +ir_phi_loop_end::ir_phi_loop_end(ir_variable *dest) +{ + this-ir_type = ir_type_phi_loop_end; + this-dest = dest; +} + + ir_dereference_variable::ir_dereference_variable(ir_variable *var) { assert(var != NULL); @@ -1554,6 +1585,9 @@ ir_variable::ir_variable(const struct glsl_type *type, const char *name, this-data.max_array_access = 0; this-data.atomic.buffer_index = 0; this-data.atomic.offset = 0; + this-ssa_assignment = NULL; + this-ssa_phi = NULL; + this-ssa_call = NULL; if (type != NULL) { if (type-base_type == GLSL_TYPE_SAMPLER) @@ -1722,12 +1756,19 @@ steal_memory(ir_instruction *ir, void *new_ctx) { ir_variable *var = ir-as_variable(); ir_constant *constant = ir-as_constant(); + 
ir_dereference_variable *deref = ir-as_dereference_variable(); + ir_phi *phi = ir-as_phi(); + ir_phi_loop_begin *phi_loop_begin = ir-as_phi_loop_begin(); + ir_phi_loop_end *phi_loop_end = ir-as_phi_loop_end(); if (var != NULL var-constant_value != NULL) steal_memory(var-constant_value, ir); if (var != NULL var-constant_initializer != NULL) steal_memory(var-constant_initializer, ir); + if (deref != NULL deref-var-data.mode == ir_var_temporary_ssa) + steal_memory(deref-var, ir); + /* The components of aggregate constants are not visited by the normal * visitor, so steal their values by hand. */ @@ -1744,6 +1785,21 @@ steal_memory(ir_instruction *ir, void *new_ctx) } } + if (phi != NULL) + steal_memory(phi-dest, new_ctx); + + if (phi_loop_begin != NULL) { + foreach_list(n, phi_loop_begin-continue_srcs) { +ralloc_steal(new_ctx, n); + } + } + + if (phi_loop_end != NULL) { + foreach_list(n, phi_loop_end-break_srcs) { +ralloc_steal(new_ctx, n); + } + } +
[Mesa-dev] [v2 20/23] i965/fs: allow unit tests to dump the final patched assembly
Unit tests comparing generated blorp programs to known good need to have the dump in designated file instead of in default standard output. The comparison also expects the jump counters of if-else-instructions to be correctly set and hence the dump needs to be taken _after_ 'patch_IF_ELSE()' is run (the default dump of the fs_generator does this before). v2 (Paul): dropped the redundant 'dump_enabled' argument Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com Reviewed-by: Paul Berry stereotype...@gmail.com --- src/mesa/drivers/dri/i965/brw_fs.h | 5 +++-- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 13 +++-- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index a903908..9984802 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -517,10 +517,11 @@ public: const unsigned *generate_assembly(exec_list *simd8_instructions, exec_list *simd16_instructions, - unsigned *assembly_size); + unsigned *assembly_size, + FILE *dump_file = NULL); private: - void generate_code(exec_list *instructions); + void generate_code(exec_list *instructions, FILE *dump_file); void generate_fb_write(fs_inst *inst); void generate_pixel_xy(struct brw_reg dst, bool is_x); void generate_linterp(fs_inst *inst, struct brw_reg dst, diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index a92b8ba..bcb47e6 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -1298,7 +1298,7 @@ fs_generator::generate_untyped_surface_read(fs_inst *inst, struct brw_reg dst, } void -fs_generator::generate_code(exec_list *instructions) +fs_generator::generate_code(exec_list *instructions, FILE *dump_file) { int last_native_insn_offset = p-next_insn_offset; const char *last_annotation_string = NULL; @@ -1807,21 +1807,22 @@ fs_generator::generate_code(exec_list 
*instructions) * which is often something we want to debug. So this is here in * case you're doing that. */ - if (0) { - brw_dump_compile(p, stdout, 0, p-next_insn_offset); + if (dump_file) { + brw_dump_compile(p, dump_file, 0, p-next_insn_offset); } } const unsigned * fs_generator::generate_assembly(exec_list *simd8_instructions, exec_list *simd16_instructions, -unsigned *assembly_size) +unsigned *assembly_size, +FILE *dump_file) { assert(simd8_instructions || simd16_instructions); if (simd8_instructions) { dispatch_width = 8; - generate_code(simd8_instructions); + generate_code(simd8_instructions, dump_file); } if (simd16_instructions) { @@ -1842,7 +1843,7 @@ fs_generator::generate_assembly(exec_list *simd8_instructions, brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); dispatch_width = 16; - generate_code(simd16_instructions); + generate_code(simd16_instructions, dump_file); } return brw_get_program(p, assembly_size); -- 1.8.3.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [v2 01/23] i965/blorp: introduce separate eu-emitter for blit compiler
Prepares for presenting blorp blit programs using FS IR that allows EU-assembly generation using i965 glsl-compiler backend (fs_generator). v2: rebased on top of endif-jump counter fix (moving the added brw_set_uip_jip() into the emitter) Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com Reviewed-by: Paul Berry stereotype...@gmail.com (v1) --- src/mesa/drivers/dri/i965/Makefile.sources | 1 + src/mesa/drivers/dri/i965/brw_blorp_blit.cpp| 43 ++-- src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp | 65 + src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h | 42 4 files changed, 113 insertions(+), 38 deletions(-) create mode 100644 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp create mode 100644 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index d0c85cf..a3fb417 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -30,6 +30,7 @@ i965_FILES = \ brw_binding_tables.c \ brw_blorp.cpp \ brw_blorp_blit.cpp \ + brw_blorp_blit_eu.cpp \ brw_blorp_clear.cpp \ brw_cc.c \ brw_cfg.cpp \ diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp index 3b92c56..f9c355b 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp @@ -25,13 +25,11 @@ #include main/fbobject.h #include main/renderbuffer.h -#include glsl/ralloc.h - #include intel_fbo.h #include brw_blorp.h #include brw_context.h -#include brw_eu.h +#include brw_blorp_blit_eu.h #include brw_state.h #define FILE_DEBUG_FLAG DEBUG_BLORP @@ -624,12 +622,11 @@ enum sampler_message_arg * (In these formulas, pitch is the number of bytes occupied by a single row * of samples). 
*/ -class brw_blorp_blit_program +class brw_blorp_blit_program : public brw_blorp_eu_emitter { public: brw_blorp_blit_program(struct brw_context *brw, const brw_blorp_blit_prog_key *key); - ~brw_blorp_blit_program(); const GLuint *compile(struct brw_context *brw, GLuint *program_size, FILE *dump_file = stdout); @@ -668,10 +665,8 @@ private: */ static const unsigned LOG2_MAX_BLEND_SAMPLES = 3; - void *mem_ctx; struct brw_context *brw; const brw_blorp_blit_prog_key *key; - struct brw_compile func; /* Thread dispatch header */ struct brw_reg R0; @@ -745,16 +740,10 @@ private: brw_blorp_blit_program::brw_blorp_blit_program( struct brw_context *brw, const brw_blorp_blit_prog_key *key) - : mem_ctx(ralloc_context(NULL)), + : brw_blorp_eu_emitter(brw), brw(brw), key(key) { - brw_init_compile(brw, func, mem_ctx); -} - -brw_blorp_blit_program::~brw_blorp_blit_program() -{ - ralloc_free(mem_ctx); } const GLuint * @@ -806,21 +795,6 @@ brw_blorp_blit_program::compile(struct brw_context *brw, memset(prog_data, 0, sizeof(prog_data)); prog_data.persample_msaa_dispatch = key-persample_msaa_dispatch; - /* -* By default everything is emitted as 16-wide with only a few exceptions -* handled explicitly either here in the compiler or by one of the specific -* code emission calls. -* It should be also noted that here in this file any alterations of the -* compression control settings are only used to affect the execution size -* of the instructions. The instruction template used to initialise all the -* instructions is effectively not altered -- the value stays at zero -* representing either GEN6_COMPRESSION_1Q or GEN6_COMPRESSION_1H depending -* on the context. -* If any other settings are used in the instruction headers, they are set -* elsewhere by the individual code emission calls. 
-*/ - brw_set_compression_control(func, BRW_COMPRESSION_COMPRESSED); - alloc_regs(); compute_frag_coords(); @@ -928,14 +902,7 @@ brw_blorp_blit_program::compile(struct brw_context *brw, */ render_target_write(); - brw_set_uip_jip(func); - - if (unlikely(INTEL_DEBUG DEBUG_BLORP)) { - printf(Native code for BLORP blit:\n); - brw_dump_compile(func, dump_file, 0, func.next_insn_offset); - printf(\n); - } - return brw_get_program(func, program_size); + return get_program(program_size, dump_file); } void @@ -2385,7 +2352,7 @@ brw_blorp_blit_params::get_wm_prog(struct brw_context *brw, prog_offset, prog_data)) { brw_blorp_blit_program prog(brw, this-wm_prog_key); GLuint program_size; - const GLuint *program = prog.compile(brw, program_size); + const GLuint *program = prog.compile(brw, program_size, stdout); brw_upload_cache(brw-cache, BRW_BLORP_BLIT_PROG, this-wm_prog_key, sizeof(this-wm_prog_key),
[Mesa-dev] [v2 08/23] i965/blorp: wrap emission of conditional assignment
Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com Reviewed-by: Paul Berry stereotype...@gmail.com --- src/mesa/drivers/dri/i965/brw_blorp_blit.cpp | 19 --- src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h | 11 +++ 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp index b5f1907..356bb92 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp @@ -1444,21 +1444,10 @@ brw_blorp_blit_program::clamp_tex_coords(struct brw_reg regX, struct brw_reg clampX1, struct brw_reg clampY1) { - brw_CMP(func, vec16(brw_null_reg()), BRW_CONDITIONAL_L, regX, clampX0); - brw_MOV(func, regX, clampX0); - brw_set_predicate_control(func, BRW_PREDICATE_NONE); - - brw_CMP(func, vec16(brw_null_reg()), BRW_CONDITIONAL_G, regX, clampX1); - brw_MOV(func, regX, clampX1); - brw_set_predicate_control(func, BRW_PREDICATE_NONE); - - brw_CMP(func, vec16(brw_null_reg()), BRW_CONDITIONAL_L, regY, clampY0); - brw_MOV(func, regY, clampY0); - brw_set_predicate_control(func, BRW_PREDICATE_NONE); - - brw_CMP(func, vec16(brw_null_reg()), BRW_CONDITIONAL_G, regY, clampY1); - brw_MOV(func, regY, clampY1); - brw_set_predicate_control(func, BRW_PREDICATE_NONE); + emit_cond_mov(regX, clampX0, BRW_CONDITIONAL_L, regX, clampX0); + emit_cond_mov(regX, clampX1, BRW_CONDITIONAL_G, regX, clampX1); + emit_cond_mov(regY, clampY0, BRW_CONDITIONAL_L, regY, clampY0); + emit_cond_mov(regY, clampY1, BRW_CONDITIONAL_G, regY, clampY1); } /** diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h index 55e05f7..555b6d3 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h @@ -57,6 +57,17 @@ protected: const struct brw_reg src_1, const struct brw_reg src_2); + inline void emit_cond_mov(const struct brw_reg x, + const struct brw_reg y, + int op, + const struct 
brw_reg dst, + const struct brw_reg src) + { + brw_CMP(func, vec16(brw_null_reg()), op, x, y); + brw_MOV(func, dst, src); + brw_set_predicate_control(func, BRW_PREDICATE_NONE); + } + void *mem_ctx; struct brw_compile func; }; -- 1.8.3.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [v2 23/23] i965/blorp: switch eu-emitter to use FS IR and fs_generator
No regressions on IVB (piglit quick + unit tests). v2 (Paul): - no need to patch the unit tests anymore. Original logic was altered and unit tests updated to match the fs-generator - lrp emission moves from the blorp compiler core into the emitter here (previously there was a separate refactoring patch which is not really needed anymore as the lrp logic got refactored when the original lrp logic got fixed). - pass 'BRW_BLORP_RENDERBUFFER_BINDING_TABLE_INDEX' to the generator in fs_inst::target instead of hardcoding it CC: Paul Berry stereotype...@gmail.com Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com --- src/mesa/drivers/dri/i965/brw_blorp_blit.cpp| 20 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp | 120 +--- src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h | 64 - 3 files changed, 84 insertions(+), 120 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp index 6454d2a..c4d1108 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp @@ -654,11 +654,6 @@ private: const sampler_message_arg *args, int num_args); void render_target_write(); - void emit_lrp(const struct brw_reg dst, - const struct brw_reg src1, - const struct brw_reg src2, - const struct brw_reg src3); - /** * Base-2 logarithm of the maximum number of samples that can be blended. 
*/ @@ -1585,21 +1580,6 @@ brw_blorp_blit_program::manual_blend_average(unsigned num_samples) } void -brw_blorp_blit_program::emit_lrp(const struct brw_reg dst, - const struct brw_reg src1, - const struct brw_reg src2, - const struct brw_reg src3) -{ - brw_set_access_mode(func, BRW_ALIGN_16); - brw_set_compression_control(func, BRW_COMPRESSION_NONE); - brw_LRP(func, dst, src1, src2, src3); - brw_set_compression_control(func, BRW_COMPRESSION_2NDHALF); - brw_LRP(func, sechalf(dst), sechalf(src1), sechalf(src2), sechalf(src3)); - brw_set_compression_control(func, BRW_COMPRESSION_COMPRESSED); - brw_set_access_mode(func, BRW_ALIGN_1); -} - -void brw_blorp_blit_program::manual_blend_bilinear(unsigned num_samples) { /* We do this computation by performing the following operations: diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp index 9b63458..5b652ad 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp @@ -26,24 +26,9 @@ #include brw_blorp.h brw_blorp_eu_emitter::brw_blorp_eu_emitter(struct brw_context *brw) - : mem_ctx(ralloc_context(NULL)) + : mem_ctx(ralloc_context(NULL)), c(rzalloc(mem_ctx, struct brw_wm_compile)), + generator(brw, c, NULL, NULL, false) { - brw_init_compile(brw, func, mem_ctx); - - /* -* By default everything is emitted as 16-wide with only a few expections -* handled explicitly either here in the compiler or by one of the specific -* code emission calls. -* It should be also noted that here in this file any alterations of the -* compression control settings are only used to affect the execution size -* of the instructions. The instruction template used to initialise all the -* instructions is effectively not altered -- the value stays at zero -* representing either GEN6_COMPRESSION_1Q or GEN6_COMPRESSION_1H depending -* on the context. 
-* If any other settings are used in the instruction headers, they are set -* elsewhere by the individual code emission calls. -*/ - brw_set_compression_control(func, BRW_COMPRESSION_COMPRESSED); } brw_blorp_eu_emitter::~brw_blorp_eu_emitter() @@ -54,15 +39,17 @@ brw_blorp_eu_emitter::~brw_blorp_eu_emitter() const unsigned * brw_blorp_eu_emitter::get_program(unsigned *program_size, FILE *dump_file) { - brw_set_uip_jip(func); + const unsigned *res; if (unlikely(INTEL_DEBUG DEBUG_BLORP)) { printf(Native code for BLORP blit:\n); - brw_dump_compile(func, dump_file, 0, func.next_insn_offset); + res = generator.generate_assembly(NULL, insts, program_size, dump_file); printf(\n); + } else { + res = generator.generate_assembly(NULL, insts, program_size); } - return brw_get_program(func, program_size); + return res; } /** @@ -80,17 +67,15 @@ brw_blorp_eu_emitter::emit_kill_if_outside_rect(const struct brw_reg x, { struct brw_reg f0 = brw_flag_reg(0, 0); struct brw_reg g1 = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW); - struct brw_reg null32 = vec16(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD)); - brw_CMP(func, null32, BRW_CONDITIONAL_GE, x, dst_x0); - brw_CMP(func, null32, BRW_CONDITIONAL_GE, y, dst_y0); - brw_CMP(func, null32,
[Mesa-dev] [v2 21/23] i965/fs: introduce blorp specific rt-write for fs_generator
The compiler for blorp programs likes to emit instructions for the message construction itself meaning that the generator needs to skip any such when blorp programs are translated for the hw. In addition, the binding table control is special for blorp programs and the generator does not need to update the binding tables associated with the compiler bookkeeping (this in fact gets thrown away as the blorp compiler sets the program data in its own way). v2 (Paul): do not hardcode the binding table index but use fs_inst::target instead. Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com --- src/mesa/drivers/dri/i965/brw_defines.h| 1 + src/mesa/drivers/dri/i965/brw_fs.h | 1 + src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 19 +++ src/mesa/drivers/dri/i965/brw_shader.cpp | 2 ++ 4 files changed, 23 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 7beda72..7f4cd10 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -753,6 +753,7 @@ enum opcode { * instructions. 
*/ FS_OPCODE_FB_WRITE = 128, + FS_OPCODE_BLORP_FB_WRITE, SHADER_OPCODE_RCP, SHADER_OPCODE_RSQ, SHADER_OPCODE_SQRT, diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 9984802..ea5de22 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -523,6 +523,7 @@ public: private: void generate_code(exec_list *instructions, FILE *dump_file); void generate_fb_write(fs_inst *inst); + void generate_blorp_fb_write(fs_inst *inst); void generate_pixel_xy(struct brw_reg dst, bool is_x); void generate_linterp(fs_inst *inst, struct brw_reg dst, struct brw_reg *src); diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index bcb47e6..29050c9 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -190,6 +190,21 @@ fs_generator::generate_fb_write(fs_inst *inst) mark_surface_used(surf_index); } +void +fs_generator::generate_blorp_fb_write(fs_inst *inst) +{ + brw_fb_WRITE(p, +16 /* dispatch_width */, +inst-base_mrf, +brw_reg_from_fs_reg(inst-src[0]), +BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, +inst-target, +inst-mlen, +0, +true, +inst-header_present); +} + /* Computes the integer pixel x,y values from the origin. 
* * This is the basis of gl_FragCoord computation, but is also used @@ -1714,6 +1729,10 @@ fs_generator::generate_code(exec_list *instructions, FILE *dump_file) generate_fb_write(inst); break; + case FS_OPCODE_BLORP_FB_WRITE: +generate_blorp_fb_write(inst); +break; + case FS_OPCODE_MOV_DISPATCH_TO_FLAGS: generate_mov_dispatch_to_flags(inst); break; diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index b38032e..45bf730 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -406,6 +406,8 @@ brw_instruction_name(enum opcode op) switch (op) { case FS_OPCODE_FB_WRITE: return fb_write; + case FS_OPCODE_BLORP_FB_WRITE: + return blorp_fb_write; case SHADER_OPCODE_RCP: return rcp; -- 1.8.3.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH RFC 00/11] glsl: add Single Static Assignment (SSA)
This series enables GLSL IR support for SSA, including passes to convert to and from SSA form. SSA is a form of the intermediate representation of a compiler in which each variable is assigned exactly once. SSA form makes many optimizations faster and easier to write, and enables other more powerful optimizations. SSA is used in GCC [1] and LLVM [2] as well as various compiler backends within Mesa itself, such as r600g-sb and Nouveau. Adding support for SSA will allow the various optimizations these backends perform to be implemented in one place, instead of making each driver reinvent the wheel (as several have already done). Additionally, all new backends would receive these optimizations, reducing the burden of writing a compiler backend for a new driver. Even though no optimization passes are now implemented, I am putting out this series to solicit feedback on the design, to make sure I don't have to rewrite things before I go ahead and write these new passes. There are no piglit regressions on Softpipe, except for the spec/OpenGL 2.0/max-samplers test, which only passed before because the compiler happened to unroll the loop; the extra copies caused by the conversion to and from SSA stop the compiler from unrolling, meaning that the resulting GLSL IR code contains an indirect sampler index which glsl-to-tgsi can't handle. Patch 01 is a fix for a bug that came up while Piglit testing this series. Patches 02-06 are changes to GLSL IR that are not explicitly related to enabling SSA, but which are needed by the later patches. Patch 07 modifies the core GLSL IR support to allow it to represent shaders in SSA form, and modifies the printer to print phi nodes and SSA temporaries correctly. Patch 08 adds a function that will come in handy in patch 09, as well as later SSA-based optimizations. Patch 09 adds the code to convert programs to SSA form. Patch 10 adds the code to eliminate phi nodes and SSA temporaries, undoing what the code in Patch 09 does. 
Patch 11 allows us to Piglit test the series, and will get replaced once some actual optimization passes are in place. Some design choices that may need to be discussed: - ir_variables in SSA form are now owned by the instruction where they are defined, i.e. there are no separate ir_variable declarations. This is different from what the compiler currently assumes and requires a lot of rework in different areas, but I thought it was justified for a couple of different reasons: 1. In SSA form, usually variable dereferences point to the instruction in which the variable is written to. Although doing this would be too much of a rewrite, making variables owned by the instruction where they are defined provides some of the benefit of this, making some optimizations such as Global Code Motion [3] easier to write. 2. The original reason for having each ir_variable be declared before it is read/written to was to preserve the tree structure of the IR by making sure each ir_variable appeared as a child only once (i.e. in its declaration). With SSA form, where variables are now written to once, it makes sense for each variable to be a child of the one time it is written to. - The conversion from SSA is currently very naive and inserts many more copies than necessary. It appears that the current copy propagation pass is not able to remove many of those copies, especially in loops. It seems there are a couple different options: 1. Implement Sreedhar's full algorithm; this requires that we implement liveness analysis in GLSL IR. 2. Improve the current copy propagation pass to eliminate the copies it can't handle. 3. Leave it alone, and require that backends remove the copies. i965 vec4 and fs backends, for example, already have a more sophisticated register coalescing pass that does what we need to do, so i965 should be fine with the extra copies. Things that are left to do: - Fixup ir_reader, fix the existing GLSL IR tests, and add more tests for the conversion to/from SSA. 
- Add more optimizations and convert over the existing optimizations. Some optimizations need to be converted to use SSA, while others will be replaced by a more powerful version. For example, Global Code Motion and Global Value Numbering (GVN-GCM) [4] will replace constant propagation, local value numbering, and some of the loop analysis framework while being more powerful than all of those passes. - As mentioned in the introduction, there are various drivers which already use SSA. These drivers are all Gallium drivers, so it would make sense to add support for SSA to TGSI so that the code isn't converted to SSA twice (first in GLSL IR, then in the driver). Also, this would help new drivers like freedreno that want to use SSA optimizations in their backend. This may be more controversial, though, and it's outside of the current scope of this work. This series is also available at https://github.com/cwabbott0/mesa/tree/glsl-ir-ssa-rfc [1] http://gcc.gnu.org/onlinedocs/gccint/SSA.html [2]
[Mesa-dev] [PATCH RFC 02/11] glsl: add as_loop_jump() method to ir_instruction
This will let us dynamically downcast to ir_loop_jump, which will be needed later. --- src/glsl/ir.h | 6 ++ 1 file changed, 6 insertions(+) diff --git a/src/glsl/ir.h b/src/glsl/ir.h index 19e8383..d1e790d 100644 --- a/src/glsl/ir.h +++ b/src/glsl/ir.h @@ -138,6 +138,7 @@ public: virtual class ir_constant * as_constant() { return NULL; } virtual class ir_discard * as_discard() { return NULL; } virtual class ir_jump * as_jump() { return NULL; } + virtual class ir_loop_jump * as_loop_jump(){ return NULL; } /*@}*/ /** @@ -1617,6 +1618,11 @@ public: virtual ir_loop_jump *clone(void *mem_ctx, struct hash_table *) const; + virtual ir_loop_jump *as_loop_jump() + { + return this; + } + virtual void accept(ir_visitor *v) { v-visit(this); -- 1.8.3.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH RFC 08/11] glsl: add ssa_assign() to ir_builder
ssa_assign() creates an SSA variable and assignment at the same time. With this, simple sequences of SSA statements can be easily created. --- src/glsl/ir_builder.cpp | 14 ++ src/glsl/ir_builder.h | 1 + 2 files changed, 15 insertions(+) diff --git a/src/glsl/ir_builder.cpp b/src/glsl/ir_builder.cpp index 2229cd2..03cb38f 100644 --- a/src/glsl/ir_builder.cpp +++ b/src/glsl/ir_builder.cpp @@ -76,6 +76,20 @@ assign(deref lhs, operand rhs, operand condition) return assign(lhs, rhs, condition, (1 lhs.val-type-vector_elements) - 1); } +ir_assignment * +ssa_assign(const char *name, operand rhs) +{ + void *mem_ctx = ralloc_parent(rhs.val); + + ir_variable *var = new(mem_ctx) ir_variable(rhs.val-type, name, + ir_var_temporary_ssa); + + ir_assignment *ret = assign(var, rhs); + var-ssa_assignment = ret; + return ret; +} + + ir_return * ret(operand retval) { diff --git a/src/glsl/ir_builder.h b/src/glsl/ir_builder.h index ae10995..415112b 100644 --- a/src/glsl/ir_builder.h +++ b/src/glsl/ir_builder.h @@ -124,6 +124,7 @@ ir_assignment *assign(deref lhs, operand rhs); ir_assignment *assign(deref lhs, operand rhs, int writemask); ir_assignment *assign(deref lhs, operand rhs, operand condition); ir_assignment *assign(deref lhs, operand rhs, operand condition, int writemask); +ir_assignment *ssa_assign(const char *name, operand rhs); ir_return *ret(operand retval); -- 1.8.3.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH RFC 11/11] glsl: convert to and from SSA form in the compiler
This patch is mainly for allowing me to test these changes with piglit. In the future, a do_ssa_optimizations() function will need to be created and used by this code, as well as all other users of do_common_optimizations(). --- src/glsl/glsl_parser_extras.cpp | 4 1 file changed, 4 insertions(+) diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp index 9a1e0a2..fc88a4b 100644 --- a/src/glsl/glsl_parser_extras.cpp +++ b/src/glsl/glsl_parser_extras.cpp @@ -1488,6 +1488,10 @@ _mesa_glsl_compile_shader(struct gl_context *ctx, struct gl_shader *shader, struct gl_shader_compiler_options *options = ctx-ShaderCompilerOptions[shader-Stage]; + convert_to_ssa(shader-ir); + + convert_from_ssa(shader-ir); + /* Do some optimization at compile time to reduce shader IR size * and reduce later work if the same shader is linked multiple times */ -- 1.8.3.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] mesa: Ensure that transform feedback refers to the correct program.
On 22 January 2014 08:20, Kenneth Graunke kenn...@whitecape.org wrote: On 01/22/2014 06:07 AM, Paul Berry wrote: @@ -376,25 +376,48 @@ _mesa_compute_max_transform_feedback_vertices( **/ +/** + * Figure out which stage of the pipeline is the source of transform feedback + * data given the current context state, and return its gl_shader_program. + * + * If no active program can generate transform feedback data (i.e. no vertex + * shader is active), returns NULL. + */ +static struct gl_shader_program * +get_xfb_source(struct gl_context *ctx) +{ + int i; + for (i = MESA_SHADER_FRAGMENT - 1; i >= MESA_SHADER_VERTEX; i--) { I think this would be clearer as: for (i = MESA_SHADER_GEOMETRY; i >= MESA_SHADER_VERTEX; i--) { ... } Note that the pipeline ordering is: Vertex -> Tess. Control -> Tess. Eval -> Geometry -> Transform Feedback (http://www.opengl.org/wiki/Rendering_Pipeline_Overview) So either implementation would work even with tessellation shaders. Either way, this series is: Reviewed-by: Kenneth Graunke kenn...@whitecape.org Cc: 10.0 mesa-sta...@lists.freedesktop.org That's a good point--I like your suggestion. Thanks for the review! ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [v2 23/23] i965/blorp: switch eu-emitter to use FS IR and fs_generator
On 22 January 2014 09:17, Topi Pohjolainen topi.pohjolai...@intel.com wrote: No regressions on IVB (piglit quick + unit tests). v2 (Paul): - no need to patch the unit tests anymore. Original logic was altered and unit tests updated to match the fs-generator - lrp emission moves from the blorp compiler core into the emitter here (previously there was a separate refactoring patch which is not really needed anymore as the lrp logic got refactored when the original lrp logic got fixed). - pass 'BRW_BLORP_RENDERBUFFER_BINDING_TABLE_INDEX' to the generator in fs_inst::target instead of hardcoding it CC: Paul Berry stereotype...@gmail.com Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com Thanks, Topi. The whole series is now: Reviewed-by: Paul Berry stereotype...@gmail.com ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/4] radeon / r200: Fix incompatible pointer type warning
On Mon, Jan 20, 2014 at 2:26 PM, Ian Romanick i...@freedesktop.org wrote: From: Ian Romanick ian.d.roman...@intel.com When parameters were removed from dd_function_table::Viewport (commit 065bd6ff), radeon_viewport (in both radeon and r200) started generating a warning. radeon_common.c: In function 'r200_radeon_viewport': radeon_common.c:415:15: warning: assignment from incompatible pointer type [enabled by default] radeon_common.c:419:23: warning: assignment from incompatible pointer type [enabled by default] I didn't notice this initially, and it's harmless because the function is never called through the incorrectly typed pointer. Signed-off-by: Ian Romanick ian.d.roman...@intel.com Cc: Alex Deucher alexander.deuc...@amd.com Cc: Marek Olšák marek.ol...@amd.com For the series: Reviewed-by: Alex Deucher alexander.deuc...@amd.com --- src/mesa/drivers/dri/radeon/radeon_common.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/radeon/radeon_common.c b/src/mesa/drivers/dri/radeon/radeon_common.c index 2df4388..5c2b823 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common.c +++ b/src/mesa/drivers/dri/radeon/radeon_common.c @@ -400,8 +400,7 @@ void radeon_viewport(struct gl_context *ctx) { radeonContextPtr radeon = RADEON_CONTEXT(ctx); __DRIcontext *driContext = radeon->dri.context; - void (*old_viewport)(struct gl_context *ctx, GLint x, GLint y, -GLsizei w, GLsizei h); + void (*old_viewport)(struct gl_context *ctx); if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) { if (radeon->is_front_buffer_rendering) { -- 1.8.1.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH V3 7/8] glsl: remove remaining is_array variables
On 22 January 2014 03:33, Timothy Arceri t_arc...@yahoo.com.au wrote: Previously the reason we needed is_array was because we used array_size == NULL to represent both non-arrays and unsized arrays. Now that we use a non-NULL array_specifier to represent an unsized array, is_array is redundant. Signed-off-by: Timothy Arceri t_arc...@yahoo.com.au --- src/glsl/ast.h | 32 +++- src/glsl/ast_to_hir.cpp | 10 +- src/glsl/ast_type.cpp | 6 ++ src/glsl/glsl_parser.yy | 30 +++--- src/glsl/glsl_parser_extras.cpp | 15 ++- 5 files changed, 35 insertions(+), 58 deletions(-) When I try to build this patch I get the compile error: CXX ast_to_hir.lo ../../src/glsl/ast_to_hir.cpp: In member function 'virtual ir_rvalue* ast_declarator_list::hir(exec_list*, _mesa_glsl_parse_state*)': ../../src/glsl/ast_to_hir.cpp:2877:12: error: 'class ast_declaration' has no member named 'is_array' assert(!decl->is_array); ^ I'm guessing you missed this because you are building in release mode, so asserts didn't get compiled. If that's the case, I'd like to encourage you to do a debug build and double-check that piglit tests still pass. You can do that by adding the --enable-debug option when you run ./autogen.sh. With that fixed, the series is: Reviewed-by: Paul Berry stereotype...@gmail.com I'll try to push my patch "glsl: Simplify aggregate type inference to prepare for ARB_arrays_of_arrays." within the next hour. Do you have commit access to Mesa or would you like me to push the series for you? ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] Can we commit the Spanish and Catalan translations?
Hi, It's been a week and no new concerns have been raised with the proposed Spanish and Catalan translations: http://lists.freedesktop.org/archives/mesa-dev/2014-January/051610.html http://lists.freedesktop.org/archives/mesa-dev/2014-January/051611.html http://lists.freedesktop.org/archives/mesa-dev/2014-January/051612.html Can they be committed now? -Alex ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] i965: Replace *_generator::shader with is_glsl boolean.
The shader field in fs_generator, vec4_generator, and gen8_generator was only used for one purpose; to figure out if we were compiling an assembly shader or a GLSL shader. And it wasn't being used properly: in vec4 shaders we were always initializing it based on prog-_LinkedShaders[MESA_SHADER_FRAGMENT], regardless of whether we were compiling a geometry shader or a vertex shader. This was a fairly benign problem, since it's unlikely that a real-world program will try to mix and match GLSL and assembly shaders using separate shader objects. But it seems worth fixing. This patch replaces the shader field with a new is_glsl boolean, and initializes it based on information from the caller, so that it always refers to the correct shader stage. --- src/mesa/drivers/dri/i965/brw_fs.cpp | 6 -- src/mesa/drivers/dri/i965/brw_fs.h| 8 +--- src/mesa/drivers/dri/i965/brw_fs_generator.cpp| 12 ++-- src/mesa/drivers/dri/i965/brw_vec4.cpp| 4 ++-- src/mesa/drivers/dri/i965/brw_vec4.h | 8 +--- src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 11 +-- src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 7 +-- src/mesa/drivers/dri/i965/gen8_fs_generator.cpp | 13 ++--- src/mesa/drivers/dri/i965/gen8_generator.cpp | 6 -- src/mesa/drivers/dri/i965/gen8_generator.h| 5 +++-- src/mesa/drivers/dri/i965/gen8_vec4_generator.cpp | 10 +- 11 files changed, 50 insertions(+), 40 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index a0e4830..c0d65d5 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -3512,11 +3512,13 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c, const unsigned *assembly = NULL; if (brw-gen = 8) { - gen8_fs_generator g(brw, c, prog, fp, v.dual_src_output.file != BAD_FILE); + gen8_fs_generator g(brw, c, prog, fp, v.dual_src_output.file != BAD_FILE, + shader != NULL); assembly = g.generate_assembly(v.instructions, simd16_instructions, final_assembly_size); } else { - 
fs_generator g(brw, c, prog, fp, v.dual_src_output.file != BAD_FILE); + fs_generator g(brw, c, prog, fp, v.dual_src_output.file != BAD_FILE, + shader != NULL); assembly = g.generate_assembly(v.instructions, simd16_instructions, final_assembly_size); } diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index a903908..ad0aa99 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -512,7 +512,8 @@ public: struct brw_wm_compile *c, struct gl_shader_program *prog, struct gl_fragment_program *fp, -bool dual_source_output); +bool dual_source_output, +bool is_glsl); ~fs_generator(); const unsigned *generate_assembly(exec_list *simd8_instructions, @@ -615,7 +616,6 @@ private: struct brw_wm_compile *c; struct gl_shader_program *prog; - struct gl_shader *shader; const struct gl_fragment_program *fp; unsigned dispatch_width; /** 8 or 16 */ @@ -623,6 +623,7 @@ private: exec_list discard_halt_patches; bool dual_source_output; void *mem_ctx; + const bool is_glsl; }; /** @@ -637,7 +638,8 @@ public: struct brw_wm_compile *c, struct gl_shader_program *prog, struct gl_fragment_program *fp, - bool dual_source_output); + bool dual_source_output, + bool is_glsl); ~gen8_fs_generator(); const unsigned *generate_assembly(exec_list *simd8_instructions, diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index e701fc5..a8e81b8 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -40,14 +40,14 @@ fs_generator::fs_generator(struct brw_context *brw, struct brw_wm_compile *c, struct gl_shader_program *prog, struct gl_fragment_program *fp, - bool dual_source_output) + bool dual_source_output, + bool is_glsl) - : brw(brw), c(c), prog(prog), fp(fp), dual_source_output(dual_source_output) + : brw(brw), c(c), prog(prog), fp(fp), + dual_source_output(dual_source_output), is_glsl(is_glsl) { ctx = brw-ctx; - shader 
= prog ? prog-_LinkedShaders[MESA_SHADER_FRAGMENT] : NULL; - mem_ctx = c; p = rzalloc(mem_ctx, struct brw_compile); @@ -1301,7 +1301,7 @@ fs_generator::generate_code(exec_list *instructions)
[Mesa-dev] [PATCH 02/22] r600g: only emit NOP relocations for queries if VM is disabled
From: Marek Olšák marek.ol...@amd.com --- src/gallium/drivers/radeon/r600_cs.h| 14 ++ src/gallium/drivers/radeon/r600_query.c | 14 +- src/gallium/drivers/radeon/r600_streamout.c | 14 -- 3 files changed, 19 insertions(+), 23 deletions(-) diff --git a/src/gallium/drivers/radeon/r600_cs.h b/src/gallium/drivers/radeon/r600_cs.h index fa749da..c3af3de 100644 --- a/src/gallium/drivers/radeon/r600_cs.h +++ b/src/gallium/drivers/radeon/r600_cs.h @@ -66,6 +66,20 @@ static INLINE unsigned r600_context_bo_reloc(struct r600_common_context *rctx, return rctx-ws-cs_add_reloc(ring-cs, rbo-cs_buf, usage, rbo-domains) * 4; } +static INLINE void r600_emit_reloc(struct r600_common_context *rctx, + struct r600_ring *ring, struct r600_resource *rbo, + enum radeon_bo_usage usage) +{ + struct radeon_winsys_cs *cs = ring-cs; + bool has_vm = ((struct r600_common_screen*)rctx-b.screen)-info.r600_virtual_address; + unsigned reloc = r600_context_bo_reloc(rctx, ring, rbo, usage); + + if (!has_vm) { + radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); + radeon_emit(cs, reloc); + } +} + static INLINE void r600_write_config_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num) { assert(reg R600_CONTEXT_REG_OFFSET); diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c index ea9ad11..14e8427 100644 --- a/src/gallium/drivers/radeon/r600_query.c +++ b/src/gallium/drivers/radeon/r600_query.c @@ -206,8 +206,7 @@ static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_q default: assert(0); } - cs-buf[cs-cdw++] = PKT3(PKT3_NOP, 0, 0); - cs-buf[cs-cdw++] = r600_context_bo_reloc(ctx, ctx-rings.gfx, query-buffer.buf, RADEON_USAGE_WRITE); + r600_emit_reloc(ctx, ctx-rings.gfx, query-buffer.buf, RADEON_USAGE_WRITE); if (!r600_is_timer_query(query-type)) { ctx-num_cs_dw_nontimer_queries_suspend += query-num_cs_dw; @@ -272,8 +271,7 @@ static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_que default: assert(0); } - 
cs-buf[cs-cdw++] = PKT3(PKT3_NOP, 0, 0); - cs-buf[cs-cdw++] = r600_context_bo_reloc(ctx, ctx-rings.gfx, query-buffer.buf, RADEON_USAGE_WRITE); + r600_emit_reloc(ctx, ctx-rings.gfx, query-buffer.buf, RADEON_USAGE_WRITE); query-buffer.results_end += query-result_size; @@ -322,8 +320,7 @@ static void r600_emit_query_predication(struct r600_common_context *ctx, struct cs-buf[cs-cdw++] = PKT3(PKT3_SET_PREDICATION, 1, 0); cs-buf[cs-cdw++] = (va + results_base) 0xUL; cs-buf[cs-cdw++] = op | (((va + results_base) 32UL) 0xFF); - cs-buf[cs-cdw++] = PKT3(PKT3_NOP, 0, 0); - cs-buf[cs-cdw++] = r600_context_bo_reloc(ctx, ctx-rings.gfx, qbuf-buf, RADEON_USAGE_READ); + r600_emit_reloc(ctx, ctx-rings.gfx, qbuf-buf, RADEON_USAGE_READ); results_base += query-result_size; /* set CONTINUE bit for all packets except the first */ @@ -818,10 +815,9 @@ void r600_query_init_backend_mask(struct r600_common_context *ctx) cs-buf[cs-cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0); cs-buf[cs-cdw++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1); cs-buf[cs-cdw++] = va; - cs-buf[cs-cdw++] = (va 32UL) 0xFF; + cs-buf[cs-cdw++] = va 32; - cs-buf[cs-cdw++] = PKT3(PKT3_NOP, 0, 0); - cs-buf[cs-cdw++] = r600_context_bo_reloc(ctx, ctx-rings.gfx, buffer, RADEON_USAGE_WRITE); + r600_emit_reloc(ctx, ctx-rings.gfx, buffer, RADEON_USAGE_WRITE); /* analyze results */ results = r600_buffer_map_sync_with_rings(ctx, buffer, PIPE_TRANSFER_READ); diff --git a/src/gallium/drivers/radeon/r600_streamout.c b/src/gallium/drivers/radeon/r600_streamout.c index 0f65de4..adc11e0 100644 --- a/src/gallium/drivers/radeon/r600_streamout.c +++ b/src/gallium/drivers/radeon/r600_streamout.c @@ -199,20 +199,6 @@ static void evergreen_set_streamout_enable(struct r600_common_context *rctx, uns } } -static void r600_emit_reloc(struct r600_common_context *rctx, - struct r600_ring *ring, struct r600_resource *rbo, - enum radeon_bo_usage usage) -{ - struct radeon_winsys_cs *cs = ring-cs; - bool has_vm = ((struct 
r600_common_screen*)rctx-b.screen)-info.r600_virtual_address; - unsigned reloc = r600_context_bo_reloc(rctx, ring, rbo, usage); - - if (!has_vm) { -
[Mesa-dev] [PATCH 10/22] r600g, radeonsi: consolidate get_timestamp, get_driver_query_info
From: Marek Olšák marek.ol...@amd.com This enables more queries for the Gallium HUD with radeonsi. --- src/gallium/drivers/r600/r600_pipe.c | 32 -- src/gallium/drivers/radeon/r600_pipe_common.c | 33 +++ src/gallium/drivers/radeonsi/si_pipe.c| 9 src/gallium/drivers/radeonsi/si_state_draw.c | 1 + 4 files changed, 34 insertions(+), 41 deletions(-) diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 30cf8c4..182a122 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -750,36 +750,6 @@ static void r600_destroy_screen(struct pipe_screen* pscreen) FREE(rscreen); } -static uint64_t r600_get_timestamp(struct pipe_screen *screen) -{ - struct r600_screen *rscreen = (struct r600_screen*)screen; - - return 100 * rscreen-b.ws-query_value(rscreen-b.ws, RADEON_TIMESTAMP) / - rscreen-b.info.r600_clock_crystal_freq; -} - -static int r600_get_driver_query_info(struct pipe_screen *screen, - unsigned index, - struct pipe_driver_query_info *info) -{ - struct r600_screen *rscreen = (struct r600_screen*)screen; - struct pipe_driver_query_info list[] = { - {draw-calls, R600_QUERY_DRAW_CALLS, 0}, - {requested-VRAM, R600_QUERY_REQUESTED_VRAM, rscreen-b.info.vram_size, TRUE}, - {requested-GTT, R600_QUERY_REQUESTED_GTT, rscreen-b.info.gart_size, TRUE}, - {buffer-wait-time, R600_QUERY_BUFFER_WAIT_TIME, 0, FALSE} - }; - - if (!info) - return Elements(list); - - if (index = Elements(list)) - return 0; - - *info = list[index]; - return 1; -} - static struct pipe_resource *r600_resource_create(struct pipe_screen *screen, const struct pipe_resource *templ) { @@ -807,13 +777,11 @@ struct pipe_screen *r600_screen_create(struct radeon_winsys *ws) rscreen-b.b.get_shader_param = r600_get_shader_param; rscreen-b.b.get_paramf = r600_get_paramf; rscreen-b.b.get_compute_param = r600_get_compute_param; - rscreen-b.b.get_timestamp = r600_get_timestamp; if (rscreen-b.info.chip_class = EVERGREEN) { 
rscreen-b.b.is_format_supported = evergreen_is_format_supported; } else { rscreen-b.b.is_format_supported = r600_is_format_supported; } - rscreen-b.b.get_driver_query_info = r600_get_driver_query_info; if (rscreen-b.info.has_uvd) { rscreen-b.b.get_video_param = ruvd_get_video_param; rscreen-b.b.is_video_format_supported = ruvd_is_format_supported; diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c index cabc6ef..ccd27fd 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.c +++ b/src/gallium/drivers/radeon/r600_pipe_common.c @@ -27,6 +27,7 @@ #include r600_pipe_common.h #include r600_cs.h #include tgsi/tgsi_parse.h +#include util/u_memory.h #include util/u_format_s3tc.h #include util/u_upload_mgr.h #include inttypes.h @@ -188,6 +189,36 @@ static const char* r600_get_name(struct pipe_screen* pscreen) } } +static uint64_t r600_get_timestamp(struct pipe_screen *screen) +{ + struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; + + return 100 * rscreen-ws-query_value(rscreen-ws, RADEON_TIMESTAMP) / + rscreen-info.r600_clock_crystal_freq; +} + +static int r600_get_driver_query_info(struct pipe_screen *screen, + unsigned index, + struct pipe_driver_query_info *info) +{ + struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; + struct pipe_driver_query_info list[] = { + {draw-calls, R600_QUERY_DRAW_CALLS, 0}, + {requested-VRAM, R600_QUERY_REQUESTED_VRAM, rscreen-info.vram_size, TRUE}, + {requested-GTT, R600_QUERY_REQUESTED_GTT, rscreen-info.gart_size, TRUE}, + {buffer-wait-time, R600_QUERY_BUFFER_WAIT_TIME, 0, FALSE} + }; + + if (!info) + return Elements(list); + + if (index = Elements(list)) + return 0; + + *info = list[index]; + return 1; +} + static void r600_fence_reference(struct pipe_screen *screen, struct pipe_fence_handle **ptr, struct pipe_fence_handle *fence) @@ -343,6 +374,8 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen, rscreen-b.get_name = 
r600_get_name; rscreen-b.get_vendor = r600_get_vendor; + rscreen-b.get_driver_query_info = r600_get_driver_query_info; + rscreen-b.get_timestamp =
[Mesa-dev] [PATCH 12/22] r600g, radeonsi: consolidate get_paramf and get_video_param
From: Marek Olšák marek.ol...@amd.com radeonsi now reports PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE = true if UVD support isn't available. It's what all the other drivers do. Also, some #include directives were missing in radeon_uvd.h. --- src/gallium/drivers/r600/r600_pipe.c | 64 - src/gallium/drivers/radeon/r600_pipe_common.c | 67 +++ src/gallium/drivers/radeon/radeon_uvd.h | 3 ++ src/gallium/drivers/radeonsi/si_pipe.c| 54 - 4 files changed, 70 insertions(+), 118 deletions(-) diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index e2d97e5..aa6ebc0 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -445,34 +445,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) return 0; } -static float r600_get_paramf(struct pipe_screen* pscreen, -enum pipe_capf param) -{ - struct r600_screen *rscreen = (struct r600_screen *)pscreen; - enum radeon_family family = rscreen-b.family; - - switch (param) { - case PIPE_CAPF_MAX_LINE_WIDTH: - case PIPE_CAPF_MAX_LINE_WIDTH_AA: - case PIPE_CAPF_MAX_POINT_WIDTH: - case PIPE_CAPF_MAX_POINT_WIDTH_AA: - if (family = CHIP_CEDAR) - return 16384.0f; - else - return 8192.0f; - case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY: - return 16.0f; - case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS: - return 16.0f; - case PIPE_CAPF_GUARD_BAND_LEFT: - case PIPE_CAPF_GUARD_BAND_TOP: - case PIPE_CAPF_GUARD_BAND_RIGHT: - case PIPE_CAPF_GUARD_BAND_BOTTOM: - return 0.0f; - } - return 0.0f; -} - static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enum pipe_shader_cap param) { switch(shader) @@ -536,34 +508,6 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e return 0; } -static int r600_get_video_param(struct pipe_screen *screen, - enum pipe_video_profile profile, - enum pipe_video_entrypoint entrypoint, - enum pipe_video_cap param) -{ - switch (param) { - case PIPE_VIDEO_CAP_SUPPORTED: - return 
vl_profile_supported(screen, profile, entrypoint); - case PIPE_VIDEO_CAP_NPOT_TEXTURES: - return 1; - case PIPE_VIDEO_CAP_MAX_WIDTH: - case PIPE_VIDEO_CAP_MAX_HEIGHT: - return vl_video_buffer_max_size(screen); - case PIPE_VIDEO_CAP_PREFERED_FORMAT: - return PIPE_FORMAT_NV12; - case PIPE_VIDEO_CAP_PREFERS_INTERLACED: - return false; - case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED: - return false; - case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE: - return true; - case PIPE_VIDEO_CAP_MAX_LEVEL: - return vl_level_supported(screen, profile); - default: - return 0; - } -} - const char * r600_llvm_gpu_string(enum radeon_family family) { const char * gpu_family; @@ -775,20 +719,12 @@ struct pipe_screen *r600_screen_create(struct radeon_winsys *ws) rscreen-b.b.destroy = r600_destroy_screen; rscreen-b.b.get_param = r600_get_param; rscreen-b.b.get_shader_param = r600_get_shader_param; - rscreen-b.b.get_paramf = r600_get_paramf; rscreen-b.b.get_compute_param = r600_get_compute_param; if (rscreen-b.info.chip_class = EVERGREEN) { rscreen-b.b.is_format_supported = evergreen_is_format_supported; } else { rscreen-b.b.is_format_supported = r600_is_format_supported; } - if (rscreen-b.info.has_uvd) { - rscreen-b.b.get_video_param = ruvd_get_video_param; - rscreen-b.b.is_video_format_supported = ruvd_is_format_supported; - } else { - rscreen-b.b.get_video_param = r600_get_video_param; - rscreen-b.b.is_video_format_supported = vl_video_buffer_is_format_supported; - } rscreen-b.b.resource_create = r600_resource_create; if (!r600_common_screen_init(rscreen-b, ws)) { diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c index ccd27fd..7447eea 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.c +++ b/src/gallium/drivers/radeon/r600_pipe_common.c @@ -24,12 +24,15 @@ * */ +#include radeon/radeon_uvd.h #include r600_pipe_common.h #include r600_cs.h #include tgsi/tgsi_parse.h #include util/u_memory.h #include util/u_format_s3tc.h #include 
util/u_upload_mgr.h +#include vl/vl_decoder.h +#include vl/vl_video_buffer.h #include inttypes.h /* @@ -189,6 +192,61 @@ static const char*
[Mesa-dev] [PATCH 05/22] radeonsi: use queries from r600g
From: Marek Olšák marek.ol...@amd.com --- src/gallium/drivers/radeonsi/Makefile.sources | 1 - src/gallium/drivers/radeonsi/si.h | 45 --- src/gallium/drivers/radeonsi/si_blit.c| 22 +- src/gallium/drivers/radeonsi/si_hw_context.c | 525 +- src/gallium/drivers/radeonsi/si_pipe.c| 13 +- src/gallium/drivers/radeonsi/si_pipe.h| 19 - src/gallium/drivers/radeonsi/si_query.c | 147 src/gallium/drivers/radeonsi/si_state.c | 15 + src/gallium/drivers/radeonsi/si_state_draw.c | 10 +- 9 files changed, 40 insertions(+), 757 deletions(-) delete mode 100644 src/gallium/drivers/radeonsi/si_query.c diff --git a/src/gallium/drivers/radeonsi/Makefile.sources b/src/gallium/drivers/radeonsi/Makefile.sources index 33f1492..5629572 100644 --- a/src/gallium/drivers/radeonsi/Makefile.sources +++ b/src/gallium/drivers/radeonsi/Makefile.sources @@ -7,7 +7,6 @@ C_SOURCES := \ si_hw_context.c \ si_pipe.c \ si_pm4.c \ - si_query.c \ si_resource.c \ si_shader.c \ si_state.c \ diff --git a/src/gallium/drivers/radeonsi/si.h b/src/gallium/drivers/radeonsi/si.h index 46184ec..5c69b07 100644 --- a/src/gallium/drivers/radeonsi/si.h +++ b/src/gallium/drivers/radeonsi/si.h @@ -33,56 +33,11 @@ #include si_resource.h struct winsys_handle; - -/* R600/R700 STATES */ -struct si_query { - union { - uint64_tu64; - boolean b; - struct pipe_query_data_so_statistics so; - } result; - /* The kind of query */ - unsignedtype; - /* Offset of the first result for current query */ - unsignedresults_start; - /* Offset of the next free result after current query data */ - unsignedresults_end; - /* Size of the result in memory for both begin_query and end_query, -* this can be one or two numbers, or it could even be a size of a structure. */ - unsignedresult_size; - /* The buffer where query results are stored. 
It's used as a ring, -* data blocks for current query are stored sequentially from -* results_start to results_end, with wrapping on the buffer end */ - struct r600_resource*buffer; - /* The number of dwords for begin_query or end_query. */ - unsignednum_cs_dw; - /* linked list of queries */ - struct list_headlist; -}; - struct si_context; struct si_screen; -void si_get_backend_mask(struct si_context *ctx); void si_context_flush(struct si_context *ctx, unsigned flags); void si_begin_new_cs(struct si_context *ctx); - -struct si_query *si_context_query_create(struct si_context *ctx, unsigned query_type); -void si_context_query_destroy(struct si_context *ctx, struct si_query *query); -boolean si_context_query_result(struct si_context *ctx, - struct si_query *query, - boolean wait, void *vresult); -void si_query_begin(struct si_context *ctx, struct si_query *query); -void si_query_end(struct si_context *ctx, struct si_query *query); -void si_context_queries_suspend(struct si_context *ctx); -void si_context_queries_resume(struct si_context *ctx); -void si_query_predication(struct si_context *ctx, struct si_query *query, int operation, - int flag_wait); - -bool si_is_timer_query(unsigned type); -bool si_query_needs_begin(unsigned type); void si_need_cs_space(struct si_context *ctx, unsigned num_dw, boolean count_draw_in); -int si_context_init(struct si_context *ctx); - #endif diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index ba2ebe7..250caab 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -51,7 +51,7 @@ static void si_blitter_begin(struct pipe_context *ctx, enum si_blitter_op op) { struct si_context *sctx = (struct si_context *)ctx; - si_context_queries_suspend(sctx); + r600_suspend_nontimer_queries(sctx-b); util_blitter_save_blend(sctx-blitter, sctx-queued.named.blend); util_blitter_save_depth_stencil_alpha(sctx-blitter, sctx-queued.named.dsa); @@ -81,26 +81,18 @@ static 
void si_blitter_begin(struct pipe_context *ctx, enum si_blitter_op op) sctx-samplers[PIPE_SHADER_FRAGMENT].views.views); } - if ((op SI_DISABLE_RENDER_COND) sctx-current_render_cond) { - sctx-saved_render_cond = sctx-current_render_cond; - sctx-saved_render_cond_cond = sctx-current_render_cond_cond; - sctx-saved_render_cond_mode = sctx-current_render_cond_mode; -
[Mesa-dev] [PATCH 13/22] r600g, radeonsi: consolidate get_compute_param
From: Marek Olšák marek.ol...@amd.com --- src/gallium/drivers/r600/r600_pipe.c | 162 -- src/gallium/drivers/r600/r600_pipe.h | 3 - src/gallium/drivers/radeon/r600_pipe_common.c | 157 + src/gallium/drivers/radeon/r600_pipe_common.h | 1 + src/gallium/drivers/radeonsi/si_pipe.c| 100 src/gallium/drivers/radeonsi/si_pipe.h| 1 - src/gallium/drivers/radeonsi/si_shader.c | 2 +- 7 files changed, 159 insertions(+), 267 deletions(-) diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index aa6ebc0..d95e717 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -508,167 +508,6 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e return 0; } -const char * r600_llvm_gpu_string(enum radeon_family family) -{ - const char * gpu_family; - - switch (family) { - case CHIP_R600: - case CHIP_RV630: - case CHIP_RV635: - case CHIP_RV670: - gpu_family = r600; - break; - case CHIP_RV610: - case CHIP_RV620: - case CHIP_RS780: - case CHIP_RS880: - gpu_family = rs880; - break; - case CHIP_RV710: - gpu_family = rv710; - break; - case CHIP_RV730: - gpu_family = rv730; - break; - case CHIP_RV740: - case CHIP_RV770: - gpu_family = rv770; - break; - case CHIP_PALM: - case CHIP_CEDAR: - gpu_family = cedar; - break; - case CHIP_SUMO: - case CHIP_SUMO2: - gpu_family = sumo; - break; - case CHIP_REDWOOD: - gpu_family = redwood; - break; - case CHIP_JUNIPER: - gpu_family = juniper; - break; - case CHIP_HEMLOCK: - case CHIP_CYPRESS: - gpu_family = cypress; - break; - case CHIP_BARTS: - gpu_family = barts; - break; - case CHIP_TURKS: - gpu_family = turks; - break; - case CHIP_CAICOS: - gpu_family = caicos; - break; - case CHIP_CAYMAN: -case CHIP_ARUBA: - gpu_family = cayman; - break; - default: - gpu_family = ; - fprintf(stderr, Chip not supported by r600 llvm - backend, please file a bug at PACKAGE_BUGREPORT \n); - break; - } - return gpu_family; -} - - -static int r600_get_compute_param(struct 
pipe_screen *screen, -enum pipe_compute_cap param, -void *ret) -{ - struct r600_screen *rscreen = (struct r600_screen *)screen; - //TODO: select these params by asic - switch (param) { - case PIPE_COMPUTE_CAP_IR_TARGET: { - const char *gpu = r600_llvm_gpu_string(rscreen-b.family); - if (ret) { - sprintf(ret, %s-r600--, gpu); - } - return (8 + strlen(gpu)) * sizeof(char); - } - case PIPE_COMPUTE_CAP_GRID_DIMENSION: - if (ret) { - uint64_t * grid_dimension = ret; - grid_dimension[0] = 3; - } - return 1 * sizeof(uint64_t); - - case PIPE_COMPUTE_CAP_MAX_GRID_SIZE: - if (ret) { - uint64_t * grid_size = ret; - grid_size[0] = 65535; - grid_size[1] = 65535; - grid_size[2] = 1; - } - return 3 * sizeof(uint64_t) ; - - case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE: - if (ret) { - uint64_t * block_size = ret; - block_size[0] = 256; - block_size[1] = 256; - block_size[2] = 256; - } - return 3 * sizeof(uint64_t); - - case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK: - if (ret) { - uint64_t * max_threads_per_block = ret; - *max_threads_per_block = 256; - } - return sizeof(uint64_t); - - case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: - if (ret) { - uint64_t * max_global_size = ret; - /* XXX: This is what the proprietary driver reports, we -* may want to use a different value. */ - *max_global_size = 201326592; - } - return sizeof(uint64_t); - - case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: - if (ret) { - uint64_t * max_input_size = ret; - *max_input_size = 1024; -
[Mesa-dev] [PATCH 18/22] radeonsi: remove si.h
From: Marek Olšák marek.ol...@amd.com --- src/gallium/drivers/radeonsi/si.h | 43 -- src/gallium/drivers/radeonsi/si_pipe.c | 1 - src/gallium/drivers/radeonsi/si_pipe.h | 6 - 3 files changed, 5 insertions(+), 45 deletions(-) delete mode 100644 src/gallium/drivers/radeonsi/si.h diff --git a/src/gallium/drivers/radeonsi/si.h b/src/gallium/drivers/radeonsi/si.h deleted file mode 100644 index 5c69b07..000 --- a/src/gallium/drivers/radeonsi/si.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse gli...@freedesktop.org - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the Software), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: - * Jerome Glisse - */ -#ifndef SI_H -#define SI_H - -#include ../../winsys/radeon/drm/radeon_winsys.h -#include util/u_double_list.h -#include util/u_transfer.h - -#include si_resource.h - -struct winsys_handle; -struct si_context; -struct si_screen; - -void si_context_flush(struct si_context *ctx, unsigned flags); -void si_begin_new_cs(struct si_context *ctx); -void si_need_cs_space(struct si_context *ctx, unsigned num_dw, boolean count_draw_in); - -#endif diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index f4df4b1..3d5b998 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -44,7 +44,6 @@ #include pipebuffer/pb_buffer.h #include si_pipe.h #include radeon/radeon_uvd.h -#include si.h #include sid.h #include si_resource.h #include si_pipe.h diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index fc75854..2828734 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -33,7 +33,6 @@ #include util/u_format.h #include util/u_math.h #include util/u_slab.h -#include si.h #include sid.h #include si_public.h #include si_pm4.h @@ -161,6 +160,11 @@ void si_flush_depth_textures(struct si_context *sctx, void si_decompress_color_textures(struct si_context *sctx, struct si_textures_info *textures); +/* si_hw_context.c */ +void si_context_flush(struct si_context *ctx, unsigned flags); +void si_begin_new_cs(struct si_context *ctx); +void si_need_cs_space(struct si_context *ctx, unsigned num_dw, boolean count_draw_in); + /* si_pipe.c */ void si_flush(struct pipe_context *ctx, struct pipe_fence_handle **fence, unsigned flags); -- 1.8.3.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 21/22] radeonsi: remove open-coded PS_PARTIAL_FLUSH event
From: Marek Olšák marek.ol...@amd.com --- src/gallium/drivers/radeonsi/si_hw_context.c | 8 +++- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c index bb3d080..2e1e9f2 100644 --- a/src/gallium/drivers/radeonsi/si_hw_context.c +++ b/src/gallium/drivers/radeonsi/si_hw_context.c @@ -107,13 +107,11 @@ void si_context_flush(struct si_context *ctx, unsigned flags) R600_CONTEXT_FLUSH_AND_INV_CB_META | R600_CONTEXT_FLUSH_AND_INV_DB | R600_CONTEXT_FLUSH_AND_INV_DB_META | - R600_CONTEXT_INV_TEX_CACHE; + R600_CONTEXT_INV_TEX_CACHE | + /* this is probably not needed anymore */ + R600_CONTEXT_PS_PARTIAL_FLUSH; si_emit_cache_flush(&ctx->b, NULL); - /* this is probably not needed anymore */ - cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); - cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4); - /* force to keep tiling flags */ flags |= RADEON_FLUSH_KEEP_TILING_FLAGS; -- 1.8.3.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 04/22] r600g: remove a no-op while loop
From: Marek Olšák marek.ol...@amd.com for (;;) { } while (); I was surprised to see such a statement. --- src/gallium/drivers/radeon/r600_query.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c index bf73a14..915d37a 100644 --- a/src/gallium/drivers/radeon/r600_query.c +++ b/src/gallium/drivers/radeon/r600_query.c @@ -326,7 +326,7 @@ static void r600_emit_query_predication(struct r600_common_context *ctx, struct /* set CONTINUE bit for all packets except the first */ op |= PREDICATION_CONTINUE; } - } while (qbuf); + } } } -- 1.8.3.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 06/22] radeonsi: advertise the pipeline statistics query
From: Marek Olšák marek.ol...@amd.com Implemented by the common code. You can now visualize the statistics with the HUD, see GALLIUM_HUD=help for all available queries. For example: GALLIUM_HUD=clipper-primitives-generated --- src/gallium/drivers/radeonsi/si_pipe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index d2bcd5a..6680ee8 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -310,6 +310,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_COMPUTE: case PIPE_CAP_TEXTURE_BUFFER_OBJECTS: case PIPE_CAP_TGSI_VS_LAYER: + case PIPE_CAP_QUERY_PIPELINE_STATISTICS: return 1; case PIPE_CAP_TEXTURE_MULTISAMPLE: @@ -342,7 +343,6 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_VERTEX_COLOR_CLAMPED: case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION: case PIPE_CAP_USER_VERTEX_BUFFERS: - case PIPE_CAP_QUERY_PIPELINE_STATISTICS: case PIPE_CAP_CUBE_MAP_ARRAY: return 0; -- 1.8.3.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 17/22] radeonsi: move si_upload_const_buffer to a better place
From: Marek Olšák marek.ol...@amd.com This gets rid of another file. --- src/gallium/drivers/radeonsi/Makefile.sources | 1 - src/gallium/drivers/radeonsi/si_buffer.c | 63 --- src/gallium/drivers/radeonsi/si_descriptors.c | 27 src/gallium/drivers/radeonsi/si_resource.h| 6 --- src/gallium/drivers/radeonsi/si_state.h | 2 + 5 files changed, 29 insertions(+), 70 deletions(-) delete mode 100644 src/gallium/drivers/radeonsi/si_buffer.c diff --git a/src/gallium/drivers/radeonsi/Makefile.sources b/src/gallium/drivers/radeonsi/Makefile.sources index 4e1f971..11b3319 100644 --- a/src/gallium/drivers/radeonsi/Makefile.sources +++ b/src/gallium/drivers/radeonsi/Makefile.sources @@ -1,6 +1,5 @@ C_SOURCES := \ si_blit.c \ - si_buffer.c \ si_commands.c \ si_compute.c \ si_descriptors.c \ diff --git a/src/gallium/drivers/radeonsi/si_buffer.c b/src/gallium/drivers/radeonsi/si_buffer.c deleted file mode 100644 index 7994405..000 --- a/src/gallium/drivers/radeonsi/si_buffer.c +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse gli...@freedesktop.org - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the Software), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Jerome Glisse - * Corbin Simpson mostawesomed...@gmail.com - */ - -#include pipe/p_screen.h -#include util/u_format.h -#include util/u_math.h -#include util/u_inlines.h -#include util/u_memory.h -#include util/u_upload_mgr.h - -#include si.h -#include si_pipe.h - -void si_upload_const_buffer(struct si_context *sctx, struct r600_resource **rbuffer, - const uint8_t *ptr, unsigned size, - uint32_t *const_offset) -{ - if (SI_BIG_ENDIAN) { - uint32_t *tmpPtr; - unsigned i; - - if (!(tmpPtr = malloc(size))) { - R600_ERR(Failed to allocate BE swap buffer.\n); - return; - } - - for (i = 0; i size / 4; ++i) { - tmpPtr[i] = util_bswap32(((uint32_t *)ptr)[i]); - } - - u_upload_data(sctx-b.uploader, 0, size, tmpPtr, const_offset, - (struct pipe_resource**)rbuffer); - - free(tmpPtr); - } else { - u_upload_data(sctx-b.uploader, 0, size, ptr, const_offset, - (struct pipe_resource**)rbuffer); - } -} diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index e64799d..f29d8bb 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -29,6 +29,7 @@ #include si_shader.h #include util/u_memory.h +#include util/u_upload_mgr.h #define SI_NUM_CONTEXTS 16 @@ -400,6 +401,32 @@ static void si_buffer_resources_begin_new_cs(struct si_context *sctx, /* CONSTANT BUFFERS */ +void si_upload_const_buffer(struct si_context *sctx, struct r600_resource **rbuffer, + const uint8_t *ptr, unsigned size, uint32_t *const_offset) +{ + if (SI_BIG_ENDIAN) { + uint32_t *tmpPtr; + unsigned i; + + if (!(tmpPtr = malloc(size))) { + R600_ERR(Failed to allocate BE swap buffer.\n); + return; + } + + for (i = 0; i 
< size / 4; ++i) { + tmpPtr[i] = util_bswap32(((uint32_t *)ptr)[i]); + } + + u_upload_data(sctx->b.uploader, 0, size, tmpPtr, const_offset, + (struct pipe_resource**)rbuffer); + + free(tmpPtr); + } else { + u_upload_data(sctx->b.uploader, 0, size, ptr, const_offset, + (struct pipe_resource**)rbuffer); + } +} + static
[Mesa-dev] [PATCH 20/22] radeonsi: move some inline functions from si_pipe.h to si_state.c
From: Marek Olšák marek.ol...@amd.com And si_tex_aniso_filter is unused. --- src/gallium/drivers/radeonsi/si_pipe.h | 39 - src/gallium/drivers/radeonsi/si_state.c | 30 + 2 files changed, 30 insertions(+), 39 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 69f49d1..38eb128 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -189,45 +189,6 @@ struct pipe_video_buffer *si_video_buffer_create(struct pipe_context *pipe, /* * common helpers */ -static INLINE uint32_t S_FIXED(float value, uint32_t frac_bits) -{ - return value * (1 << frac_bits); -} -#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y)) - -static INLINE unsigned si_map_swizzle(unsigned swizzle) -{ - switch (swizzle) { - case UTIL_FORMAT_SWIZZLE_Y: - return V_008F0C_SQ_SEL_Y; - case UTIL_FORMAT_SWIZZLE_Z: - return V_008F0C_SQ_SEL_Z; - case UTIL_FORMAT_SWIZZLE_W: - return V_008F0C_SQ_SEL_W; - case UTIL_FORMAT_SWIZZLE_0: - return V_008F0C_SQ_SEL_0; - case UTIL_FORMAT_SWIZZLE_1: - return V_008F0C_SQ_SEL_1; - default: /* UTIL_FORMAT_SWIZZLE_X */ - return V_008F0C_SQ_SEL_X; - } -} - -static inline unsigned si_tex_aniso_filter(unsigned filter) -{ - if (filter <= 1) return 0; - if (filter <= 2) return 1; - if (filter <= 4) return 2; - if (filter <= 8) return 3; -/* else */return 4; -} - -/* 12.4 fixed-point */ -static INLINE unsigned si_pack_float_12p4(float x) -{ - return x <= 0? 0 : - x >= 4096 ?
0xffff : x * 16; -} static INLINE struct r600_resource * si_resource_create_custom(struct pipe_screen *screen, diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 3eab4d9..fd4e26a 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -166,6 +166,36 @@ static unsigned cik_db_pipe_config(struct si_screen *sscreen, unsigned tile_mode } } +static INLINE unsigned si_map_swizzle(unsigned swizzle) +{ + switch (swizzle) { + case UTIL_FORMAT_SWIZZLE_Y: + return V_008F0C_SQ_SEL_Y; + case UTIL_FORMAT_SWIZZLE_Z: + return V_008F0C_SQ_SEL_Z; + case UTIL_FORMAT_SWIZZLE_W: + return V_008F0C_SQ_SEL_W; + case UTIL_FORMAT_SWIZZLE_0: + return V_008F0C_SQ_SEL_0; + case UTIL_FORMAT_SWIZZLE_1: + return V_008F0C_SQ_SEL_1; + default: /* UTIL_FORMAT_SWIZZLE_X */ + return V_008F0C_SQ_SEL_X; + } +} + +static INLINE uint32_t S_FIXED(float value, uint32_t frac_bits) +{ + return value * (1 << frac_bits); +} + +/* 12.4 fixed-point */ +static INLINE unsigned si_pack_float_12p4(float x) +{ + return x <= 0? 0 : + x >= 4096 ? 0xffff : x * 16; +} + /* * inferred framebuffer and blender state */ -- 1.8.3.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 15/22] radeonsi: inline si_upload_index_buffer
From: Marek Olšák marek.ol...@amd.com --- src/gallium/drivers/radeonsi/si_buffer.c | 7 --- src/gallium/drivers/radeonsi/si_pipe.h | 5 - src/gallium/drivers/radeonsi/si_state_draw.c | 3 ++- 3 files changed, 2 insertions(+), 13 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_buffer.c b/src/gallium/drivers/radeonsi/si_buffer.c index 6b05c9f..7994405 100644 --- a/src/gallium/drivers/radeonsi/si_buffer.c +++ b/src/gallium/drivers/radeonsi/si_buffer.c @@ -35,13 +35,6 @@ #include "si.h" #include "si_pipe.h" -void si_upload_index_buffer(struct si_context *sctx, - struct pipe_index_buffer *ib, unsigned count) -{ - u_upload_data(sctx->b.uploader, 0, count * ib->index_size, - ib->user_buffer, &ib->offset, &ib->buffer); -} - void si_upload_const_buffer(struct si_context *sctx, struct r600_resource **rbuffer, const uint8_t *ptr, unsigned size, uint32_t *const_offset) diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index d7d701b..bb4d82a 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -161,11 +161,6 @@ void si_flush_depth_textures(struct si_context *sctx, void si_decompress_color_textures(struct si_context *sctx, struct si_textures_info *textures); -/* si_buffer.c */ -void si_upload_index_buffer(struct si_context *sctx, - struct pipe_index_buffer *ib, unsigned count); - - /* si_pipe.c */ void si_flush(struct pipe_context *ctx, struct pipe_fence_handle **fence, unsigned flags); diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index 45dd4ba..f325a64 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -721,7 +721,8 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) si_translate_index_buffer(sctx, &ib, info->count); if (ib.user_buffer && !ib.buffer) { - si_upload_index_buffer(sctx, &ib, info->count); + u_upload_data(sctx->b.uploader, 0, info->count *
ib.index_size, + ib.user_buffer, &ib.offset, &ib.buffer); } } -- 1.8.3.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 14/22] r600g, radeonsi: consolidate remaining obviously duplicated pipe_screen code
From: Marek Olšák marek.ol...@amd.com --- src/gallium/drivers/r600/r600_pipe.c | 35 ++- src/gallium/drivers/radeon/r600_pipe_common.c | 24 +++--- src/gallium/drivers/radeon/r600_pipe_common.h | 2 +- src/gallium/drivers/radeonsi/si_pipe.c| 26 +--- 4 files changed, 30 insertions(+), 57 deletions(-) diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index d95e717..49521e0 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -518,19 +518,11 @@ static void r600_destroy_screen(struct pipe_screen* pscreen) if (!radeon_winsys_unref(rscreen-b.ws)) return; - r600_common_screen_cleanup(rscreen-b); - if (rscreen-global_pool) { compute_memory_pool_delete(rscreen-global_pool); } - if (rscreen-b.trace_bo) { - rscreen-b.ws-buffer_unmap(rscreen-b.trace_bo-cs_buf); - pipe_resource_reference((struct pipe_resource**)rscreen-b.trace_bo, NULL); - } - - rscreen-b.ws-destroy(rscreen-b.ws); - FREE(rscreen); + r600_destroy_common_screen(rscreen-b); } static struct pipe_resource *r600_resource_create(struct pipe_screen *screen, @@ -551,18 +543,11 @@ struct pipe_screen *r600_screen_create(struct radeon_winsys *ws) return NULL; } - ws-query_info(ws, rscreen-b.info); - /* Set functions first. 
*/ rscreen-b.b.context_create = r600_create_context; rscreen-b.b.destroy = r600_destroy_screen; rscreen-b.b.get_param = r600_get_param; rscreen-b.b.get_shader_param = r600_get_shader_param; - if (rscreen-b.info.chip_class = EVERGREEN) { - rscreen-b.b.is_format_supported = evergreen_is_format_supported; - } else { - rscreen-b.b.is_format_supported = r600_is_format_supported; - } rscreen-b.b.resource_create = r600_resource_create; if (!r600_common_screen_init(rscreen-b, ws)) { @@ -570,6 +555,12 @@ struct pipe_screen *r600_screen_create(struct radeon_winsys *ws) return NULL; } + if (rscreen-b.info.chip_class = EVERGREEN) { + rscreen-b.b.is_format_supported = evergreen_is_format_supported; + } else { + rscreen-b.b.is_format_supported = r600_is_format_supported; + } + rscreen-b.debug_flags |= debug_get_flags_option(R600_DEBUG, r600_debug_options, 0); if (debug_get_bool_option(R600_DEBUG_COMPUTE, FALSE)) rscreen-b.debug_flags |= DBG_COMPUTE; @@ -632,18 +623,6 @@ struct pipe_screen *r600_screen_create(struct radeon_winsys *ws) rscreen-global_pool = compute_memory_pool_new(rscreen); - rscreen-b.cs_count = 0; - if (rscreen-b.info.drm_minor = 28 (rscreen-b.debug_flags DBG_TRACE_CS)) { - rscreen-b.trace_bo = (struct r600_resource*)pipe_buffer_create(rscreen-b.b, - PIPE_BIND_CUSTOM, - PIPE_USAGE_STAGING, - 4096); - if (rscreen-b.trace_bo) { - rscreen-b.trace_ptr = rscreen-b.ws-buffer_map(rscreen-b.trace_bo-cs_buf, NULL, - PIPE_TRANSFER_UNSYNCHRONIZED); - } - } - /* Create the auxiliary context. This must be done last. 
*/ rscreen-b.aux_context = rscreen-b.b.context_create(rscreen-b.b, NULL); diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c index 931c91c..396ff86 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.c +++ b/src/gallium/drivers/radeon/r600_pipe_common.c @@ -616,17 +616,35 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen, if (!r600_init_tiling(rscreen)) { return false; } - util_format_s3tc_init(); - pipe_mutex_init(rscreen-aux_context_lock); + + if (rscreen-info.drm_minor = 28 (rscreen-debug_flags DBG_TRACE_CS)) { + rscreen-trace_bo = (struct r600_resource*)pipe_buffer_create(rscreen-b, + PIPE_BIND_CUSTOM, + PIPE_USAGE_STAGING, + 4096); + if (rscreen-trace_bo) { + rscreen-trace_ptr = rscreen-ws-buffer_map(rscreen-trace_bo-cs_buf, NULL, + PIPE_TRANSFER_UNSYNCHRONIZED); + } + } + return
[Mesa-dev] [PATCH 07/22] r600g, radeonsi: consolidate the contents of r600_resource.c
From: Marek Olšák marek.ol...@amd.com --- src/gallium/drivers/r600/Makefile.sources | 1 - src/gallium/drivers/r600/r600_pipe.c | 13 - src/gallium/drivers/r600/r600_pipe.h | 3 -- src/gallium/drivers/r600/r600_resource.c | 76 --- src/gallium/drivers/r600/r600_resource.h | 3 -- src/gallium/drivers/radeon/r600_pipe_common.c | 19 +++ src/gallium/drivers/radeon/r600_pipe_common.h | 6 +-- src/gallium/drivers/radeon/r600_texture.c | 18 --- src/gallium/drivers/radeonsi/Makefile.sources | 1 - src/gallium/drivers/radeonsi/si_pipe.c| 2 - src/gallium/drivers/radeonsi/si_pipe.h| 3 -- src/gallium/drivers/radeonsi/si_resource.c| 61 - src/gallium/drivers/radeonsi/si_resource.h| 2 - 13 files changed, 45 insertions(+), 163 deletions(-) delete mode 100644 src/gallium/drivers/r600/r600_resource.c delete mode 100644 src/gallium/drivers/radeonsi/si_resource.c diff --git a/src/gallium/drivers/r600/Makefile.sources b/src/gallium/drivers/r600/Makefile.sources index f04e156..82560fc 100644 --- a/src/gallium/drivers/r600/Makefile.sources +++ b/src/gallium/drivers/r600/Makefile.sources @@ -4,7 +4,6 @@ C_SOURCES = \ r600_hw_context.c \ r600_isa.c \ r600_pipe.c \ - r600_resource.c \ r600_shader.c \ r600_state.c \ r700_asm.c \ diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index f9f7b0e..9c0cb0d 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -211,7 +211,6 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void rctx-keep_tiling_flags = rscreen-b.info.drm_minor = 12; r600_init_blit_functions(rctx); - r600_init_context_resource_functions(rctx); if (rscreen-b.info.has_uvd) { rctx-b.b.create_video_codec = r600_uvd_create_decoder; @@ -824,6 +823,16 @@ static int r600_get_driver_query_info(struct pipe_screen *screen, return 1; } +static struct pipe_resource *r600_resource_create(struct pipe_screen *screen, + const struct pipe_resource *templ) +{ + if (templ-target == PIPE_BUFFER + 
(templ-bind PIPE_BIND_GLOBAL)) + return r600_compute_global_buffer_create(screen, templ); + + return r600_resource_create_common(screen, templ); +} + struct pipe_screen *r600_screen_create(struct radeon_winsys *ws) { struct r600_screen *rscreen = CALLOC_STRUCT(r600_screen); @@ -857,7 +866,7 @@ struct pipe_screen *r600_screen_create(struct radeon_winsys *ws) rscreen-b.b.get_video_param = r600_get_video_param; rscreen-b.b.is_video_format_supported = vl_video_buffer_is_format_supported; } - r600_init_screen_resource_functions(rscreen-b.b); + rscreen-b.b.resource_create = r600_resource_create; if (!r600_common_screen_init(rscreen-b, ws)) { FREE(rscreen); diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index a3e4c24..7f4f482 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -545,9 +545,6 @@ void r600_decompress_color_textures(struct r600_context *rctx, /* r600_pipe.c */ const char * r600_llvm_gpu_string(enum radeon_family family); -/* r600_resource.c */ -void r600_init_context_resource_functions(struct r600_context *r600); - /* r600_shader.c */ int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, diff --git a/src/gallium/drivers/r600/r600_resource.c b/src/gallium/drivers/r600/r600_resource.c deleted file mode 100644 index a8fa357..000 --- a/src/gallium/drivers/r600/r600_resource.c +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright 2010 Marek Olšák mar...@gmail.com - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the Software), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice 
and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - *
[Mesa-dev] [PATCH 11/22] r600g, radeonsi: consolidate variables for CS tracing
From: Marek Olšák marek.ol...@amd.com --- src/gallium/drivers/r600/r600_hw_context.c| 6 +++--- src/gallium/drivers/r600/r600_pipe.c | 18 +- src/gallium/drivers/r600/r600_pipe.h | 5 + src/gallium/drivers/r600/r600_state_common.c | 8 src/gallium/drivers/radeon/r600_pipe_common.h | 4 src/gallium/drivers/radeonsi/si_hw_context.c | 22 +++--- src/gallium/drivers/radeonsi/si_pipe.c| 14 +++--- src/gallium/drivers/radeonsi/si_pipe.h| 5 - src/gallium/drivers/radeonsi/si_pm4.c | 4 ++-- src/gallium/drivers/radeonsi/si_state_draw.c | 2 +- 10 files changed, 42 insertions(+), 46 deletions(-) diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c index 790363f..23c9c1c 100644 --- a/src/gallium/drivers/r600/r600_hw_context.c +++ b/src/gallium/drivers/r600/r600_hw_context.c @@ -54,7 +54,7 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, for (i = 0; i R600_NUM_ATOMS; i++) { if (ctx-atoms[i] ctx-atoms[i]-dirty) { num_dw += ctx-atoms[i]-num_dw; - if (ctx-screen-trace_bo) { + if (ctx-screen-b.trace_bo) { num_dw += R600_TRACE_CS_DWORDS; } } @@ -62,7 +62,7 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, /* The upper-bound of how much space a draw command would take. */ num_dw += R600_MAX_FLUSH_CS_DWORDS + R600_MAX_DRAW_CS_DWORDS; - if (ctx-screen-trace_bo) { + if (ctx-screen-b.trace_bo) { num_dw += R600_TRACE_CS_DWORDS; } } @@ -270,7 +270,7 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags) } /* Flush the CS. 
*/ - ctx-b.ws-cs_flush(ctx-b.rings.gfx.cs, flags, ctx-screen-cs_count++); + ctx-b.ws-cs_flush(ctx-b.rings.gfx.cs, flags, ctx-screen-b.cs_count++); ctx-skip_surface_sync_on_next_cs_flush = false; } diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 182a122..e2d97e5 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -259,8 +259,8 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void goto fail; } - if (rscreen-trace_bo) { - rctx-b.rings.gfx.cs = rctx-b.ws-cs_create(rctx-b.ws, RING_GFX, rscreen-trace_bo-cs_buf); + if (rscreen-b.trace_bo) { + rctx-b.rings.gfx.cs = rctx-b.ws-cs_create(rctx-b.ws, RING_GFX, rscreen-b.trace_bo-cs_buf); } else { rctx-b.rings.gfx.cs = rctx-b.ws-cs_create(rctx-b.ws, RING_GFX, NULL); } @@ -741,9 +741,9 @@ static void r600_destroy_screen(struct pipe_screen* pscreen) compute_memory_pool_delete(rscreen-global_pool); } - if (rscreen-trace_bo) { - rscreen-b.ws-buffer_unmap(rscreen-trace_bo-cs_buf); - pipe_resource_reference((struct pipe_resource**)rscreen-trace_bo, NULL); + if (rscreen-b.trace_bo) { + rscreen-b.ws-buffer_unmap(rscreen-b.trace_bo-cs_buf); + pipe_resource_reference((struct pipe_resource**)rscreen-b.trace_bo, NULL); } rscreen-b.ws-destroy(rscreen-b.ws); @@ -858,14 +858,14 @@ struct pipe_screen *r600_screen_create(struct radeon_winsys *ws) rscreen-global_pool = compute_memory_pool_new(rscreen); - rscreen-cs_count = 0; + rscreen-b.cs_count = 0; if (rscreen-b.info.drm_minor = 28 (rscreen-b.debug_flags DBG_TRACE_CS)) { - rscreen-trace_bo = (struct r600_resource*)pipe_buffer_create(rscreen-b.b, + rscreen-b.trace_bo = (struct r600_resource*)pipe_buffer_create(rscreen-b.b, PIPE_BIND_CUSTOM, PIPE_USAGE_STAGING, 4096); - if (rscreen-trace_bo) { - rscreen-trace_ptr = rscreen-b.ws-buffer_map(rscreen-trace_bo-cs_buf, NULL, + if (rscreen-b.trace_bo) { + rscreen-b.trace_ptr = rscreen-b.ws-buffer_map(rscreen-b.trace_bo-cs_buf, NULL, 
PIPE_TRANSFER_UNSYNCHRONIZED); } } diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 7f4f482..bdaeb32 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -204,9 +204,6 @@ struct
[Mesa-dev] [PATCH 08/22] radeon: place context-related functions first in r600_pipe_common.c
From: Marek Olšák marek.ol...@amd.com To follow the unwritten convention of r600g and radeonsi. --- src/gallium/drivers/radeon/r600_pipe_common.c | 166 ++ 1 file changed, 87 insertions(+), 79 deletions(-) diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c index 031f858..7462d43 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.c +++ b/src/gallium/drivers/radeon/r600_pipe_common.c @@ -31,6 +31,93 @@ #include util/u_upload_mgr.h #include inttypes.h +/* + * pipe_context + */ + +bool r600_common_context_init(struct r600_common_context *rctx, + struct r600_common_screen *rscreen) +{ + util_slab_create(rctx-pool_transfers, +sizeof(struct r600_transfer), 64, +UTIL_SLAB_SINGLETHREADED); + + rctx-screen = rscreen; + rctx-ws = rscreen-ws; + rctx-family = rscreen-family; + rctx-chip_class = rscreen-chip_class; + rctx-max_db = rscreen-chip_class = EVERGREEN ? 8 : 4; + + rctx-b.transfer_map = u_transfer_map_vtbl; + rctx-b.transfer_flush_region = u_default_transfer_flush_region; + rctx-b.transfer_unmap = u_transfer_unmap_vtbl; + rctx-b.transfer_inline_write = u_default_transfer_inline_write; + + r600_streamout_init(rctx); + r600_query_init(rctx); + + rctx-allocator_so_filled_size = u_suballocator_create(rctx-b, 4096, 4, + 0, PIPE_USAGE_STATIC, TRUE); + if (!rctx-allocator_so_filled_size) + return false; + + rctx-uploader = u_upload_create(rctx-b, 1024 * 1024, 256, + PIPE_BIND_INDEX_BUFFER | + PIPE_BIND_CONSTANT_BUFFER); + if (!rctx-uploader) + return false; + + return true; +} + +void r600_common_context_cleanup(struct r600_common_context *rctx) +{ + if (rctx-rings.gfx.cs) { + rctx-ws-cs_destroy(rctx-rings.gfx.cs); + } + if (rctx-rings.dma.cs) { + rctx-ws-cs_destroy(rctx-rings.dma.cs); + } + + if (rctx-uploader) { + u_upload_destroy(rctx-uploader); + } + + util_slab_destroy(rctx-pool_transfers); + + if (rctx-allocator_so_filled_size) { + u_suballocator_destroy(rctx-allocator_so_filled_size); + } +} + +void 
r600_context_add_resource_size(struct pipe_context *ctx, struct pipe_resource *r) +{ + struct r600_common_context *rctx = (struct r600_common_context *)ctx; + struct r600_resource *rr = (struct r600_resource *)r; + + if (r == NULL) { + return; + } + + /* +* The idea is to compute a gross estimate of memory requirement of +* each draw call. After each draw call, memory will be precisely +* accounted. So the uncertainty is only on the current draw call. +* In practice this gave very good estimate (+/- 10% of the target +* memory limit). +*/ + if (rr-domains RADEON_DOMAIN_GTT) { + rctx-gtt += rr-buf-size; + } + if (rr-domains RADEON_DOMAIN_VRAM) { + rctx-vram += rr-buf-size; + } +} + +/* + * pipe_screen + */ + static const struct debug_named_value common_debug_options[] = { /* logging */ { tex, DBG_TEX, Print texture info }, @@ -235,85 +322,6 @@ void r600_common_screen_cleanup(struct r600_common_screen *rscreen) rscreen-aux_context-destroy(rscreen-aux_context); } -bool r600_common_context_init(struct r600_common_context *rctx, - struct r600_common_screen *rscreen) -{ - util_slab_create(rctx-pool_transfers, -sizeof(struct r600_transfer), 64, -UTIL_SLAB_SINGLETHREADED); - - rctx-screen = rscreen; - rctx-ws = rscreen-ws; - rctx-family = rscreen-family; - rctx-chip_class = rscreen-chip_class; - rctx-max_db = rscreen-chip_class = EVERGREEN ? 8 : 4; - - rctx-b.transfer_map = u_transfer_map_vtbl; - rctx-b.transfer_flush_region = u_default_transfer_flush_region; - rctx-b.transfer_unmap = u_transfer_unmap_vtbl; - rctx-b.transfer_inline_write = u_default_transfer_inline_write; - - r600_streamout_init(rctx); - r600_query_init(rctx); - - rctx-allocator_so_filled_size = u_suballocator_create(rctx-b, 4096, 4, - 0, PIPE_USAGE_STATIC, TRUE); - if (!rctx-allocator_so_filled_size) - return false; - - rctx-uploader = u_upload_create(rctx-b, 1024 * 1024, 256, - PIPE_BIND_INDEX_BUFFER | - PIPE_BIND_CONSTANT_BUFFER); - if (!rctx-uploader) - return false; - - return true; -} - -void
[Mesa-dev] [PATCH 22/22] radeonsi: cleanup includes, add missing license
From: Marek Olšák marek.ol...@amd.com --- src/gallium/drivers/radeonsi/si_blit.c| 5 ++--- src/gallium/drivers/radeonsi/si_commands.c| 1 - src/gallium/drivers/radeonsi/si_compute.c | 25 +++ src/gallium/drivers/radeonsi/si_descriptors.c | 1 + src/gallium/drivers/radeonsi/si_hw_context.c | 7 +-- src/gallium/drivers/radeonsi/si_pipe.c| 29 +-- src/gallium/drivers/radeonsi/si_pipe.h| 10 - src/gallium/drivers/radeonsi/si_pm4.c | 1 - src/gallium/drivers/radeonsi/si_shader.c | 8 src/gallium/drivers/radeonsi/si_state.c | 21 +-- src/gallium/drivers/radeonsi/si_state_draw.c | 13 ++-- src/gallium/drivers/radeonsi/si_uvd.c | 14 - 12 files changed, 49 insertions(+), 86 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index 250caab..aa3177a 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -20,11 +20,10 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include util/u_surface.h + +#include si_pipe.h #include util/u_blitter.h #include util/u_format.h -#include si_pipe.h -#include si_state.h enum si_blitter_op /* bitmask */ { diff --git a/src/gallium/drivers/radeonsi/si_commands.c b/src/gallium/drivers/radeonsi/si_commands.c index a020ac3..5ddc40e 100644 --- a/src/gallium/drivers/radeonsi/si_commands.c +++ b/src/gallium/drivers/radeonsi/si_commands.c @@ -26,7 +26,6 @@ #include sid.h #include si_pipe.h -#include si_pm4.h void si_cmd_context_control(struct si_pm4_state *pm4) { diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index 3aea799..a7f49e7 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -1,8 +1,33 @@ +/* + * Copyright 2013 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the Software), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + #include util/u_memory.h #include ../radeon/r600_cs.h #include si_pipe.h #include si_shader.h +#include sid.h #include radeon_llvm_util.h diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index d48dbc0..5a7fac1 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -26,6 +26,7 @@ #include ../radeon/r600_cs.h #include si_pipe.h #include si_shader.h +#include sid.h #include util/u_memory.h #include util/u_upload_mgr.h diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c index 2e1e9f2..7e40255 100644 --- a/src/gallium/drivers/radeonsi/si_hw_context.c +++ b/src/gallium/drivers/radeonsi/si_hw_context.c @@ -23,13 +23,8 @@ * Authors: * Jerome Glisse */ -#include ../radeon/r600_cs.h -#include sid.h -#include si_pm4.h -#include si_pipe.h -#include util/u_memory.h -#include errno.h +#include si_pipe.h /* initialize */ void si_need_cs_space(struct si_context *ctx, unsigned num_dw, diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 85a9177..a08f872 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -20,34 +20,15 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include stdio.h -#include errno.h -#include pipe/p_defines.h -#include pipe/p_state.h -#include pipe/p_context.h -#include tgsi/tgsi_scan.h -#include tgsi/tgsi_parse.h -#include tgsi/tgsi_util.h + +#include si_pipe.h +#include si_public.h + +#include radeon/radeon_uvd.h #include util/u_blitter.h -#include util/u_double_list.h -#include
[Mesa-dev] [PATCH 16/22] radeonsi: inline si_translate_index_buffer
From: Marek Olšák marek.ol...@amd.com --- src/gallium/drivers/radeonsi/Makefile.sources | 1 - src/gallium/drivers/radeonsi/si_pipe.h| 5 --- src/gallium/drivers/radeonsi/si_state_draw.c | 20 +- src/gallium/drivers/radeonsi/si_translate.c | 53 --- 4 files changed, 19 insertions(+), 60 deletions(-) delete mode 100644 src/gallium/drivers/radeonsi/si_translate.c diff --git a/src/gallium/drivers/radeonsi/Makefile.sources b/src/gallium/drivers/radeonsi/Makefile.sources index c24eb75..4e1f971 100644 --- a/src/gallium/drivers/radeonsi/Makefile.sources +++ b/src/gallium/drivers/radeonsi/Makefile.sources @@ -10,5 +10,4 @@ C_SOURCES := \ si_shader.c \ si_state.c \ si_state_draw.c \ - si_translate.c \ si_uvd.c diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index bb4d82a..fc75854 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -165,11 +165,6 @@ void si_decompress_color_textures(struct si_context *sctx, void si_flush(struct pipe_context *ctx, struct pipe_fence_handle **fence, unsigned flags); -/* si_translate.c */ -void si_translate_index_buffer(struct si_context *sctx, - struct pipe_index_buffer *ib, - unsigned count); - #if SI_TRACE_CS void si_trace_emit(struct si_context *sctx); #endif diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index f325a64..8dbf373 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -27,6 +27,8 @@ #include util/u_memory.h #include util/u_framebuffer.h #include util/u_blitter.h +#include util/u_index_modify.h +#include util/u_upload_mgr.h #include tgsi/tgsi_parse.h #include si_pipe.h #include si_shader.h @@ -718,7 +720,23 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) ib.offset = sctx-index_buffer.offset + info-start * ib.index_size; /* Translate or upload, if needed. 
*/ - si_translate_index_buffer(sctx, ib, info-count); + if (ib.index_size == 1) { + struct pipe_resource *out_buffer = NULL; + unsigned out_offset; + void *ptr; + + u_upload_alloc(sctx-b.uploader, 0, info-count * 2, + out_offset, out_buffer, ptr); + + util_shorten_ubyte_elts_to_userptr( + sctx-b.b, ib, 0, ib.offset, info-count, ptr); + + pipe_resource_reference(ib.buffer, NULL); + ib.user_buffer = NULL; + ib.buffer = out_buffer; + ib.offset = out_offset; + ib.index_size = 2; + } if (ib.user_buffer !ib.buffer) { u_upload_data(sctx-b.uploader, 0, info-count * ib.index_size, diff --git a/src/gallium/drivers/radeonsi/si_translate.c b/src/gallium/drivers/radeonsi/si_translate.c deleted file mode 100644 index be9d621..000 --- a/src/gallium/drivers/radeonsi/si_translate.c +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright 2010 Red Hat Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the Software), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: Dave Airlie airl...@redhat.com - */ - -#include util/u_index_modify.h -#include util/u_upload_mgr.h -#include si_pipe.h - - -void si_translate_index_buffer(struct si_context *sctx, - struct pipe_index_buffer *ib, - unsigned count) -{ - struct pipe_resource *out_buffer = NULL; - unsigned out_offset; - void *ptr; - - switch (ib-index_size) { - case
Re: [Mesa-dev] [PATCH] rtasm: deal with size overflows by casting to ptrdiff_t
Does Nouveau still work if you report PIPE_CAP_USER_VERTEX_BUFFERS = 0? Marek On Wed, Jan 22, 2014 at 3:37 AM, Ilia Mirkin imir...@alum.mit.edu wrote: This was discovered as a result of the draw-elements-base-vertex-neg piglit test, which passes very negative offsets in, followed up by large indices. The nouveau code correctly adjusts the pointer, but the transfer code needs to do the proper inverse correction. Similarly fix up the SSE code to do a 64-bit multiply to compute the proper offset. Signed-off-by: Ilia Mirkin imir...@alum.mit.edu --- With this change, nouveau passes for the draw-elements-base-vertex-neg piglit test with user_varrays, on a 64-bit setup both with and without GALLIUM_NOSSE=1. I'm pretty sure that the change should be minimal to a non-x86 setup since the rexw will be a no-op. I guess there will be an extra register use for the mov, but it shouldn't be too expensive, esp on anything remotely current. src/gallium/auxiliary/translate/translate_generic.c | 2 +- src/gallium/auxiliary/translate/translate_sse.c | 8 ++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c index 5bf97db..5ffce32 100644 --- a/src/gallium/auxiliary/translate/translate_generic.c +++ b/src/gallium/auxiliary/translate/translate_generic.c @@ -638,7 +638,7 @@ static ALWAYS_INLINE void PIPE_CDECL generic_run_one( struct translate_generic * } src = tg-attrib[attr].input_ptr + - tg-attrib[attr].input_stride * index; + (ptrdiff_t)tg-attrib[attr].input_stride * index; copy_size = tg-attrib[attr].copy_size; if(likely(copy_size = 0)) diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c index a78ea91..a72454a 100644 --- a/src/gallium/auxiliary/translate/translate_sse.c +++ b/src/gallium/auxiliary/translate/translate_sse.c @@ -1121,7 +1121,9 @@ static boolean init_inputs( struct translate_sse *p, 
x86_cmovcc(p-func, tmp_EAX, buf_max_index, cc_AE); } - x86_imul(p-func, tmp_EAX, buf_stride); + x86_mov(p-func, p-tmp2_EDX, buf_stride); + x64_rexw(p-func); + x86_imul(p-func, tmp_EAX, p-tmp2_EDX); x64_rexw(p-func); x86_add(p-func, tmp_EAX, buf_base_ptr); @@ -1207,7 +1209,9 @@ static struct x86_reg get_buffer_ptr( struct translate_sse *p, x86_cmp(p-func, ptr, buf_max_index); x86_cmovcc(p-func, ptr, buf_max_index, cc_AE); - x86_imul(p-func, ptr, buf_stride); + x86_mov(p-func, p-tmp2_EDX, buf_stride); + x64_rexw(p-func); + x86_imul(p-func, ptr, p-tmp2_EDX); x64_rexw(p-func); x86_add(p-func, ptr, buf_base_ptr); return ptr; -- 1.8.3.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] rtasm: deal with size overflows by casting to ptrdiff_t
On Wed, Jan 22, 2014 at 3:27 PM, Marek Olšák mar...@gmail.com wrote: Does Nouveau still work if you report PIPE_CAP_USER_VERTEX_BUFFERS = 0? I'm not in front of a machine with nouveau, so I can't tell you right now, but I'll test it out later tonight. Out of curiosity though, why do you ask? Is it related to this patch, or just idle curiosity on your end? Marek On Wed, Jan 22, 2014 at 3:37 AM, Ilia Mirkin imir...@alum.mit.edu wrote: This was discovered as a result of the draw-elements-base-vertex-neg piglit test, which passes very negative offsets in, followed up by large indices. The nouveau code correctly adjusts the pointer, but the transfer code needs to do the proper inverse correction. Similarly fix up the SSE code to do a 64-bit multiply to compute the proper offset. Signed-off-by: Ilia Mirkin imir...@alum.mit.edu --- With this change, nouveau passes for the draw-elements-base-vertex-neg piglit test with user_varrays, on a 64-bit setup both with and without GALLIUM_NOSSE=1. I'm pretty sure that the change should be minimal to a non-x86 setup since the rexw will be a no-op. I guess there will be an extra register use for the mov, but it shouldn't be too expensive, esp on anything remotely current. 
src/gallium/auxiliary/translate/translate_generic.c | 2 +- src/gallium/auxiliary/translate/translate_sse.c | 8 ++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c index 5bf97db..5ffce32 100644 --- a/src/gallium/auxiliary/translate/translate_generic.c +++ b/src/gallium/auxiliary/translate/translate_generic.c @@ -638,7 +638,7 @@ static ALWAYS_INLINE void PIPE_CDECL generic_run_one( struct translate_generic * } src = tg-attrib[attr].input_ptr + - tg-attrib[attr].input_stride * index; + (ptrdiff_t)tg-attrib[attr].input_stride * index; copy_size = tg-attrib[attr].copy_size; if(likely(copy_size = 0)) diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c index a78ea91..a72454a 100644 --- a/src/gallium/auxiliary/translate/translate_sse.c +++ b/src/gallium/auxiliary/translate/translate_sse.c @@ -1121,7 +1121,9 @@ static boolean init_inputs( struct translate_sse *p, x86_cmovcc(p-func, tmp_EAX, buf_max_index, cc_AE); } - x86_imul(p-func, tmp_EAX, buf_stride); + x86_mov(p-func, p-tmp2_EDX, buf_stride); + x64_rexw(p-func); + x86_imul(p-func, tmp_EAX, p-tmp2_EDX); x64_rexw(p-func); x86_add(p-func, tmp_EAX, buf_base_ptr); @@ -1207,7 +1209,9 @@ static struct x86_reg get_buffer_ptr( struct translate_sse *p, x86_cmp(p-func, ptr, buf_max_index); x86_cmovcc(p-func, ptr, buf_max_index, cc_AE); - x86_imul(p-func, ptr, buf_stride); + x86_mov(p-func, p-tmp2_EDX, buf_stride); + x64_rexw(p-func); + x86_imul(p-func, ptr, p-tmp2_EDX); x64_rexw(p-func); x86_add(p-func, ptr, buf_base_ptr); return ptr; -- 1.8.3.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [V3 PATCH 1/8] mesa: 's/\bgl_format\b/mesa_format/g'. Use better name for Mesa Formats enum
On Fri, Jan 17, 2014 at 8:58 AM, Brian Paul bri...@vmware.com wrote: On 01/16/2014 10:13 PM, Mark Mueller wrote: This series encompasses the much discussed specification and renaming of MESA_FORMATs, which is now packed into 8 patches Signed-off-by: Mark Mueller markkmuel...@gmail.com --- Well, our other enum typedefs (and structs) all use the gl_ prefix. But the other enum values don't use MESA_ prefixes so gl_formats are weird that way. I'm kind of on the fence about this change. -Brian Obviously it's not critical, but the gl_ prefix is confusing because of the weirdness, and Ken recommended a name change thus I took a stab at it. I've left this change in V4 of the series. Would it be more convincing with a different name, like mgl_formats, or mesa_gl_formats? Mark ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] gallium: remove PIPE_CAP_MAX_COMBINED_SAMPLERS
On Mon, Jan 20, 2014 at 3:35 PM, Brian Paul bri...@vmware.com wrote: I'm not sure about this. The concept of MAX_COMBINED_TEXTURE_IMAGE_UNITS goes back to the first shader hardware where texture image units might be shared between the VS and FS. The question is whether any of the hardware we care about has that restriction. I'm not aware of any hardware which has the restriction. I think the reason MAX_COMBINED_TEXTURE_IMAGE_UNITS exists is that the OpenGL texture units are shared by all shader stages. Marek ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] rtasm: deal with size overflows by casting to ptrdiff_t
The draw-elements-base-vertex-neg test passes on Radeon, which uses the common util/u_vbuf for uploading vertices. I know Nouveau is probably the only driver which doesn't use it, not counting the swrast drivers. I'm afraid that your change from fail to pass for Nouveau will break the test for everybody else. You can switch to using util/u_vbuf by reporting PIPE_CAP_USER_VERTEX_BUFFERS = 0. Then you will hit the same code path as Radeon. Marek On Wed, Jan 22, 2014 at 9:32 PM, Ilia Mirkin imir...@alum.mit.edu wrote: On Wed, Jan 22, 2014 at 3:27 PM, Marek Olšák mar...@gmail.com wrote: Does Nouveau still work if you report PIPE_CAP_USER_VERTEX_BUFFERS = 0? I'm not in front of a machine with nouveau, so I can't tell you right now, but I'll test it out later tonight. Out of curiosity though, why do you ask? Is it related to this patch, or just idle curiosity on your end? Marek On Wed, Jan 22, 2014 at 3:37 AM, Ilia Mirkin imir...@alum.mit.edu wrote: This was discovered as a result of the draw-elements-base-vertex-neg piglit test, which passes very negative offsets in, followed up by large indices. The nouveau code correctly adjusts the pointer, but the transfer code needs to do the proper inverse correction. Similarly fix up the SSE code to do a 64-bit multiply to compute the proper offset. Signed-off-by: Ilia Mirkin imir...@alum.mit.edu --- With this change, nouveau passes for the draw-elements-base-vertex-neg piglit test with user_varrays, on a 64-bit setup both with and without GALLIUM_NOSSE=1. I'm pretty sure that the change should be minimal to a non-x86 setup since the rexw will be a no-op. I guess there will be an extra register use for the mov, but it shouldn't be too expensive, esp on anything remotely current. 
src/gallium/auxiliary/translate/translate_generic.c | 2 +- src/gallium/auxiliary/translate/translate_sse.c | 8 ++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c index 5bf97db..5ffce32 100644 --- a/src/gallium/auxiliary/translate/translate_generic.c +++ b/src/gallium/auxiliary/translate/translate_generic.c @@ -638,7 +638,7 @@ static ALWAYS_INLINE void PIPE_CDECL generic_run_one( struct translate_generic * } src = tg-attrib[attr].input_ptr + - tg-attrib[attr].input_stride * index; + (ptrdiff_t)tg-attrib[attr].input_stride * index; copy_size = tg-attrib[attr].copy_size; if(likely(copy_size = 0)) diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c index a78ea91..a72454a 100644 --- a/src/gallium/auxiliary/translate/translate_sse.c +++ b/src/gallium/auxiliary/translate/translate_sse.c @@ -1121,7 +1121,9 @@ static boolean init_inputs( struct translate_sse *p, x86_cmovcc(p-func, tmp_EAX, buf_max_index, cc_AE); } - x86_imul(p-func, tmp_EAX, buf_stride); + x86_mov(p-func, p-tmp2_EDX, buf_stride); + x64_rexw(p-func); + x86_imul(p-func, tmp_EAX, p-tmp2_EDX); x64_rexw(p-func); x86_add(p-func, tmp_EAX, buf_base_ptr); @@ -1207,7 +1209,9 @@ static struct x86_reg get_buffer_ptr( struct translate_sse *p, x86_cmp(p-func, ptr, buf_max_index); x86_cmovcc(p-func, ptr, buf_max_index, cc_AE); - x86_imul(p-func, ptr, buf_stride); + x86_mov(p-func, p-tmp2_EDX, buf_stride); + x64_rexw(p-func); + x86_imul(p-func, ptr, p-tmp2_EDX); x64_rexw(p-func); x86_add(p-func, ptr, buf_base_ptr); return ptr; -- 1.8.3.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 10/10] i965: Support 32 texture image units on Haswell+.
On Sat, Jan 18, 2014 at 8:20 PM, Kenneth Graunke kenn...@whitecape.org wrote: The Intel closed source OpenGL driver recently began supporting 32 texture image units on Haswell. This makes the open source driver support 32 as well. Earlier generations don't have the message header field required to support more than 16 sampler states, so we continue to advertise 16 there. On Haswell, this causes us to advertise: - GL_MAX_TEXTURE_IMAGE_UNITS = 32 - GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS = 32 - GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS = 96 instead of the old values of 16, 16, and 48. Signed-off-by: Kenneth Graunke kenn...@whitecape.org --- src/mesa/drivers/dri/i965/brw_context.c | 9 ++--- src/mesa/drivers/dri/i965/brw_context.h | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 1b42751..806f3db 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -282,20 +282,23 @@ brw_initialize_context_constants(struct brw_context *brw) { struct gl_context *ctx = brw-ctx; + unsigned max_samplers = + brw-gen = 8 || brw-is_haswell ? 
BRW_MAX_TEX_UNIT : 16; + ctx-Const.QueryCounterBits.Timestamp = 36; ctx-Const.StripTextureBorder = true; ctx-Const.MaxDualSourceDrawBuffers = 1; ctx-Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS; - ctx-Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = BRW_MAX_TEX_UNIT; + ctx-Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = max_samplers; ctx-Const.MaxTextureCoordUnits = 8; /* Mesa limit */ ctx-Const.MaxTextureUnits = MIN2(ctx-Const.MaxTextureCoordUnits, ctx-Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits); - ctx-Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = BRW_MAX_TEX_UNIT; + ctx-Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = max_samplers; if (brw-gen = 7) - ctx-Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = BRW_MAX_TEX_UNIT; + ctx-Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = max_samplers; else ctx-Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = 0; ctx-Const.MaxCombinedTextureImageUnits = diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 63dd4a0..5908659 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -650,7 +650,7 @@ struct brw_gs_prog_data }; /** Number of texture sampler units */ -#define BRW_MAX_TEX_UNIT 16 +#define BRW_MAX_TEX_UNIT 32 /** Max number of render targets in a shader */ #define BRW_MAX_DRAW_BUFFERS 8 -- 1.8.5.2 Unfortunate that the PRMs that contain the relevant information for this series are missing... Anyway, the series is Reviewed-by: Matt Turner matts...@gmail.com ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev